ddtrace 1.22.0 → 1.23.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +46 -2
- data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +8 -20
- data/ext/datadog_profiling_native_extension/collectors_thread_context.c +18 -10
- data/ext/datadog_profiling_native_extension/extconf.rb +7 -5
- data/ext/datadog_profiling_native_extension/heap_recorder.c +38 -3
- data/ext/datadog_profiling_native_extension/heap_recorder.h +5 -0
- data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +46 -0
- data/ext/datadog_profiling_native_extension/ruby_helpers.h +3 -0
- data/ext/datadog_profiling_native_extension/stack_recorder.c +156 -55
- data/lib/datadog/appsec/contrib/devise/tracking.rb +8 -0
- data/lib/datadog/core/configuration/components.rb +4 -3
- data/lib/datadog/core/configuration.rb +3 -17
- data/lib/datadog/core/telemetry/component.rb +66 -0
- data/lib/datadog/core/telemetry/emitter.rb +1 -1
- data/lib/datadog/core/telemetry/event.rb +1 -0
- data/lib/datadog/core/telemetry/http/adapters/net.rb +1 -1
- data/lib/datadog/core/telemetry/http/response.rb +4 -0
- data/lib/datadog/core/telemetry/worker.rb +158 -0
- data/lib/datadog/core/utils/only_once_successful.rb +76 -0
- data/lib/datadog/profiling/exporter.rb +6 -3
- data/lib/datadog/profiling/stack_recorder.rb +6 -2
- data/lib/ddtrace/version.rb +2 -2
- metadata +18 -7
- data/lib/datadog/core/telemetry/client.rb +0 -95
- data/lib/datadog/core/telemetry/heartbeat.rb +0 -33
@@ -169,28 +169,51 @@ static const uint8_t all_value_types_positions[] =
|
|
169
169
|
|
170
170
|
#define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))
|
171
171
|
|
172
|
+
// Struct for storing stats related to a profile in a particular slot.
|
173
|
+
// These stats will share the same lifetime as the data in that profile slot.
|
174
|
+
typedef struct slot_stats {
|
175
|
+
// How many individual samples were recorded into this slot (un-weighted)
|
176
|
+
uint64_t recorded_samples;
|
177
|
+
} stats_slot;
|
178
|
+
|
179
|
+
typedef struct profile_slot {
|
180
|
+
ddog_prof_Profile profile;
|
181
|
+
stats_slot stats;
|
182
|
+
} profile_slot;
|
183
|
+
|
172
184
|
// Contains native state for each instance
|
173
185
|
struct stack_recorder_state {
|
174
186
|
// Heap recorder instance
|
175
187
|
heap_recorder *heap_recorder;
|
176
188
|
|
177
|
-
pthread_mutex_t
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
ddog_prof_Profile slot_two_profile;
|
189
|
+
pthread_mutex_t mutex_slot_one;
|
190
|
+
profile_slot profile_slot_one;
|
191
|
+
pthread_mutex_t mutex_slot_two;
|
192
|
+
profile_slot profile_slot_two;
|
182
193
|
|
183
194
|
short active_slot; // MUST NEVER BE ACCESSED FROM record_sample; this is NOT for the sampler thread to use.
|
184
195
|
|
185
196
|
uint8_t position_for[ALL_VALUE_TYPES_COUNT];
|
186
197
|
uint8_t enabled_values_count;
|
198
|
+
|
199
|
+
// Struct for storing stats related to behaviour of a stack recorder instance during its entire lifetime.
|
200
|
+
struct lifetime_stats {
|
201
|
+
// How many profiles have we serialized successfully so far
|
202
|
+
uint64_t serialization_successes;
|
203
|
+
// How many profiles have we serialized unsuccessfully so far
|
204
|
+
uint64_t serialization_failures;
|
205
|
+
// Stats on profile serialization time
|
206
|
+
long serialization_time_ns_min;
|
207
|
+
long serialization_time_ns_max;
|
208
|
+
uint64_t serialization_time_ns_total;
|
209
|
+
} stats_lifetime;
|
187
210
|
};
|
188
211
|
|
189
|
-
// Used to
|
190
|
-
struct
|
212
|
+
// Used to group mutex and the corresponding profile slot for easy unlocking after work is done.
|
213
|
+
typedef struct locked_profile_slot {
|
191
214
|
pthread_mutex_t *mutex;
|
192
|
-
|
193
|
-
};
|
215
|
+
profile_slot *data;
|
216
|
+
} locked_profile_slot;
|
194
217
|
|
195
218
|
struct call_serialize_without_gvl_arguments {
|
196
219
|
// Set by caller
|
@@ -198,8 +221,10 @@ struct call_serialize_without_gvl_arguments {
|
|
198
221
|
ddog_Timespec finish_timestamp;
|
199
222
|
|
200
223
|
// Set by callee
|
201
|
-
|
224
|
+
profile_slot *slot;
|
202
225
|
ddog_prof_Profile_SerializeResult result;
|
226
|
+
long heap_profile_build_time_ns;
|
227
|
+
long serialize_no_gvl_time_ns;
|
203
228
|
|
204
229
|
// Set by both
|
205
230
|
bool serialize_ran;
|
@@ -222,9 +247,9 @@ static VALUE _native_initialize(
|
|
222
247
|
static VALUE _native_serialize(VALUE self, VALUE recorder_instance);
|
223
248
|
static VALUE ruby_time_from(ddog_Timespec ddprof_time);
|
224
249
|
static void *call_serialize_without_gvl(void *call_args);
|
225
|
-
static
|
226
|
-
static void sampler_unlock_active_profile(
|
227
|
-
static
|
250
|
+
static locked_profile_slot sampler_lock_active_profile(struct stack_recorder_state *state);
|
251
|
+
static void sampler_unlock_active_profile(locked_profile_slot active_slot);
|
252
|
+
static profile_slot* serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state);
|
228
253
|
static VALUE _native_active_slot(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
|
229
254
|
static VALUE _native_is_slot_one_mutex_locked(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
|
230
255
|
static VALUE _native_is_slot_two_mutex_locked(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
|
@@ -233,7 +258,7 @@ static ddog_Timespec system_epoch_now_timespec(void);
|
|
233
258
|
static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_instance);
|
234
259
|
static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time);
|
235
260
|
static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint);
|
236
|
-
static void
|
261
|
+
static void reset_profile_slot(profile_slot *slot, ddog_Timespec *start_time /* Can be null */);
|
237
262
|
static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class);
|
238
263
|
static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations);
|
239
264
|
static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
|
@@ -241,6 +266,8 @@ static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self
|
|
241
266
|
static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
|
242
267
|
static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj);
|
243
268
|
static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj);
|
269
|
+
static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance);
|
270
|
+
static VALUE build_profile_stats(profile_slot *slot, long serialization_time_ns, long heap_iteration_prep_time_ns, long heap_profile_build_time_ns);
|
244
271
|
|
245
272
|
|
246
273
|
void stack_recorder_init(VALUE profiling_module) {
|
@@ -261,6 +288,7 @@ void stack_recorder_init(VALUE profiling_module) {
|
|
261
288
|
rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 7);
|
262
289
|
rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1);
|
263
290
|
rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
|
291
|
+
rb_define_singleton_method(stack_recorder_class, "_native_stats", _native_stats, 1);
|
264
292
|
rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1);
|
265
293
|
rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1);
|
266
294
|
rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1);
|
@@ -305,6 +333,9 @@ static VALUE _native_new(VALUE klass) {
|
|
305
333
|
initialize_slot_concurrency_control(state);
|
306
334
|
for (uint8_t i = 0; i < ALL_VALUE_TYPES_COUNT; i++) { state->position_for[i] = all_value_types_positions[i]; }
|
307
335
|
state->enabled_values_count = ALL_VALUE_TYPES_COUNT;
|
336
|
+
state->stats_lifetime = (struct lifetime_stats) {
|
337
|
+
.serialization_time_ns_min = INT64_MAX,
|
338
|
+
};
|
308
339
|
|
309
340
|
// Note: At this point, slot_one_profile and slot_two_profile contain null pointers. Libdatadog validates pointers
|
310
341
|
// before using them so it's ok for us to go ahead and create the StackRecorder object.
|
@@ -325,11 +356,11 @@ static VALUE _native_new(VALUE klass) {
|
|
325
356
|
}
|
326
357
|
|
327
358
|
static void initialize_slot_concurrency_control(struct stack_recorder_state *state) {
|
328
|
-
state->
|
329
|
-
state->
|
359
|
+
state->mutex_slot_one = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER;
|
360
|
+
state->mutex_slot_two = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER;
|
330
361
|
|
331
362
|
// A newly-created StackRecorder starts with slot one being active for samples, so let's lock slot two
|
332
|
-
ENFORCE_SUCCESS_GVL(pthread_mutex_lock(&state->
|
363
|
+
ENFORCE_SUCCESS_GVL(pthread_mutex_lock(&state->mutex_slot_two));
|
333
364
|
|
334
365
|
state->active_slot = 1;
|
335
366
|
}
|
@@ -352,18 +383,22 @@ static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Sl
|
|
352
383
|
rb_raise(rb_eRuntimeError, "Failed to initialize slot two profile: %"PRIsVALUE, get_error_details_and_drop(&slot_two_profile_result.err));
|
353
384
|
}
|
354
385
|
|
355
|
-
state->
|
356
|
-
|
386
|
+
state->profile_slot_one = (profile_slot) {
|
387
|
+
.profile = slot_one_profile_result.ok,
|
388
|
+
};
|
389
|
+
state->profile_slot_two = (profile_slot) {
|
390
|
+
.profile = slot_two_profile_result.ok,
|
391
|
+
};
|
357
392
|
}
|
358
393
|
|
359
394
|
static void stack_recorder_typed_data_free(void *state_ptr) {
|
360
395
|
struct stack_recorder_state *state = (struct stack_recorder_state *) state_ptr;
|
361
396
|
|
362
|
-
pthread_mutex_destroy(&state->
|
363
|
-
ddog_prof_Profile_drop(&state->
|
397
|
+
pthread_mutex_destroy(&state->mutex_slot_one);
|
398
|
+
ddog_prof_Profile_drop(&state->profile_slot_one.profile);
|
364
399
|
|
365
|
-
pthread_mutex_destroy(&state->
|
366
|
-
ddog_prof_Profile_drop(&state->
|
400
|
+
pthread_mutex_destroy(&state->mutex_slot_two);
|
401
|
+
ddog_prof_Profile_drop(&state->profile_slot_two.profile);
|
367
402
|
|
368
403
|
heap_recorder_free(state->heap_recorder);
|
369
404
|
|
@@ -462,8 +497,8 @@ static VALUE _native_initialize(
|
|
462
497
|
state->position_for[TIMELINE_VALUE_ID] = next_disabled_pos++;
|
463
498
|
}
|
464
499
|
|
465
|
-
ddog_prof_Profile_drop(&state->
|
466
|
-
ddog_prof_Profile_drop(&state->
|
500
|
+
ddog_prof_Profile_drop(&state->profile_slot_one.profile);
|
501
|
+
ddog_prof_Profile_drop(&state->profile_slot_two.profile);
|
467
502
|
|
468
503
|
ddog_prof_Slice_ValueType sample_types = {.ptr = enabled_value_types, .len = state->enabled_values_count};
|
469
504
|
initialize_profiles(state, sample_types);
|
@@ -479,9 +514,11 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
|
|
479
514
|
// Need to do this while still holding on to the Global VM Lock; see comments on method for why
|
480
515
|
serializer_set_start_timestamp_for_next_profile(state, finish_timestamp);
|
481
516
|
|
517
|
+
long heap_iteration_prep_start_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
482
518
|
// Prepare the iteration on heap recorder we'll be doing outside the GVL. The preparation needs to
|
483
519
|
// happen while holding on to the GVL.
|
484
520
|
heap_recorder_prepare_iteration(state->heap_recorder);
|
521
|
+
long heap_iteration_prep_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - heap_iteration_prep_start_time_ns;
|
485
522
|
|
486
523
|
// We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this
|
487
524
|
// is pending
|
@@ -508,12 +545,27 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
|
|
508
545
|
// Cleanup after heap recorder iteration. This needs to happen while holding on to the GVL.
|
509
546
|
heap_recorder_finish_iteration(state->heap_recorder);
|
510
547
|
|
548
|
+
// NOTE: We are focusing on the serialization time outside of the GVL in this stat here. This doesn't
|
549
|
+
// really cover the full serialization process but it gives a more useful number since it bypasses
|
550
|
+
// the noise of acquiring GVLs and dealing with interruptions which is highly specific to runtime
|
551
|
+
// conditions and over which we really have no control.
|
552
|
+
long serialization_time_ns = args.serialize_no_gvl_time_ns;
|
553
|
+
if (serialization_time_ns >= 0) {
|
554
|
+
// Only update stats if our serialization time is valid.
|
555
|
+
state->stats_lifetime.serialization_time_ns_max = long_max_of(state->stats_lifetime.serialization_time_ns_max, serialization_time_ns);
|
556
|
+
state->stats_lifetime.serialization_time_ns_min = long_min_of(state->stats_lifetime.serialization_time_ns_min, serialization_time_ns);
|
557
|
+
state->stats_lifetime.serialization_time_ns_total += serialization_time_ns;
|
558
|
+
}
|
559
|
+
|
511
560
|
ddog_prof_Profile_SerializeResult serialized_profile = args.result;
|
512
561
|
|
513
562
|
if (serialized_profile.tag == DDOG_PROF_PROFILE_SERIALIZE_RESULT_ERR) {
|
563
|
+
state->stats_lifetime.serialization_failures++;
|
514
564
|
return rb_ary_new_from_args(2, error_symbol, get_error_details_and_drop(&serialized_profile.err));
|
515
565
|
}
|
516
566
|
|
567
|
+
state->stats_lifetime.serialization_successes++;
|
568
|
+
|
517
569
|
VALUE encoded_pprof = ruby_string_from_vec_u8(serialized_profile.ok.buffer);
|
518
570
|
|
519
571
|
ddog_Timespec ddprof_start = serialized_profile.ok.start;
|
@@ -523,8 +575,9 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan
|
|
523
575
|
|
524
576
|
VALUE start = ruby_time_from(ddprof_start);
|
525
577
|
VALUE finish = ruby_time_from(ddprof_finish);
|
578
|
+
VALUE profile_stats = build_profile_stats(args.slot, serialization_time_ns, heap_iteration_prep_time_ns, args.heap_profile_build_time_ns);
|
526
579
|
|
527
|
-
return rb_ary_new_from_args(2, ok_symbol, rb_ary_new_from_args(
|
580
|
+
return rb_ary_new_from_args(2, ok_symbol, rb_ary_new_from_args(4, start, finish, encoded_pprof, profile_stats));
|
528
581
|
}
|
529
582
|
|
530
583
|
static VALUE ruby_time_from(ddog_Timespec ddprof_time) {
|
@@ -537,7 +590,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
|
|
537
590
|
struct stack_recorder_state *state;
|
538
591
|
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
|
539
592
|
|
540
|
-
|
593
|
+
locked_profile_slot active_slot = sampler_lock_active_profile(state);
|
541
594
|
|
542
595
|
// Note: We initialize this array to have ALL_VALUE_TYPES_COUNT but only tell libdatadog to use the first
|
543
596
|
// state->enabled_values_count values. This simplifies handling disabled value types -- we still put them on the
|
@@ -561,7 +614,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
|
|
561
614
|
}
|
562
615
|
|
563
616
|
ddog_prof_Profile_Result result = ddog_prof_Profile_add(
|
564
|
-
active_slot.profile,
|
617
|
+
&active_slot.data->profile,
|
565
618
|
(ddog_prof_Sample) {
|
566
619
|
.locations = locations,
|
567
620
|
.values = (ddog_Slice_I64) {.ptr = metric_values, .len = state->enabled_values_count},
|
@@ -570,6 +623,8 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
|
|
570
623
|
labels.end_timestamp_ns
|
571
624
|
);
|
572
625
|
|
626
|
+
active_slot.data->stats.recorded_samples++;
|
627
|
+
|
573
628
|
sampler_unlock_active_profile(active_slot);
|
574
629
|
|
575
630
|
if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
|
@@ -590,9 +645,9 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_
|
|
590
645
|
struct stack_recorder_state *state;
|
591
646
|
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
|
592
647
|
|
593
|
-
|
648
|
+
locked_profile_slot active_slot = sampler_lock_active_profile(state);
|
594
649
|
|
595
|
-
ddog_prof_Profile_Result result = ddog_prof_Profile_set_endpoint(active_slot.profile, local_root_span_id, endpoint);
|
650
|
+
ddog_prof_Profile_Result result = ddog_prof_Profile_set_endpoint(&active_slot.data->profile, local_root_span_id, endpoint);
|
596
651
|
|
597
652
|
sampler_unlock_active_profile(active_slot);
|
598
653
|
|
@@ -607,7 +662,7 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_
|
|
607
662
|
// during iteration of heap recorder live objects.
|
608
663
|
typedef struct heap_recorder_iteration_context {
|
609
664
|
struct stack_recorder_state *state;
|
610
|
-
|
665
|
+
profile_slot *slot;
|
611
666
|
|
612
667
|
bool error;
|
613
668
|
char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG];
|
@@ -643,7 +698,7 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
|
|
643
698
|
};
|
644
699
|
|
645
700
|
ddog_prof_Profile_Result result = ddog_prof_Profile_add(
|
646
|
-
context->profile,
|
701
|
+
&context->slot->profile,
|
647
702
|
(ddog_prof_Sample) {
|
648
703
|
.locations = iteration_data.locations,
|
649
704
|
.values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count},
|
@@ -655,6 +710,8 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
|
|
655
710
|
0
|
656
711
|
);
|
657
712
|
|
713
|
+
context->slot->stats.recorded_samples++;
|
714
|
+
|
658
715
|
if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
|
659
716
|
read_ddogerr_string_and_drop(&result.err, context->error_msg, MAX_LEN_HEAP_ITERATION_ERROR_MSG);
|
660
717
|
context->error = true;
|
@@ -666,10 +723,10 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
|
|
666
723
|
return true;
|
667
724
|
}
|
668
725
|
|
669
|
-
static void build_heap_profile_without_gvl(struct stack_recorder_state *state,
|
726
|
+
static void build_heap_profile_without_gvl(struct stack_recorder_state *state, profile_slot *slot) {
|
670
727
|
heap_recorder_iteration_context iteration_context = {
|
671
728
|
.state = state,
|
672
|
-
.
|
729
|
+
.slot = slot,
|
673
730
|
.error = false,
|
674
731
|
.error_msg = {0},
|
675
732
|
};
|
@@ -689,15 +746,21 @@ static void build_heap_profile_without_gvl(struct stack_recorder_state *state, d
|
|
689
746
|
static void *call_serialize_without_gvl(void *call_args) {
|
690
747
|
struct call_serialize_without_gvl_arguments *args = (struct call_serialize_without_gvl_arguments *) call_args;
|
691
748
|
|
692
|
-
|
749
|
+
long serialize_no_gvl_start_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
750
|
+
|
751
|
+
profile_slot *slot_now_inactive = serializer_flip_active_and_inactive_slots(args->state);
|
752
|
+
|
753
|
+
args->slot = slot_now_inactive;
|
693
754
|
|
694
755
|
// Now that we have the inactive profile with all but heap samples, lets fill it with heap data
|
695
756
|
// without needing to race with the active sampler
|
696
|
-
build_heap_profile_without_gvl(args->state, args->
|
757
|
+
build_heap_profile_without_gvl(args->state, args->slot);
|
758
|
+
args->heap_profile_build_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - serialize_no_gvl_start_time_ns;
|
697
759
|
|
698
760
|
// Note: The profile gets reset by the serialize call
|
699
|
-
args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */);
|
761
|
+
args->result = ddog_prof_Profile_serialize(&args->slot->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */);
|
700
762
|
args->serialize_ran = true;
|
763
|
+
args->serialize_no_gvl_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - serialize_no_gvl_start_time_ns;
|
701
764
|
|
702
765
|
return NULL; // Unused
|
703
766
|
}
|
@@ -707,42 +770,42 @@ VALUE enforce_recorder_instance(VALUE object) {
|
|
707
770
|
return object;
|
708
771
|
}
|
709
772
|
|
710
|
-
static
|
773
|
+
static locked_profile_slot sampler_lock_active_profile(struct stack_recorder_state *state) {
|
711
774
|
int error;
|
712
775
|
|
713
776
|
for (int attempts = 0; attempts < 2; attempts++) {
|
714
|
-
error = pthread_mutex_trylock(&state->
|
777
|
+
error = pthread_mutex_trylock(&state->mutex_slot_one);
|
715
778
|
if (error && error != EBUSY) ENFORCE_SUCCESS_GVL(error);
|
716
779
|
|
717
780
|
// Slot one is active
|
718
|
-
if (!error) return (
|
781
|
+
if (!error) return (locked_profile_slot) {.mutex = &state->mutex_slot_one, .data = &state->profile_slot_one};
|
719
782
|
|
720
783
|
// If we got here, slot one was not active, let's try slot two
|
721
784
|
|
722
|
-
error = pthread_mutex_trylock(&state->
|
785
|
+
error = pthread_mutex_trylock(&state->mutex_slot_two);
|
723
786
|
if (error && error != EBUSY) ENFORCE_SUCCESS_GVL(error);
|
724
787
|
|
725
788
|
// Slot two is active
|
726
|
-
if (!error) return (
|
789
|
+
if (!error) return (locked_profile_slot) {.mutex = &state->mutex_slot_two, .data = &state->profile_slot_two};
|
727
790
|
}
|
728
791
|
|
729
792
|
// We already tried both multiple times, and we did not succeed. This is not expected to happen. Let's stop sampling.
|
730
793
|
rb_raise(rb_eRuntimeError, "Failed to grab either mutex in sampler_lock_active_profile");
|
731
794
|
}
|
732
795
|
|
733
|
-
static void sampler_unlock_active_profile(
|
796
|
+
static void sampler_unlock_active_profile(locked_profile_slot active_slot) {
|
734
797
|
ENFORCE_SUCCESS_GVL(pthread_mutex_unlock(active_slot.mutex));
|
735
798
|
}
|
736
799
|
|
737
|
-
static
|
800
|
+
static profile_slot* serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state) {
|
738
801
|
int previously_active_slot = state->active_slot;
|
739
802
|
|
740
803
|
if (previously_active_slot != 1 && previously_active_slot != 2) {
|
741
804
|
grab_gvl_and_raise(rb_eRuntimeError, "Unexpected active_slot state %d in serializer_flip_active_and_inactive_slots", previously_active_slot);
|
742
805
|
}
|
743
806
|
|
744
|
-
pthread_mutex_t *previously_active = (previously_active_slot == 1) ? &state->
|
745
|
-
pthread_mutex_t *previously_inactive = (previously_active_slot == 1) ? &state->
|
807
|
+
pthread_mutex_t *previously_active = (previously_active_slot == 1) ? &state->mutex_slot_one : &state->mutex_slot_two;
|
808
|
+
pthread_mutex_t *previously_inactive = (previously_active_slot == 1) ? &state->mutex_slot_two : &state->mutex_slot_one;
|
746
809
|
|
747
810
|
// Release the lock, thus making this slot active
|
748
811
|
ENFORCE_SUCCESS_NO_GVL(pthread_mutex_unlock(previously_inactive));
|
@@ -753,8 +816,8 @@ static ddog_prof_Profile *serializer_flip_active_and_inactive_slots(struct stack
|
|
753
816
|
// Update active_slot
|
754
817
|
state->active_slot = (previously_active_slot == 1) ? 2 : 1;
|
755
818
|
|
756
|
-
// Return
|
757
|
-
return (previously_active_slot == 1) ? &state->
|
819
|
+
// Return pointer to previously active slot (now inactive)
|
820
|
+
return (previously_active_slot == 1) ? &state->profile_slot_one : &state->profile_slot_two;
|
758
821
|
}
|
759
822
|
|
760
823
|
// This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
|
@@ -778,7 +841,7 @@ static VALUE test_slot_mutex_state(VALUE recorder_instance, int slot) {
|
|
778
841
|
struct stack_recorder_state *state;
|
779
842
|
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
|
780
843
|
|
781
|
-
pthread_mutex_t *slot_mutex = (slot == 1) ? &state->
|
844
|
+
pthread_mutex_t *slot_mutex = (slot == 1) ? &state->mutex_slot_one : &state->mutex_slot_two;
|
782
845
|
|
783
846
|
// Like Heisenberg's uncertainty principle, we can't observe without affecting...
|
784
847
|
int error = pthread_mutex_trylock(slot_mutex);
|
@@ -813,8 +876,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
|
|
813
876
|
// resulting state is inconsistent, we make sure to reset it back to the initial state.
|
814
877
|
initialize_slot_concurrency_control(state);
|
815
878
|
|
816
|
-
|
817
|
-
|
879
|
+
reset_profile_slot(&state->profile_slot_one, /* start_time: */ NULL);
|
880
|
+
reset_profile_slot(&state->profile_slot_two, /* start_time: */ NULL);
|
818
881
|
|
819
882
|
heap_recorder_after_fork(state->heap_recorder);
|
820
883
|
|
@@ -825,8 +888,8 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
|
|
825
888
|
// not be interrupted part-way through by a VM fork.
|
826
889
|
static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time) {
|
827
890
|
// Before making this profile active, we reset it so that it uses the correct start_time for its start
|
828
|
-
|
829
|
-
|
891
|
+
profile_slot *next_profile_slot = (state->active_slot == 1) ? &state->profile_slot_two : &state->profile_slot_one;
|
892
|
+
reset_profile_slot(next_profile_slot, &start_time);
|
830
893
|
}
|
831
894
|
|
832
895
|
static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint) {
|
@@ -872,11 +935,12 @@ static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locatio
|
|
872
935
|
return Qnil;
|
873
936
|
}
|
874
937
|
|
875
|
-
static void
|
876
|
-
ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile, start_time);
|
938
|
+
static void reset_profile_slot(profile_slot *slot, ddog_Timespec *start_time /* Can be null */) {
|
939
|
+
ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(&slot->profile, start_time);
|
877
940
|
if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) {
|
878
941
|
rb_raise(rb_eRuntimeError, "Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err));
|
879
942
|
}
|
943
|
+
slot->stats = (stats_slot) {};
|
880
944
|
}
|
881
945
|
|
882
946
|
// This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
|
@@ -937,3 +1001,40 @@ static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj) {
|
|
937
1001
|
return Qfalse;
|
938
1002
|
#endif
|
939
1003
|
}
|
1004
|
+
|
1005
|
+
static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE recorder_instance) {
|
1006
|
+
struct stack_recorder_state *state;
|
1007
|
+
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
|
1008
|
+
|
1009
|
+
uint64_t total_serializations = state->stats_lifetime.serialization_successes + state->stats_lifetime.serialization_failures;
|
1010
|
+
|
1011
|
+
VALUE heap_recorder_snapshot = state->heap_recorder ?
|
1012
|
+
heap_recorder_state_snapshot(state->heap_recorder) : Qnil;
|
1013
|
+
|
1014
|
+
VALUE stats_as_hash = rb_hash_new();
|
1015
|
+
VALUE arguments[] = {
|
1016
|
+
ID2SYM(rb_intern("serialization_successes")), /* => */ ULL2NUM(state->stats_lifetime.serialization_successes),
|
1017
|
+
ID2SYM(rb_intern("serialization_failures")), /* => */ ULL2NUM(state->stats_lifetime.serialization_failures),
|
1018
|
+
|
1019
|
+
ID2SYM(rb_intern("serialization_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats_lifetime.serialization_time_ns_min, != INT64_MAX, LONG2NUM),
|
1020
|
+
ID2SYM(rb_intern("serialization_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats_lifetime.serialization_time_ns_max, > 0, LONG2NUM),
|
1021
|
+
ID2SYM(rb_intern("serialization_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats_lifetime.serialization_time_ns_total, > 0, LONG2NUM),
|
1022
|
+
ID2SYM(rb_intern("serialization_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats_lifetime.serialization_time_ns_total, total_serializations),
|
1023
|
+
|
1024
|
+
ID2SYM(rb_intern("heap_recorder_snapshot")), /* => */ heap_recorder_snapshot,
|
1025
|
+
};
|
1026
|
+
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
|
1027
|
+
return stats_as_hash;
|
1028
|
+
}
|
1029
|
+
|
1030
|
+
static VALUE build_profile_stats(profile_slot *slot, long serialization_time_ns, long heap_iteration_prep_time_ns, long heap_profile_build_time_ns) {
|
1031
|
+
VALUE stats_as_hash = rb_hash_new();
|
1032
|
+
VALUE arguments[] = {
|
1033
|
+
ID2SYM(rb_intern("recorded_samples")), /* => */ ULL2NUM(slot->stats.recorded_samples),
|
1034
|
+
ID2SYM(rb_intern("serialization_time_ns")), /* => */ LONG2NUM(serialization_time_ns),
|
1035
|
+
ID2SYM(rb_intern("heap_iteration_prep_time_ns")), /* => */ LONG2NUM(heap_iteration_prep_time_ns),
|
1036
|
+
ID2SYM(rb_intern("heap_profile_build_time_ns")), /* => */ LONG2NUM(heap_profile_build_time_ns),
|
1037
|
+
};
|
1038
|
+
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
|
1039
|
+
return stats_as_hash;
|
1040
|
+
}
|
@@ -13,12 +13,16 @@ module Datadog
|
|
13
13
|
SIGNUP_EVENT = 'users.signup'
|
14
14
|
|
15
15
|
def self.track_login_success(trace, span, user_id:, **others)
|
16
|
+
return if trace.nil? || span.nil?
|
17
|
+
|
16
18
|
track(LOGIN_SUCCESS_EVENT, trace, span, **others)
|
17
19
|
|
18
20
|
Kit::Identity.set_user(trace, span, id: user_id.to_s, **others) if user_id
|
19
21
|
end
|
20
22
|
|
21
23
|
def self.track_login_failure(trace, span, user_id:, user_exists:, **others)
|
24
|
+
return if trace.nil? || span.nil?
|
25
|
+
|
22
26
|
track(LOGIN_FAILURE_EVENT, trace, span, **others)
|
23
27
|
|
24
28
|
span.set_tag('appsec.events.users.login.failure.usr.id', user_id) if user_id
|
@@ -26,11 +30,15 @@ module Datadog
|
|
26
30
|
end
|
27
31
|
|
28
32
|
def self.track_signup(trace, span, user_id:, **others)
|
33
|
+
return if trace.nil? || span.nil?
|
34
|
+
|
29
35
|
track(SIGNUP_EVENT, trace, span, **others)
|
30
36
|
Kit::Identity.set_user(trace, id: user_id.to_s, **others) if user_id
|
31
37
|
end
|
32
38
|
|
33
39
|
def self.track(event, trace, span, **others)
|
40
|
+
return if trace.nil? || span.nil?
|
41
|
+
|
34
42
|
span.set_tag("appsec.events.#{event}.track", 'true')
|
35
43
|
span.set_tag("_dd.appsec.events.#{event}.auto.mode", Datadog.configuration.appsec.track_user_events.mode)
|
36
44
|
|
@@ -4,7 +4,7 @@ require_relative '../diagnostics/environment_logger'
|
|
4
4
|
require_relative '../diagnostics/health'
|
5
5
|
require_relative '../logger'
|
6
6
|
require_relative '../runtime/metrics'
|
7
|
-
require_relative '../telemetry/
|
7
|
+
require_relative '../telemetry/component'
|
8
8
|
require_relative '../workers/runtime_metrics'
|
9
9
|
|
10
10
|
require_relative '../remote/component'
|
@@ -60,7 +60,7 @@ module Datadog
|
|
60
60
|
logger.debug { "Telemetry disabled. Agent network adapter not supported: #{agent_settings.adapter}" }
|
61
61
|
end
|
62
62
|
|
63
|
-
Telemetry::
|
63
|
+
Telemetry::Component.new(
|
64
64
|
enabled: enabled,
|
65
65
|
heartbeat_interval_seconds: settings.telemetry.heartbeat_interval_seconds,
|
66
66
|
dependency_collection: settings.telemetry.dependency_collection
|
@@ -165,8 +165,9 @@ module Datadog
|
|
165
165
|
unused_statsd = (old_statsd - (old_statsd & new_statsd))
|
166
166
|
unused_statsd.each(&:close)
|
167
167
|
|
168
|
-
telemetry
|
168
|
+
# enqueue closing event before stopping telemetry so it will be sent out on shutdown
|
169
169
|
telemetry.emit_closing! unless replacement
|
170
|
+
telemetry.stop!
|
170
171
|
end
|
171
172
|
end
|
172
173
|
end
|
@@ -81,23 +81,16 @@ module Datadog
|
|
81
81
|
configuration = self.configuration
|
82
82
|
yield(configuration)
|
83
83
|
|
84
|
-
|
85
|
-
|
86
|
-
components = safely_synchronize do |write_components|
|
84
|
+
safely_synchronize do |write_components|
|
87
85
|
write_components.call(
|
88
86
|
if components?
|
89
87
|
replace_components!(configuration, @components)
|
90
88
|
else
|
91
|
-
|
92
|
-
built_components = true
|
93
|
-
components
|
89
|
+
build_components(configuration)
|
94
90
|
end
|
95
91
|
)
|
96
92
|
end
|
97
93
|
|
98
|
-
# Should only be called the first time components are built
|
99
|
-
components.telemetry.started! if built_components
|
100
|
-
|
101
94
|
configuration
|
102
95
|
end
|
103
96
|
|
@@ -197,20 +190,13 @@ module Datadog
|
|
197
190
|
current_components = COMPONENTS_READ_LOCK.synchronize { defined?(@components) && @components }
|
198
191
|
return current_components if current_components || !allow_initialization
|
199
192
|
|
200
|
-
|
201
|
-
|
202
|
-
components = safely_synchronize do |write_components|
|
193
|
+
safely_synchronize do |write_components|
|
203
194
|
if defined?(@components) && @components
|
204
195
|
@components
|
205
196
|
else
|
206
|
-
built_components = true
|
207
197
|
write_components.call(build_components(configuration))
|
208
198
|
end
|
209
199
|
end
|
210
|
-
|
211
|
-
# Should only be called the first time components are built
|
212
|
-
components.telemetry.started! if built_components && components && components.telemetry
|
213
|
-
components
|
214
200
|
end
|
215
201
|
|
216
202
|
private
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'emitter'
|
4
|
+
require_relative 'event'
|
5
|
+
require_relative 'worker'
|
6
|
+
require_relative '../utils/forking'
|
7
|
+
|
8
|
+
module Datadog
|
9
|
+
module Core
|
10
|
+
module Telemetry
|
11
|
+
# Telemetry entrypoint, coordinates sending telemetry events at various points in app lifecycle.
|
12
|
+
class Component
|
13
|
+
attr_reader :enabled
|
14
|
+
|
15
|
+
include Core::Utils::Forking
|
16
|
+
|
17
|
+
# @param enabled [Boolean] Determines whether telemetry events should be sent to the API
|
18
|
+
# @param heartbeat_interval_seconds [Float] How frequently heartbeats will be reported, in seconds.
|
19
|
+
# @param [Boolean] dependency_collection Whether to send the `app-dependencies-loaded` event
|
20
|
+
def initialize(heartbeat_interval_seconds:, dependency_collection:, enabled: true)
|
21
|
+
@enabled = enabled
|
22
|
+
@stopped = false
|
23
|
+
|
24
|
+
@worker = Telemetry::Worker.new(
|
25
|
+
enabled: @enabled,
|
26
|
+
heartbeat_interval_seconds: heartbeat_interval_seconds,
|
27
|
+
emitter: Emitter.new,
|
28
|
+
dependency_collection: dependency_collection
|
29
|
+
)
|
30
|
+
@worker.start
|
31
|
+
end
|
32
|
+
|
33
|
+
def disable!
|
34
|
+
@enabled = false
|
35
|
+
@worker.enabled = false
|
36
|
+
end
|
37
|
+
|
38
|
+
def stop!
|
39
|
+
return if @stopped
|
40
|
+
|
41
|
+
@worker.stop(true)
|
42
|
+
@stopped = true
|
43
|
+
end
|
44
|
+
|
45
|
+
def emit_closing!
|
46
|
+
return if !@enabled || forked?
|
47
|
+
|
48
|
+
@worker.enqueue(Event::AppClosing.new)
|
49
|
+
end
|
50
|
+
|
51
|
+
def integrations_change!
|
52
|
+
return if !@enabled || forked?
|
53
|
+
|
54
|
+
@worker.enqueue(Event::AppIntegrationsChange.new)
|
55
|
+
end
|
56
|
+
|
57
|
+
# Report configuration changes caused by Remote Configuration.
|
58
|
+
def client_configuration_change!(changes)
|
59
|
+
return if !@enabled || forked?
|
60
|
+
|
61
|
+
@worker.enqueue(Event::AppClientConfigurationChange.new(changes, 'remote_config'))
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -24,7 +24,7 @@ module Datadog
|
|
24
24
|
seq_id = self.class.sequence.next
|
25
25
|
payload = Request.build_payload(event, seq_id)
|
26
26
|
res = @http_transport.request(request_type: event.type, payload: payload.to_json)
|
27
|
-
Datadog.logger.debug { "Telemetry sent for event `#{event.type}` (
|
27
|
+
Datadog.logger.debug { "Telemetry sent for event `#{event.type}` (code: #{res.code.inspect})" }
|
28
28
|
res
|
29
29
|
rescue => e
|
30
30
|
Datadog.logger.debug("Unable to send telemetry request for event `#{event.type rescue 'unknown'}`: #{e}")
|