datadog 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +51 -2
  3. data/ext/datadog_profiling_loader/extconf.rb +15 -15
  4. data/ext/datadog_profiling_native_extension/clock_id.h +1 -0
  5. data/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +1 -2
  6. data/ext/datadog_profiling_native_extension/clock_id_noop.c +1 -2
  7. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +113 -43
  8. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +49 -26
  9. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +34 -4
  10. data/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +4 -0
  11. data/ext/datadog_profiling_native_extension/collectors_stack.c +49 -37
  12. data/ext/datadog_profiling_native_extension/collectors_stack.h +2 -2
  13. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +81 -19
  14. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +1 -0
  15. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +110 -0
  16. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +57 -0
  17. data/ext/datadog_profiling_native_extension/extconf.rb +65 -60
  18. data/ext/datadog_profiling_native_extension/heap_recorder.c +34 -6
  19. data/ext/datadog_profiling_native_extension/heap_recorder.h +3 -1
  20. data/ext/datadog_profiling_native_extension/helpers.h +6 -17
  21. data/ext/datadog_profiling_native_extension/http_transport.c +3 -3
  22. data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +0 -86
  23. data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +2 -23
  24. data/ext/datadog_profiling_native_extension/native_extension_helpers.rb +61 -172
  25. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +64 -138
  26. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +17 -11
  27. data/ext/datadog_profiling_native_extension/profiling.c +0 -2
  28. data/ext/datadog_profiling_native_extension/ruby_helpers.c +0 -33
  29. data/ext/datadog_profiling_native_extension/ruby_helpers.h +1 -26
  30. data/ext/datadog_profiling_native_extension/setup_signal_handler.h +1 -0
  31. data/ext/datadog_profiling_native_extension/stack_recorder.c +14 -2
  32. data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
  33. data/ext/datadog_profiling_native_extension/time_helpers.c +0 -15
  34. data/ext/datadog_profiling_native_extension/time_helpers.h +36 -6
  35. data/ext/{datadog_profiling_native_extension → libdatadog_api}/crashtracker.c +19 -6
  36. data/ext/libdatadog_api/datadog_ruby_common.c +110 -0
  37. data/ext/libdatadog_api/datadog_ruby_common.h +57 -0
  38. data/ext/libdatadog_api/extconf.rb +108 -0
  39. data/ext/libdatadog_api/macos_development.md +26 -0
  40. data/ext/libdatadog_extconf_helpers.rb +130 -0
  41. data/lib/datadog/appsec/contrib/graphql/appsec_trace.rb +49 -0
  42. data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +73 -0
  43. data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +68 -0
  44. data/lib/datadog/appsec/contrib/graphql/integration.rb +41 -0
  45. data/lib/datadog/appsec/contrib/graphql/patcher.rb +37 -0
  46. data/lib/datadog/appsec/contrib/graphql/reactive/multiplex.rb +59 -0
  47. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +1 -1
  48. data/lib/datadog/appsec/processor/actions.rb +1 -1
  49. data/lib/datadog/appsec/response.rb +15 -1
  50. data/lib/datadog/appsec.rb +1 -0
  51. data/lib/datadog/core/configuration/components.rb +14 -12
  52. data/lib/datadog/core/configuration/settings.rb +54 -7
  53. data/lib/datadog/core/crashtracking/agent_base_url.rb +21 -0
  54. data/lib/datadog/core/crashtracking/component.rb +111 -0
  55. data/lib/datadog/core/crashtracking/tag_builder.rb +39 -0
  56. data/lib/datadog/core/diagnostics/environment_logger.rb +8 -11
  57. data/lib/datadog/core/telemetry/component.rb +49 -2
  58. data/lib/datadog/core/telemetry/emitter.rb +9 -11
  59. data/lib/datadog/core/telemetry/event.rb +32 -1
  60. data/lib/datadog/core/telemetry/ext.rb +1 -0
  61. data/lib/datadog/core/telemetry/http/adapters/net.rb +10 -12
  62. data/lib/datadog/core/telemetry/http/ext.rb +3 -0
  63. data/lib/datadog/core/telemetry/http/transport.rb +38 -9
  64. data/lib/datadog/core/telemetry/logging.rb +35 -0
  65. data/lib/datadog/core/utils/at_fork_monkey_patch.rb +102 -0
  66. data/lib/datadog/kit/appsec/events.rb +2 -4
  67. data/lib/datadog/opentelemetry/sdk/span_processor.rb +10 -0
  68. data/lib/datadog/opentelemetry/sdk/trace/span.rb +23 -0
  69. data/lib/datadog/profiling/collectors/code_provenance.rb +7 -7
  70. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +17 -17
  71. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +11 -13
  72. data/lib/datadog/profiling/collectors/info.rb +3 -3
  73. data/lib/datadog/profiling/collectors/thread_context.rb +4 -2
  74. data/lib/datadog/profiling/component.rb +69 -91
  75. data/lib/datadog/profiling/exporter.rb +3 -3
  76. data/lib/datadog/profiling/ext/dir_monkey_patches.rb +3 -3
  77. data/lib/datadog/profiling/ext.rb +21 -21
  78. data/lib/datadog/profiling/flush.rb +1 -1
  79. data/lib/datadog/profiling/http_transport.rb +8 -6
  80. data/lib/datadog/profiling/load_native_extension.rb +5 -5
  81. data/lib/datadog/profiling/preload.rb +1 -1
  82. data/lib/datadog/profiling/profiler.rb +5 -8
  83. data/lib/datadog/profiling/scheduler.rb +31 -25
  84. data/lib/datadog/profiling/tag_builder.rb +2 -2
  85. data/lib/datadog/profiling/tasks/exec.rb +5 -5
  86. data/lib/datadog/profiling/tasks/setup.rb +16 -35
  87. data/lib/datadog/profiling.rb +4 -5
  88. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +1 -0
  89. data/lib/datadog/tracing/contrib/ext.rb +14 -0
  90. data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +1 -1
  91. data/lib/datadog/tracing/contrib/graphql/unified_trace_patcher.rb +4 -1
  92. data/lib/datadog/tracing/contrib/lograge/patcher.rb +16 -0
  93. data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +5 -0
  94. data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +17 -13
  95. data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +5 -0
  96. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +4 -1
  97. data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +28 -0
  98. data/lib/datadog/tracing/contrib/propagation/sql_comment/mode.rb +5 -1
  99. data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +22 -10
  100. data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +5 -0
  101. data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +4 -1
  102. data/lib/datadog/tracing/diagnostics/environment_logger.rb +14 -16
  103. data/lib/datadog/tracing/metadata/errors.rb +9 -1
  104. data/lib/datadog/tracing/metadata/ext.rb +4 -0
  105. data/lib/datadog/tracing/pipeline/span_filter.rb +2 -2
  106. data/lib/datadog/tracing/span.rb +9 -2
  107. data/lib/datadog/tracing/span_event.rb +41 -0
  108. data/lib/datadog/tracing/span_operation.rb +6 -2
  109. data/lib/datadog/tracing/transport/serializable_trace.rb +3 -0
  110. data/lib/datadog/version.rb +1 -1
  111. metadata +28 -10
  112. data/lib/datadog/profiling/crashtracker.rb +0 -91
  113. data/lib/datadog/profiling/ext/forking.rb +0 -98
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 895fe8f9fdd8391d5c6c86e8d39d0a8e241c85bbce42b35c9744e1f94095853f
4
- data.tar.gz: d5a6e88ec35816de59a5d080f66896a4c85656216d2853b46bb1268dfa64df35
3
+ metadata.gz: 5d6610c8ef7e86c023f8a3fca884807bd7e9cf6b84fc6cbdd79b98e8a8762c2e
4
+ data.tar.gz: 236fafc4b8e2c809552d97c8eb025654c0ef4279685c741ad9504b2af8265b94
5
5
  SHA512:
6
- metadata.gz: fef3c78c7835c47507a1f09d87c2ee84ddcc97303f9b9d7c6b4d601381ac62a47133f3450ddbc888cf54c56353ef4417ff337009a1e60d2ae239fb19093d721b
7
- data.tar.gz: 061154162ab97a6e1cdc87f53c18d98fd0eea75b6cb7e71fd1e4e5c4a536ff7722c4e1202258760113ce5a7f0ce3838622e700858103fa97dddc33d0322275cc
6
+ metadata.gz: e633db76f69b5d151629cde5e1b7024a6bea43343aa348ea50857d5e74d049d2b6a833be252c6c1c7aee95976805a795e1607a81d42e8c3246126abd9811777d
7
+ data.tar.gz: fb2bcf3803689d8e499f2266e7d063f1c5b9b7d08bb28b5d27f81bd16e7e373c01c1d5224027d43229a5ca609f66c5179aa521ef272f347d069ca73e3613bdc3
data/CHANGELOG.md CHANGED
@@ -2,6 +2,34 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [2.3.0] - 2024-08-22
6
+
7
+ ### Added
8
+
9
+ * Core: Support agentless telemetry ([#3779][])
10
+ * Tracing: Add support for span events ([#3776][])
11
+ * Tracing: Add tags to enable inferred service dependencies for databases ([#3789][])
12
+ * Tracing: Emit log message and instructions for incompatible Lograge setup ([#3812][], [#3839][])
13
+ * Tracing: Add `append_comment` option to append SQL comment propagation for `mysql`, `pg` and `trilogy` ([#3809][])
14
+ * AppSec: Add threat detection and protection for `graphql` ([#3769][], [#3814][])
15
+
16
+ ### Changed
17
+
18
+ * Core: Enable crashtracking by default ([#3826][])
19
+ * Profiling: Reduce allocation overhead ([#3805][], [#3797][])
20
+ * Profiling: Speed up stack sampling ([#3837][])
21
+ * Profiling: Upgrade to libdatadog 11 ([#3799][])
22
+ * Profiling: Disable allocation counting feature by default ([#3798][])
23
+ * Profiling: Reduce the maximum biased result for allocation samples ([#3793][])
24
+ * Tracing: Reduce noisy integration logs ([#3785][])
25
+
26
+ ### Fixed
27
+
28
+ * Tracing: Fix `require` issue for `graphql` ([#3813][])
29
+ * AppSec: Fix an error when parsing http headers with integer value ([#3790][])
30
+ * AppSec: Fix an error when tracking login failure without `user_id` ([#3841][])
31
+ * Fix a syntax error for Ruby < 2.4 during single step instrumentation ([#3795][])
32
+
5
33
  ## [2.2.0] - 2024-07-11
6
34
 
7
35
  ### Added
@@ -2934,7 +2962,8 @@ Release notes: https://github.com/DataDog/dd-trace-rb/releases/tag/v0.3.1
2934
2962
  Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
2935
2963
 
2936
2964
 
2937
- [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.2.0...master
2965
+ [Unreleased]: https://github.com/DataDog/dd-trace-rb/compare/v2.3.0...master
2966
+ [2.3.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.2.0...v2.3.0
2938
2967
  [2.2.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.1.0...v2.2.0
2939
2968
  [2.1.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.0.0...v2.1.0
2940
2969
  [2.0.0]: https://github.com/DataDog/dd-trace-rb/compare/v2.0.0.rc1...v2.0.0
@@ -4331,9 +4360,29 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4331
4360
  [#3753]: https://github.com/DataDog/dd-trace-rb/issues/3753
4332
4361
  [#3757]: https://github.com/DataDog/dd-trace-rb/issues/3757
4333
4362
  [#3759]: https://github.com/DataDog/dd-trace-rb/issues/3759
4363
+ [#3769]: https://github.com/DataDog/dd-trace-rb/issues/3769
4334
4364
  [#3770]: https://github.com/DataDog/dd-trace-rb/issues/3770
4335
4365
  [#3772]: https://github.com/DataDog/dd-trace-rb/issues/3772
4336
4366
  [#3774]: https://github.com/DataDog/dd-trace-rb/issues/3774
4367
+ [#3776]: https://github.com/DataDog/dd-trace-rb/issues/3776
4368
+ [#3779]: https://github.com/DataDog/dd-trace-rb/issues/3779
4369
+ [#3785]: https://github.com/DataDog/dd-trace-rb/issues/3785
4370
+ [#3789]: https://github.com/DataDog/dd-trace-rb/issues/3789
4371
+ [#3790]: https://github.com/DataDog/dd-trace-rb/issues/3790
4372
+ [#3793]: https://github.com/DataDog/dd-trace-rb/issues/3793
4373
+ [#3795]: https://github.com/DataDog/dd-trace-rb/issues/3795
4374
+ [#3797]: https://github.com/DataDog/dd-trace-rb/issues/3797
4375
+ [#3798]: https://github.com/DataDog/dd-trace-rb/issues/3798
4376
+ [#3799]: https://github.com/DataDog/dd-trace-rb/issues/3799
4377
+ [#3805]: https://github.com/DataDog/dd-trace-rb/issues/3805
4378
+ [#3809]: https://github.com/DataDog/dd-trace-rb/issues/3809
4379
+ [#3812]: https://github.com/DataDog/dd-trace-rb/issues/3812
4380
+ [#3813]: https://github.com/DataDog/dd-trace-rb/issues/3813
4381
+ [#3814]: https://github.com/DataDog/dd-trace-rb/issues/3814
4382
+ [#3826]: https://github.com/DataDog/dd-trace-rb/issues/3826
4383
+ [#3837]: https://github.com/DataDog/dd-trace-rb/issues/3837
4384
+ [#3839]: https://github.com/DataDog/dd-trace-rb/issues/3839
4385
+ [#3841]: https://github.com/DataDog/dd-trace-rb/issues/3841
4337
4386
  [@AdrianLC]: https://github.com/AdrianLC
4338
4387
  [@Azure7111]: https://github.com/Azure7111
4339
4388
  [@BabyGroot]: https://github.com/BabyGroot
@@ -4485,4 +4534,4 @@ Git diff: https://github.com/DataDog/dd-trace-rb/compare/v0.3.0...v0.3.1
4485
4534
  [@y-yagi]: https://github.com/y-yagi
4486
4535
  [@yujideveloper]: https://github.com/yujideveloper
4487
4536
  [@yukimurasawa]: https://github.com/yukimurasawa
4488
- [@zachmccormick]: https://github.com/zachmccormick
4537
+ [@zachmccormick]: https://github.com/zachmccormick
data/ext/datadog_profiling_loader/extconf.rb CHANGED
@@ -1,22 +1,22 @@
1
1
  # rubocop:disable Style/StderrPuts
2
2
  # rubocop:disable Style/GlobalVars
3
3
 
4
- if RUBY_ENGINE != 'ruby' || Gem.win_platform?
4
+ if RUBY_ENGINE != "ruby" || Gem.win_platform?
5
5
  $stderr.puts(
6
- 'WARN: Skipping build of Datadog profiling loader. See Datadog profiling native extension note for details.'
6
+ "WARN: Skipping build of Datadog profiling loader. See Datadog profiling native extension note for details."
7
7
  )
8
8
 
9
- File.write('Makefile', 'all install clean: # dummy makefile that does nothing')
9
+ File.write("Makefile", "all install clean: # dummy makefile that does nothing")
10
10
  exit
11
11
  end
12
12
 
13
- require 'mkmf'
13
+ require "mkmf"
14
14
 
15
15
  # mkmf on modern Rubies actually has an append_cflags that does something similar
16
16
  # (see https://github.com/ruby/ruby/pull/5760), but as usual we need a bit more boilerplate to deal with legacy Rubies
17
17
  def add_compiler_flag(flag)
18
18
  if try_cflags(flag)
19
- $CFLAGS << ' ' << flag
19
+ $CFLAGS << " " << flag
20
20
  else
21
21
  $stderr.puts("WARNING: '#{flag}' not accepted by compiler, skipping it")
22
22
  end
@@ -24,26 +24,26 @@ end
24
24
 
25
25
  # Because we can't control what compiler versions our customers use, shipping with -Werror by default is a no-go.
26
26
  # But we can enable it in CI, so that we quickly spot any new warnings that just got introduced.
27
- add_compiler_flag '-Werror' if ENV['DATADOG_GEM_CI'] == 'true'
27
+ add_compiler_flag "-Werror" if ENV["DATADOG_GEM_CI"] == "true"
28
28
 
29
29
  # Older gcc releases may not default to C99 and we need to ask for this. This is also used:
30
30
  # * by upstream Ruby -- search for gnu99 in the codebase
31
31
  # * by msgpack, another datadog gem dependency
32
32
  # (https://github.com/msgpack/msgpack-ruby/blob/18ce08f6d612fe973843c366ac9a0b74c4e50599/ext/msgpack/extconf.rb#L8)
33
- add_compiler_flag '-std=gnu99'
33
+ add_compiler_flag "-std=gnu99"
34
34
 
35
35
  # Gets really noisy when we include the MJIT header, let's omit it (TODO: Use #pragma GCC diagnostic instead?)
36
- add_compiler_flag '-Wno-unused-function'
36
+ add_compiler_flag "-Wno-unused-function"
37
37
 
38
38
  # Allow defining variables at any point in a function
39
- add_compiler_flag '-Wno-declaration-after-statement'
39
+ add_compiler_flag "-Wno-declaration-after-statement"
40
40
 
41
41
  # If we forget to include a Ruby header, the function call may still appear to work, but then
42
42
  # cause a segfault later. Let's ensure that never happens.
43
- add_compiler_flag '-Werror-implicit-function-declaration'
43
+ add_compiler_flag "-Werror-implicit-function-declaration"
44
44
 
45
45
  # Warn on unused parameters to functions. Use `DDTRACE_UNUSED` to mark things as known-to-not-be-used.
46
- add_compiler_flag '-Wunused-parameter'
46
+ add_compiler_flag "-Wunused-parameter"
47
47
 
48
48
  # The native extension is not intended to expose any symbols/functions for other native libraries to use;
49
49
  # the sole exception being `Init_datadog_profiling_loader` which needs to be visible for Ruby to call it when
@@ -51,14 +51,14 @@ add_compiler_flag '-Wunused-parameter'
51
51
  #
52
52
  # By setting this compiler flag, we tell it to assume that everything is private unless explicitly stated.
53
53
  # For more details see https://gcc.gnu.org/wiki/Visibility
54
- add_compiler_flag '-fvisibility=hidden'
54
+ add_compiler_flag "-fvisibility=hidden"
55
55
 
56
56
  # Avoid legacy C definitions
57
- add_compiler_flag '-Wold-style-definition'
57
+ add_compiler_flag "-Wold-style-definition"
58
58
 
59
59
  # Enable all other compiler warnings
60
- add_compiler_flag '-Wall'
61
- add_compiler_flag '-Wextra'
60
+ add_compiler_flag "-Wall"
61
+ add_compiler_flag "-Wextra"
62
62
 
63
63
  # Tag the native extension library with the Ruby version and Ruby platform.
64
64
  # This makes it easier for development (avoids "oops I forgot to rebuild when I switched my Ruby") and ensures that
data/ext/datadog_profiling_native_extension/clock_id.h CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  #include <stdbool.h>
4
4
  #include <time.h>
5
+ #include <ruby.h>
5
6
 
6
7
  // Contains the operating-system specific identifier needed to fetch CPU-time, and a flag to indicate if we failed to fetch it
7
8
  typedef struct thread_cpu_time_id {
data/ext/datadog_profiling_native_extension/clock_id_from_pthread.c CHANGED
@@ -7,11 +7,10 @@
7
7
  #include <pthread.h>
8
8
  #include <time.h>
9
9
  #include <errno.h>
10
- #include <ruby.h>
11
10
 
11
+ #include "clock_id.h"
12
12
  #include "helpers.h"
13
13
  #include "private_vm_api_access.h"
14
- #include "clock_id.h"
15
14
  #include "time_helpers.h"
16
15
 
17
16
  // Validate that our home-cooked pthread_id_for() matches pthread_self() for the current thread
data/ext/datadog_profiling_native_extension/clock_id_noop.c CHANGED
@@ -4,10 +4,9 @@
4
4
  // is not available.
5
5
  #ifndef HAVE_PTHREAD_GETCPUCLOCKID
6
6
 
7
- #include <ruby.h>
8
-
9
7
  #include "clock_id.h"
10
8
  #include "helpers.h"
9
+ #include "datadog_ruby_common.h"
11
10
 
12
11
  void self_test_clock_id(void) { } // Nothing to check
13
12
 
data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c CHANGED
@@ -20,7 +20,9 @@
20
20
  #define ERR_CLOCK_FAIL "failed to get clock time"
21
21
 
22
22
  // Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
23
- unsigned int MAX_ALLOC_WEIGHT = 65535;
23
+ // See https://docs.google.com/document/d/1lWLB714wlLBBq6T4xZyAc4a5wtWhSmr4-hgiPKeErlA/edit#heading=h.ugp0zxcj5iqh
24
+ // (Datadog-only link) for research backing the choice of this value.
25
+ unsigned int MAX_ALLOC_WEIGHT = 10000;
24
26
 
25
27
  // Used to trigger the execution of Collectors::ThreadState, which implements all of the sampling logic
26
28
  // itself; this class only implements the "when to do it" part.
@@ -96,6 +98,7 @@ struct cpu_and_wall_time_worker_state {
96
98
  bool no_signals_workaround_enabled;
97
99
  bool dynamic_sampling_rate_enabled;
98
100
  bool allocation_profiling_enabled;
101
+ bool allocation_counting_enabled;
99
102
  bool skip_idle_samples_for_testing;
100
103
  VALUE self_instance;
101
104
  VALUE thread_context_collector_instance;
@@ -104,7 +107,6 @@ struct cpu_and_wall_time_worker_state {
104
107
  dynamic_sampling_rate_state cpu_dynamic_sampling_rate;
105
108
  discrete_dynamic_sampler allocation_sampler;
106
109
  VALUE gc_tracepoint; // Used to get gc start/finish information
107
- VALUE object_allocation_tracepoint; // Used to get allocation counts and allocation profiling
108
110
 
109
111
  // These are mutable and used to signal things between the worker thread and other threads
110
112
 
@@ -117,7 +119,7 @@ struct cpu_and_wall_time_worker_state {
117
119
 
118
120
  // Others
119
121
 
120
- // Used to detect/avoid nested sampling, e.g. when the object_allocation_tracepoint gets triggered by a memory allocation
122
+ // Used to detect/avoid nested sampling, e.g. when on_newobj_event gets triggered by a memory allocation
121
123
  // that happens during another sample.
122
124
  bool during_sample;
123
125
 
@@ -181,6 +183,7 @@ static VALUE _native_initialize(
181
183
  VALUE dynamic_sampling_rate_enabled,
182
184
  VALUE dynamic_sampling_rate_overhead_target_percentage,
183
185
  VALUE allocation_profiling_enabled,
186
+ VALUE allocation_counting_enabled,
184
187
  VALUE skip_idle_samples_for_testing
185
188
  );
186
189
  static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
@@ -216,7 +219,7 @@ static void grab_gvl_and_sample(void);
216
219
  static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state);
217
220
  static void sleep_for(uint64_t time_ns);
218
221
  static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self);
219
- static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused);
222
+ static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *unused2);
220
223
  static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state);
221
224
  static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
222
225
  static VALUE rescued_sample_allocation(VALUE tracepoint_data);
@@ -225,6 +228,20 @@ static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VA
225
228
  static VALUE _native_hold_signals(DDTRACE_UNUSED VALUE self);
226
229
  static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self);
227
230
 
231
+ // We're using `on_newobj_event` function with `rb_add_event_hook2`, which requires in its public signature a function
232
+ // with signature `rb_event_hook_func_t` which doesn't match `on_newobj_event`.
233
+ //
234
+ // But in practice, because we pass the `RUBY_EVENT_HOOK_FLAG_RAW_ARG` flag to `rb_add_event_hook2`, it casts the
235
+ // expected signature into a `rb_event_hook_raw_arg_func_t`:
236
+ // > typedef void (*rb_event_hook_raw_arg_func_t)(VALUE data, const rb_trace_arg_t *arg); (from vm_trace.c)
237
+ // which does match `on_newobj_event`.
238
+ //
239
+ // So TL;DR we're just doing this here to avoid the warning and explain why the apparent mismatch in function signatures.
240
+ #pragma GCC diagnostic push
241
+ #pragma GCC diagnostic ignored "-Wcast-function-type"
242
+ static const rb_event_hook_func_t on_newobj_event_as_hook = (rb_event_hook_func_t) on_newobj_event;
243
+ #pragma GCC diagnostic pop
244
+
228
245
  // Note on sampler global state safety:
229
246
  //
230
247
  // Both `active_sampler_instance` and `active_sampler_instance_state` are **GLOBAL** state. Be careful when accessing
@@ -278,7 +295,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
278
295
  // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
279
296
  rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);
280
297
 
281
- rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
298
+ rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 10);
282
299
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
283
300
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
284
301
  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
@@ -316,6 +333,8 @@ static const rb_data_type_t cpu_and_wall_time_worker_typed_data = {
316
333
  };
317
334
 
318
335
  static VALUE _native_new(VALUE klass) {
336
+ long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
337
+
319
338
  struct cpu_and_wall_time_worker_state *state = ruby_xcalloc(1, sizeof(struct cpu_and_wall_time_worker_state));
320
339
 
321
340
  // Note: Any exceptions raised from this note until the TypedData_Wrap_Struct call will lead to the state memory
@@ -325,13 +344,13 @@ static VALUE _native_new(VALUE klass) {
325
344
  state->no_signals_workaround_enabled = false;
326
345
  state->dynamic_sampling_rate_enabled = true;
327
346
  state->allocation_profiling_enabled = false;
347
+ state->allocation_counting_enabled = false;
328
348
  state->skip_idle_samples_for_testing = false;
329
349
  state->thread_context_collector_instance = Qnil;
330
350
  state->idle_sampling_helper_instance = Qnil;
331
351
  state->owner_thread = Qnil;
332
352
  dynamic_sampling_rate_init(&state->cpu_dynamic_sampling_rate);
333
353
  state->gc_tracepoint = Qnil;
334
- state->object_allocation_tracepoint = Qnil;
335
354
 
336
355
  atomic_init(&state->should_run, false);
337
356
  state->failure_exception = Qnil;
@@ -340,15 +359,12 @@ static VALUE _native_new(VALUE klass) {
340
359
  state->during_sample = false;
341
360
 
342
361
  reset_stats_not_thread_safe(state);
343
-
344
- long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
345
- if (now == 0) {
346
- ruby_xfree(state);
347
- rb_raise(rb_eRuntimeError, ERR_CLOCK_FAIL);
348
- }
349
-
350
362
  discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation", now);
351
363
 
364
+ // Note: As of this writing, no new Ruby objects get created and stored in the state. If that ever changes, remember
365
+ // to keep them on the stack and mark them with RB_GC_GUARD -- otherwise it's possible for a GC to run and
366
+ // since the instance representing the state does not yet exist, such objects will not get marked.
367
+
352
368
  return state->self_instance = TypedData_Wrap_Struct(klass, &cpu_and_wall_time_worker_typed_data, state);
353
369
  }
354
370
 
@@ -362,6 +378,7 @@ static VALUE _native_initialize(
362
378
  VALUE dynamic_sampling_rate_enabled,
363
379
  VALUE dynamic_sampling_rate_overhead_target_percentage,
364
380
  VALUE allocation_profiling_enabled,
381
+ VALUE allocation_counting_enabled,
365
382
  VALUE skip_idle_samples_for_testing
366
383
  ) {
367
384
  ENFORCE_BOOLEAN(gc_profiling_enabled);
@@ -369,6 +386,7 @@ static VALUE _native_initialize(
369
386
  ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
370
387
  ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
371
388
  ENFORCE_BOOLEAN(allocation_profiling_enabled);
389
+ ENFORCE_BOOLEAN(allocation_counting_enabled);
372
390
  ENFORCE_BOOLEAN(skip_idle_samples_for_testing)
373
391
 
374
392
  struct cpu_and_wall_time_worker_state *state;
@@ -378,6 +396,7 @@ static VALUE _native_initialize(
378
396
  state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
379
397
  state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
380
398
  state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
399
+ state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
381
400
  state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);
382
401
 
383
402
  double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
@@ -394,7 +413,6 @@ static VALUE _native_initialize(
394
413
  state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
395
414
  state->idle_sampling_helper_instance = idle_sampling_helper_instance;
396
415
  state->gc_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_GC_ENTER | RUBY_INTERNAL_EVENT_GC_EXIT, on_gc_event, NULL /* unused */);
397
- state->object_allocation_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_NEWOBJ, on_newobj_event, NULL /* unused */);
398
416
 
399
417
  return Qtrue;
400
418
  }
@@ -409,7 +427,6 @@ static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr) {
409
427
  rb_gc_mark(state->failure_exception);
410
428
  rb_gc_mark(state->stop_thread);
411
429
  rb_gc_mark(state->gc_tracepoint);
412
- rb_gc_mark(state->object_allocation_tracepoint);
413
430
  }
414
431
 
415
432
  // Called in a background thread created in CpuAndWallTimeWorker#start
@@ -755,7 +772,14 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
755
772
  // because they may raise exceptions.
756
773
  install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal");
757
774
  if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
758
- if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
775
+ if (state->allocation_profiling_enabled) {
776
+ rb_add_event_hook2(
777
+ on_newobj_event_as_hook,
778
+ RUBY_INTERNAL_EVENT_NEWOBJ,
779
+ state->self_instance,
780
+ RUBY_EVENT_HOOK_FLAG_SAFE | RUBY_EVENT_HOOK_FLAG_RAW_ARG)
781
+ ;
782
+ }
759
783
 
760
784
  // Flag the profiler as running before we release the GVL, in case anyone's waiting to know about it
761
785
  rb_funcall(instance, rb_intern("signal_running"), 0);
@@ -1036,46 +1060,87 @@ static void sleep_for(uint64_t time_ns) {
1036
1060
  }
1037
1061
 
1038
1062
  static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
1039
- bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled;
1063
+ struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state;
1064
+
1065
+ bool are_allocations_being_tracked = state != NULL && state->allocation_profiling_enabled && state->allocation_counting_enabled;
1040
1066
 
1041
1067
  return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
1042
1068
  }
1043
1069
 
1044
- // Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint`
1045
- // when the RUBY_INTERNAL_EVENT_NEWOBJ event is triggered.
1046
- static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
1047
- // Update thread-local allocation count
1048
- if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
1049
- allocation_count = 0;
1050
- } else {
1051
- allocation_count++;
1052
- }
1070
+ #define HANDLE_CLOCK_FAILURE(call) ({ \
1071
+ long _result = (call); \
1072
+ if (_result == 0) { \
1073
+ delayed_error(state, ERR_CLOCK_FAIL); \
1074
+ return; \
1075
+ } \
1076
+ _result; \
1077
+ })
1053
1078
 
1079
+ // Implements memory-related profiling events. This function is called by Ruby via `rb_add_event_hook2`
1080
+ // when the RUBY_INTERNAL_EVENT_NEWOBJ event is triggered.
1081
+ //
1082
+ // When allocation sampling is enabled, this function gets called for almost all* objects allocated by the Ruby VM.
1083
+ // (*In some weird cases the VM may skip this tracepoint.)
1084
+ //
1085
+ // At a high level, there are two paths through this function:
1086
+ // 1. should_sample == false -> return
1087
+ // 2. should_sample == true -> sample
1088
+ //
1089
+ // On big applications, path 1. is the hottest, since we don't sample every object. So it's quite important for it to
1090
+ // be as fast as possible.
1091
+ //
1092
+ // NOTE: You may be wondering why we don't use any of the arguments to this function. It turns out it's possible to just
1093
+ // call `rb_tracearg_from_tracepoint(anything)` anywhere during this function or its callees to get the data, so that's
1094
+ // why it's not being passed as an argument.
1095
+ static void on_newobj_event(DDTRACE_UNUSED VALUE unused1, DDTRACE_UNUSED void *unused2) {
1054
1096
  struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
1055
1097
 
1056
1098
  // This should not happen in a normal situation because the tracepoint is always enabled after the instance is set
1057
1099
  // and disabled before it is cleared, but just in case...
1058
1100
  if (state == NULL) return;
1059
1101
 
1060
- // In a few cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
1102
+ if (RB_UNLIKELY(state->allocation_counting_enabled)) {
1103
+ // Update thread-local allocation count
1104
+ if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
1105
+ allocation_count = 0;
1106
+ } else {
1107
+ allocation_count++;
1108
+ }
1109
+ }
1110
+
1111
+ // In rare cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
1061
1112
  // sample, so we just return early
1062
1113
  if (state->during_sample) {
1063
1114
  state->stats.allocations_during_sample++;
1064
1115
  return;
1065
1116
  }
1066
1117
 
1067
- if (state->dynamic_sampling_rate_enabled) {
1068
- long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
1069
- if (now == 0) {
1070
- delayed_error(state, ERR_CLOCK_FAIL);
1071
- return;
1072
- }
1073
- if (!discrete_dynamic_sampler_should_sample(&state->allocation_sampler, now)) {
1074
- state->stats.allocation_skipped++;
1075
- return;
1118
+ // Hot path: Dynamic sampling rate is usually enabled and the sampling decision is usually false
1119
+ if (RB_LIKELY(state->dynamic_sampling_rate_enabled && !discrete_dynamic_sampler_should_sample(&state->allocation_sampler))) {
1120
+ state->stats.allocation_skipped++;
1121
+
1122
+ coarse_instant now = monotonic_coarse_wall_time_now_ns();
1123
+ HANDLE_CLOCK_FAILURE(now.timestamp_ns);
1124
+
1125
+ bool needs_readjust = discrete_dynamic_sampler_skipped_sample(&state->allocation_sampler, now);
1126
+ if (RB_UNLIKELY(needs_readjust)) {
1127
+ // We rarely readjust, so this is a cold path
1128
+ // Also, while above we used the cheaper monotonic_coarse, for this call we want the regular monotonic call,
1129
+ // which is why we end up getting time "again".
1130
+ discrete_dynamic_sampler_readjust(
1131
+ &state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
1132
+ );
1076
1133
  }
1134
+
1135
+ return;
1077
1136
  }
1078
1137
 
1138
+ // From here on, we've decided to go ahead with the sample, which is way less common than skipping it
1139
+
1140
+ discrete_dynamic_sampler_before_sample(
1141
+ &state->allocation_sampler, HANDLE_CLOCK_FAILURE(monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE))
1142
+ );
1143
+
1079
1144
  // @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
1080
1145
  // invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided
1081
1146
  // to keep this here for consistency -- every call to the thread context (other than the special gc calls which are
@@ -1083,7 +1148,7 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused)
1083
1148
  state->during_sample = true;
1084
1149
 
1085
1150
  // Rescue against any exceptions that happen during sampling
1086
- safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
1151
+ safely_call(rescued_sample_allocation, Qnil, state->self_instance);
1087
1152
 
1088
1153
  if (state->dynamic_sampling_rate_enabled) {
1089
1154
  long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
@@ -1108,9 +1173,7 @@ static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) {
1108
1173
  if (state->gc_tracepoint != Qnil) {
1109
1174
  rb_tracepoint_disable(state->gc_tracepoint);
1110
1175
  }
1111
- if (state->object_allocation_tracepoint != Qnil) {
1112
- rb_tracepoint_disable(state->object_allocation_tracepoint);
1113
- }
1176
+ rb_remove_event_hook_with_data(on_newobj_event_as_hook, state->self_instance);
1114
1177
  }
1115
1178
 
1116
1179
  static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
@@ -1126,13 +1189,14 @@ static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
1126
1189
  }
1127
1190
  }
1128
1191
 
1129
- static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
1192
+ static VALUE rescued_sample_allocation(DDTRACE_UNUSED VALUE unused) {
1130
1193
  struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
1131
1194
 
1132
1195
  // This should not happen in a normal situation because on_newobj_event already checked for this, but just in case...
1133
1196
  if (state == NULL) return Qnil;
1134
1197
 
1135
- rb_trace_arg_t *data = rb_tracearg_from_tracepoint(tracepoint_data);
1198
+ // If we're getting called from inside a tracepoint/event hook, Ruby exposes the data using this function.
1199
+ rb_trace_arg_t *data = rb_tracearg_from_tracepoint(Qnil);
1136
1200
  VALUE new_object = rb_tracearg_object(data);
1137
1201
 
1138
1202
  unsigned long allocations_since_last_sample = state->dynamic_sampling_rate_enabled ?
@@ -1140,9 +1204,15 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
1140
1204
  discrete_dynamic_sampler_events_since_last_sample(&state->allocation_sampler) :
1141
1205
  // if we aren't, then we're sampling every event
1142
1206
  1;
1143
- // TODO: Signal in the profile that clamping happened?
1207
+
1208
+ // To control bias from sampling, we clamp the maximum weight attributed to a single allocation sample. This avoids
1209
+ // assigning a very large number to a sample, if for instance the dynamic sampling mechanism chose a really big interval.
1144
1210
  unsigned int weight = allocations_since_last_sample > MAX_ALLOC_WEIGHT ? MAX_ALLOC_WEIGHT : (unsigned int) allocations_since_last_sample;
1145
1211
  thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object);
1212
+ // ...but we still represent the skipped samples in the profile, thus the data will account for all allocations.
1213
+ if (weight < allocations_since_last_sample) {
1214
+ thread_context_collector_sample_skipped_allocation_samples(state->thread_context_collector_instance, allocations_since_last_sample - weight);
1215
+ }
1146
1216
 
1147
1217
  // Return a dummy VALUE because we're called from rb_rescue2 which requires it
1148
1218
  return Qnil;