ddtrace 1.12.1 → 1.23.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +613 -9
- data/LICENSE-3rdparty.csv +1 -1
- data/bin/ddprofrb +15 -0
- data/bin/ddtracerb +3 -1
- data/ext/{ddtrace_profiling_loader/ddtrace_profiling_loader.c → datadog_profiling_loader/datadog_profiling_loader.c} +2 -2
- data/ext/{ddtrace_profiling_loader → datadog_profiling_loader}/extconf.rb +3 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/NativeExtensionDesign.md +3 -5
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id.h +0 -3
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_from_pthread.c +3 -22
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/clock_id_noop.c +0 -1
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_cpu_and_wall_time_worker.c +338 -108
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +422 -0
- data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.h +101 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.c +22 -14
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_dynamic_sampling_rate.h +4 -0
- data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +156 -0
- data/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.h +5 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.c +3 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.c +111 -118
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_stack.h +11 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.c +545 -144
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_thread_context.h +3 -2
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/extconf.rb +68 -17
- data/ext/datadog_profiling_native_extension/heap_recorder.c +1047 -0
- data/ext/datadog_profiling_native_extension/heap_recorder.h +166 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/helpers.h +6 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/http_transport.c +60 -32
- data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +62 -0
- data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +42 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/native_extension_helpers.rb +50 -4
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.c +155 -32
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/private_vm_api_access.h +16 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/profiling.c +19 -3
- data/ext/datadog_profiling_native_extension/ruby_helpers.c +267 -0
- data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/ruby_helpers.h +33 -0
- data/ext/datadog_profiling_native_extension/stack_recorder.c +1040 -0
- data/ext/datadog_profiling_native_extension/stack_recorder.h +27 -0
- data/ext/datadog_profiling_native_extension/time_helpers.c +53 -0
- data/ext/datadog_profiling_native_extension/time_helpers.h +26 -0
- data/lib/datadog/appsec/assets/waf_rules/processors.json +92 -0
- data/lib/datadog/appsec/assets/waf_rules/recommended.json +698 -75
- data/lib/datadog/appsec/assets/waf_rules/scanners.json +114 -0
- data/lib/datadog/appsec/assets/waf_rules/strict.json +98 -8
- data/lib/datadog/appsec/assets.rb +8 -0
- data/lib/datadog/appsec/component.rb +21 -2
- data/lib/datadog/appsec/configuration/settings.rb +167 -189
- data/lib/datadog/appsec/configuration.rb +0 -79
- data/lib/datadog/appsec/contrib/auto_instrument.rb +2 -4
- data/lib/datadog/appsec/contrib/devise/event.rb +57 -0
- data/lib/datadog/appsec/contrib/devise/ext.rb +13 -0
- data/lib/datadog/appsec/contrib/devise/integration.rb +42 -0
- data/lib/datadog/appsec/contrib/devise/patcher/authenticatable_patch.rb +76 -0
- data/lib/datadog/appsec/contrib/devise/patcher/registration_controller_patch.rb +54 -0
- data/lib/datadog/appsec/contrib/devise/patcher.rb +45 -0
- data/lib/datadog/appsec/contrib/devise/resource.rb +35 -0
- data/lib/datadog/appsec/contrib/devise/tracking.rb +57 -0
- data/lib/datadog/appsec/contrib/rack/ext.rb +2 -1
- data/lib/datadog/appsec/contrib/rack/gateway/request.rb +6 -2
- data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +8 -6
- data/lib/datadog/appsec/contrib/rack/reactive/request.rb +3 -8
- data/lib/datadog/appsec/contrib/rack/reactive/request_body.rb +3 -6
- data/lib/datadog/appsec/contrib/rack/reactive/response.rb +3 -6
- data/lib/datadog/appsec/contrib/rack/request_body_middleware.rb +3 -2
- data/lib/datadog/appsec/contrib/rack/request_middleware.rb +77 -27
- data/lib/datadog/appsec/contrib/rails/ext.rb +3 -2
- data/lib/datadog/appsec/contrib/rails/framework.rb +1 -3
- data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +3 -2
- data/lib/datadog/appsec/contrib/rails/patcher.rb +17 -11
- data/lib/datadog/appsec/contrib/rails/reactive/action.rb +3 -6
- data/lib/datadog/appsec/contrib/sinatra/ext.rb +2 -1
- data/lib/datadog/appsec/contrib/sinatra/framework.rb +1 -3
- data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +6 -4
- data/lib/datadog/appsec/contrib/sinatra/patcher.rb +13 -7
- data/lib/datadog/appsec/contrib/sinatra/reactive/routed.rb +3 -6
- data/lib/datadog/appsec/event.rb +106 -50
- data/lib/datadog/appsec/extensions.rb +1 -130
- data/lib/datadog/appsec/monitor/gateway/watcher.rb +3 -3
- data/lib/datadog/appsec/monitor/reactive/set_user.rb +3 -6
- data/lib/datadog/appsec/processor/actions.rb +49 -0
- data/lib/datadog/appsec/processor/rule_loader.rb +60 -0
- data/lib/datadog/appsec/processor/rule_merger.rb +22 -2
- data/lib/datadog/appsec/processor.rb +35 -7
- data/lib/datadog/appsec/rate_limiter.rb +1 -1
- data/lib/datadog/appsec/remote.rb +17 -11
- data/lib/datadog/appsec/response.rb +82 -4
- data/lib/datadog/appsec/sample_rate.rb +21 -0
- data/lib/datadog/appsec.rb +3 -4
- data/lib/datadog/auto_instrument.rb +3 -0
- data/lib/datadog/core/backport.rb +51 -0
- data/lib/datadog/core/configuration/agent_settings_resolver.rb +38 -29
- data/lib/datadog/core/configuration/base.rb +6 -16
- data/lib/datadog/core/configuration/components.rb +20 -7
- data/lib/datadog/core/configuration/ext.rb +28 -5
- data/lib/datadog/core/configuration/option.rb +271 -21
- data/lib/datadog/core/configuration/option_definition.rb +73 -32
- data/lib/datadog/core/configuration/options.rb +27 -15
- data/lib/datadog/core/configuration/settings.rb +398 -119
- data/lib/datadog/core/configuration.rb +24 -4
- data/lib/datadog/core/diagnostics/environment_logger.rb +132 -235
- data/lib/datadog/core/environment/class_count.rb +6 -6
- data/lib/datadog/core/environment/execution.rb +103 -0
- data/lib/datadog/core/environment/ext.rb +13 -11
- data/lib/datadog/core/environment/git.rb +25 -0
- data/lib/datadog/core/environment/identity.rb +18 -48
- data/lib/datadog/core/environment/platform.rb +7 -1
- data/lib/datadog/core/environment/variable_helpers.rb +0 -69
- data/lib/datadog/core/environment/yjit.rb +58 -0
- data/lib/datadog/core/error.rb +1 -0
- data/lib/datadog/core/git/ext.rb +6 -23
- data/lib/datadog/core/logging/ext.rb +3 -1
- data/lib/datadog/core/metrics/ext.rb +7 -5
- data/lib/datadog/core/remote/client/capabilities.rb +7 -2
- data/lib/datadog/core/remote/client.rb +3 -0
- data/lib/datadog/core/remote/component.rb +52 -48
- data/lib/datadog/core/remote/configuration/content.rb +28 -1
- data/lib/datadog/core/remote/configuration/repository.rb +3 -1
- data/lib/datadog/core/remote/ext.rb +2 -1
- data/lib/datadog/core/remote/negotiation.rb +20 -7
- data/lib/datadog/core/remote/tie/tracing.rb +39 -0
- data/lib/datadog/core/remote/tie.rb +27 -0
- data/lib/datadog/core/remote/transport/config.rb +60 -0
- data/lib/datadog/core/remote/transport/http/api/instance.rb +39 -0
- data/lib/datadog/core/remote/transport/http/api/spec.rb +21 -0
- data/lib/datadog/core/remote/transport/http/api.rb +58 -0
- data/lib/datadog/core/remote/transport/http/builder.rb +219 -0
- data/lib/datadog/core/remote/transport/http/client.rb +48 -0
- data/lib/datadog/core/remote/transport/http/config.rb +280 -0
- data/lib/datadog/core/remote/transport/http/negotiation.rb +146 -0
- data/lib/datadog/core/remote/transport/http.rb +179 -0
- data/lib/datadog/core/{transport → remote/transport}/negotiation.rb +25 -23
- data/lib/datadog/core/remote/worker.rb +11 -5
- data/lib/datadog/core/runtime/ext.rb +22 -12
- data/lib/datadog/core/runtime/metrics.rb +43 -0
- data/lib/datadog/core/telemetry/client.rb +28 -10
- data/lib/datadog/core/telemetry/emitter.rb +9 -11
- data/lib/datadog/core/telemetry/event.rb +250 -44
- data/lib/datadog/core/telemetry/ext.rb +8 -1
- data/lib/datadog/core/telemetry/heartbeat.rb +3 -7
- data/lib/datadog/core/telemetry/http/ext.rb +13 -8
- data/lib/datadog/core/telemetry/http/response.rb +4 -0
- data/lib/datadog/core/telemetry/http/transport.rb +10 -3
- data/lib/datadog/core/telemetry/request.rb +59 -0
- data/lib/datadog/core/transport/ext.rb +49 -0
- data/lib/datadog/core/transport/http/adapters/net.rb +168 -0
- data/lib/datadog/core/transport/http/adapters/registry.rb +29 -0
- data/lib/datadog/core/transport/http/adapters/test.rb +89 -0
- data/lib/datadog/core/transport/http/adapters/unix_socket.rb +83 -0
- data/lib/datadog/core/transport/http/api/endpoint.rb +31 -0
- data/lib/datadog/core/transport/http/api/fallbacks.rb +26 -0
- data/lib/datadog/core/transport/http/api/map.rb +18 -0
- data/lib/datadog/core/transport/http/env.rb +62 -0
- data/lib/datadog/core/transport/http/response.rb +60 -0
- data/lib/datadog/core/transport/parcel.rb +22 -0
- data/lib/datadog/core/transport/request.rb +17 -0
- data/lib/datadog/core/transport/response.rb +64 -0
- data/lib/datadog/core/utils/duration.rb +52 -0
- data/lib/datadog/core/utils/hash.rb +47 -0
- data/lib/datadog/core/utils/network.rb +1 -1
- data/lib/datadog/core/utils/safe_dup.rb +27 -20
- data/lib/datadog/core/utils/url.rb +25 -0
- data/lib/datadog/core/utils.rb +1 -1
- data/lib/datadog/core/workers/async.rb +3 -2
- data/lib/datadog/core/workers/polling.rb +2 -2
- data/lib/datadog/kit/appsec/events.rb +139 -89
- data/lib/datadog/kit/enable_core_dumps.rb +5 -6
- data/lib/datadog/kit/identity.rb +80 -65
- data/lib/datadog/opentelemetry/api/context.rb +10 -3
- data/lib/datadog/opentelemetry/sdk/propagator.rb +5 -3
- data/lib/datadog/opentelemetry/sdk/span_processor.rb +48 -5
- data/lib/datadog/opentelemetry/sdk/trace/span.rb +167 -0
- data/lib/datadog/opentelemetry/trace.rb +58 -0
- data/lib/datadog/opentelemetry.rb +4 -0
- data/lib/datadog/opentracer/text_map_propagator.rb +2 -1
- data/lib/datadog/opentracer.rb +9 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +10 -4
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +43 -20
- data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +3 -1
- data/lib/datadog/profiling/collectors/info.rb +101 -0
- data/lib/datadog/profiling/collectors/thread_context.rb +17 -2
- data/lib/datadog/profiling/component.rb +248 -97
- data/lib/datadog/profiling/exporter.rb +26 -5
- data/lib/datadog/profiling/ext.rb +2 -12
- data/lib/datadog/profiling/flush.rb +10 -5
- data/lib/datadog/profiling/http_transport.rb +23 -6
- data/lib/datadog/profiling/load_native_extension.rb +25 -6
- data/lib/datadog/profiling/native_extension.rb +1 -22
- data/lib/datadog/profiling/profiler.rb +36 -13
- data/lib/datadog/profiling/scheduler.rb +20 -15
- data/lib/datadog/profiling/stack_recorder.rb +19 -4
- data/lib/datadog/profiling/tag_builder.rb +5 -0
- data/lib/datadog/profiling/tasks/exec.rb +3 -3
- data/lib/datadog/profiling/tasks/help.rb +3 -3
- data/lib/datadog/profiling.rb +28 -79
- data/lib/datadog/tracing/component.rb +70 -11
- data/lib/datadog/tracing/configuration/agent_settings_resolver.rb +13 -0
- data/lib/datadog/tracing/configuration/dynamic/option.rb +71 -0
- data/lib/datadog/tracing/configuration/dynamic.rb +64 -0
- data/lib/datadog/tracing/configuration/ext.rb +40 -33
- data/lib/datadog/tracing/configuration/http.rb +74 -0
- data/lib/datadog/tracing/configuration/settings.rb +136 -99
- data/lib/datadog/tracing/contrib/action_cable/configuration/settings.rb +10 -6
- data/lib/datadog/tracing/contrib/action_cable/ext.rb +21 -18
- data/lib/datadog/tracing/contrib/action_mailer/configuration/settings.rb +10 -6
- data/lib/datadog/tracing/contrib/action_mailer/events/deliver.rb +1 -1
- data/lib/datadog/tracing/contrib/action_mailer/ext.rb +21 -18
- data/lib/datadog/tracing/contrib/action_pack/configuration/settings.rb +10 -7
- data/lib/datadog/tracing/contrib/action_pack/ext.rb +11 -8
- data/lib/datadog/tracing/contrib/action_view/configuration/settings.rb +10 -6
- data/lib/datadog/tracing/contrib/action_view/ext.rb +13 -10
- data/lib/datadog/tracing/contrib/active_job/configuration/settings.rb +14 -7
- data/lib/datadog/tracing/contrib/active_job/ext.rb +26 -23
- data/lib/datadog/tracing/contrib/active_job/log_injection.rb +1 -1
- data/lib/datadog/tracing/contrib/active_job/patcher.rb +1 -1
- data/lib/datadog/tracing/contrib/active_model_serializers/configuration/settings.rb +10 -6
- data/lib/datadog/tracing/contrib/active_model_serializers/ext.rb +13 -10
- data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +29 -15
- data/lib/datadog/tracing/contrib/active_record/configuration/settings.rb +10 -7
- data/lib/datadog/tracing/contrib/active_record/events/sql.rb +2 -6
- data/lib/datadog/tracing/contrib/active_record/ext.rb +18 -15
- data/lib/datadog/tracing/contrib/active_record/utils.rb +1 -1
- data/lib/datadog/tracing/contrib/active_support/cache/instrumentation.rb +106 -202
- data/lib/datadog/tracing/contrib/active_support/cache/patcher.rb +3 -0
- data/lib/datadog/tracing/contrib/active_support/configuration/settings.rb +10 -7
- data/lib/datadog/tracing/contrib/active_support/ext.rb +19 -16
- data/lib/datadog/tracing/contrib/analytics.rb +0 -1
- data/lib/datadog/tracing/contrib/aws/configuration/settings.rb +15 -7
- data/lib/datadog/tracing/contrib/aws/ext.rb +38 -24
- data/lib/datadog/tracing/contrib/aws/instrumentation.rb +16 -5
- data/lib/datadog/tracing/contrib/concurrent_ruby/async_patch.rb +20 -0
- data/lib/datadog/tracing/contrib/concurrent_ruby/configuration/settings.rb +3 -2
- data/lib/datadog/tracing/contrib/concurrent_ruby/context_composite_executor_service.rb +14 -14
- data/lib/datadog/tracing/contrib/concurrent_ruby/ext.rb +4 -2
- data/lib/datadog/tracing/contrib/concurrent_ruby/future_patch.rb +3 -10
- data/lib/datadog/tracing/contrib/concurrent_ruby/integration.rb +2 -1
- data/lib/datadog/tracing/contrib/concurrent_ruby/patcher.rb +19 -2
- data/lib/datadog/tracing/contrib/concurrent_ruby/promises_future_patch.rb +22 -0
- data/lib/datadog/tracing/contrib/configurable.rb +1 -1
- data/lib/datadog/tracing/contrib/configuration/settings.rb +1 -1
- data/lib/datadog/tracing/contrib/dalli/configuration/settings.rb +21 -7
- data/lib/datadog/tracing/contrib/dalli/ext.rb +27 -11
- data/lib/datadog/tracing/contrib/dalli/instrumentation.rb +17 -8
- data/lib/datadog/tracing/contrib/delayed_job/configuration/settings.rb +14 -7
- data/lib/datadog/tracing/contrib/delayed_job/ext.rb +17 -14
- data/lib/datadog/tracing/contrib/elasticsearch/configuration/settings.rb +15 -7
- data/lib/datadog/tracing/contrib/elasticsearch/ext.rb +22 -15
- data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +104 -99
- data/lib/datadog/tracing/contrib/ethon/configuration/settings.rb +17 -9
- data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +48 -3
- data/lib/datadog/tracing/contrib/ethon/ext.rb +20 -11
- data/lib/datadog/tracing/contrib/ethon/multi_patch.rb +6 -3
- data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +20 -10
- data/lib/datadog/tracing/contrib/excon/ext.rb +17 -8
- data/lib/datadog/tracing/contrib/excon/middleware.rb +25 -5
- data/lib/datadog/tracing/contrib/ext.rb +26 -1
- data/lib/datadog/tracing/contrib/extensions.rb +38 -2
- data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +27 -10
- data/lib/datadog/tracing/contrib/faraday/ext.rb +17 -8
- data/lib/datadog/tracing/contrib/faraday/middleware.rb +22 -6
- data/lib/datadog/tracing/contrib/grape/configuration/settings.rb +9 -6
- data/lib/datadog/tracing/contrib/grape/ext.rb +17 -14
- data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +9 -6
- data/lib/datadog/tracing/contrib/graphql/ext.rb +8 -5
- data/lib/datadog/tracing/contrib/grpc/configuration/settings.rb +40 -9
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/client.rb +39 -20
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor/server.rb +37 -18
- data/lib/datadog/tracing/contrib/grpc/datadog_interceptor.rb +0 -4
- data/lib/datadog/tracing/contrib/grpc/ext.rb +17 -13
- data/lib/datadog/tracing/contrib/grpc/formatting.rb +127 -0
- data/lib/datadog/tracing/contrib/hanami/configuration/settings.rb +3 -2
- data/lib/datadog/tracing/contrib/hanami/ext.rb +10 -8
- data/lib/datadog/tracing/contrib/http/circuit_breaker.rb +5 -8
- data/lib/datadog/tracing/contrib/http/configuration/settings.rb +34 -11
- data/lib/datadog/tracing/contrib/http/distributed/fetcher.rb +2 -2
- data/lib/datadog/tracing/contrib/http/ext.rb +17 -9
- data/lib/datadog/tracing/contrib/http/instrumentation.rb +27 -7
- data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +34 -11
- data/lib/datadog/tracing/contrib/httpclient/ext.rb +18 -9
- data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +22 -5
- data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +34 -11
- data/lib/datadog/tracing/contrib/httprb/ext.rb +17 -9
- data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +22 -5
- data/lib/datadog/tracing/contrib/kafka/configuration/settings.rb +10 -6
- data/lib/datadog/tracing/contrib/kafka/ext.rb +43 -39
- data/lib/datadog/tracing/contrib/lograge/configuration/settings.rb +3 -2
- data/lib/datadog/tracing/contrib/lograge/ext.rb +3 -1
- data/lib/datadog/tracing/contrib/lograge/instrumentation.rb +2 -17
- data/lib/datadog/tracing/contrib/mongodb/configuration/settings.rb +15 -7
- data/lib/datadog/tracing/contrib/mongodb/ext.rb +21 -16
- data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +16 -5
- data/lib/datadog/tracing/contrib/mysql2/configuration/settings.rb +22 -14
- data/lib/datadog/tracing/contrib/mysql2/ext.rb +16 -10
- data/lib/datadog/tracing/contrib/mysql2/instrumentation.rb +22 -7
- data/lib/datadog/tracing/contrib/opensearch/configuration/settings.rb +53 -0
- data/lib/datadog/tracing/contrib/opensearch/ext.rb +38 -0
- data/lib/datadog/tracing/contrib/opensearch/integration.rb +44 -0
- data/lib/datadog/tracing/contrib/opensearch/patcher.rb +135 -0
- data/lib/datadog/tracing/contrib/opensearch/quantize.rb +81 -0
- data/lib/datadog/tracing/contrib/pg/configuration/settings.rb +23 -14
- data/lib/datadog/tracing/contrib/pg/ext.rb +23 -19
- data/lib/datadog/tracing/contrib/pg/instrumentation.rb +49 -9
- data/lib/datadog/tracing/contrib/presto/configuration/settings.rb +15 -7
- data/lib/datadog/tracing/contrib/presto/ext.rb +26 -20
- data/lib/datadog/tracing/contrib/presto/instrumentation.rb +14 -5
- data/lib/datadog/tracing/contrib/propagation/sql_comment/ext.rb +12 -10
- data/lib/datadog/tracing/contrib/propagation/sql_comment.rb +1 -1
- data/lib/datadog/tracing/contrib/qless/configuration/settings.rb +13 -8
- data/lib/datadog/tracing/contrib/qless/ext.rb +15 -12
- data/lib/datadog/tracing/contrib/que/configuration/settings.rb +22 -12
- data/lib/datadog/tracing/contrib/que/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/racecar/configuration/settings.rb +10 -7
- data/lib/datadog/tracing/contrib/racecar/event.rb +5 -5
- data/lib/datadog/tracing/contrib/racecar/ext.rb +21 -18
- data/lib/datadog/tracing/contrib/rack/configuration/settings.rb +17 -12
- data/lib/datadog/tracing/contrib/rack/ext.rb +19 -16
- data/lib/datadog/tracing/contrib/rack/header_collection.rb +3 -0
- data/lib/datadog/tracing/contrib/rack/header_tagging.rb +63 -0
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +16 -50
- data/lib/datadog/tracing/contrib/rails/auto_instrument_railtie.rb +0 -2
- data/lib/datadog/tracing/contrib/rails/configuration/settings.rb +20 -15
- data/lib/datadog/tracing/contrib/rails/ext.rb +8 -5
- data/lib/datadog/tracing/contrib/rails/log_injection.rb +7 -10
- data/lib/datadog/tracing/contrib/rails/patcher.rb +10 -41
- data/lib/datadog/tracing/contrib/rails/railtie.rb +3 -3
- data/lib/datadog/tracing/contrib/rake/configuration/settings.rb +14 -10
- data/lib/datadog/tracing/contrib/rake/ext.rb +15 -12
- data/lib/datadog/tracing/contrib/redis/configuration/settings.rb +18 -9
- data/lib/datadog/tracing/contrib/redis/ext.rb +23 -15
- data/lib/datadog/tracing/contrib/redis/instrumentation.rb +5 -40
- data/lib/datadog/tracing/contrib/redis/patcher.rb +34 -21
- data/lib/datadog/tracing/contrib/redis/tags.rb +16 -7
- data/lib/datadog/tracing/contrib/redis/trace_middleware.rb +46 -33
- data/lib/datadog/tracing/contrib/resque/configuration/settings.rb +14 -7
- data/lib/datadog/tracing/contrib/resque/ext.rb +10 -7
- data/lib/datadog/tracing/contrib/rest_client/configuration/settings.rb +17 -9
- data/lib/datadog/tracing/contrib/rest_client/ext.rb +16 -8
- data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +25 -5
- data/lib/datadog/tracing/contrib/roda/configuration/settings.rb +10 -6
- data/lib/datadog/tracing/contrib/roda/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/semantic_logger/configuration/settings.rb +3 -2
- data/lib/datadog/tracing/contrib/semantic_logger/ext.rb +3 -1
- data/lib/datadog/tracing/contrib/semantic_logger/instrumentation.rb +4 -20
- data/lib/datadog/tracing/contrib/sequel/configuration/settings.rb +10 -6
- data/lib/datadog/tracing/contrib/sequel/ext.rb +11 -8
- data/lib/datadog/tracing/contrib/sequel/utils.rb +7 -7
- data/lib/datadog/tracing/contrib/shoryuken/configuration/settings.rb +15 -8
- data/lib/datadog/tracing/contrib/shoryuken/ext.rb +15 -12
- data/lib/datadog/tracing/contrib/sidekiq/configuration/settings.rb +19 -11
- data/lib/datadog/tracing/contrib/sidekiq/ext.rb +33 -30
- data/lib/datadog/tracing/contrib/sinatra/configuration/settings.rb +12 -9
- data/lib/datadog/tracing/contrib/sinatra/env.rb +0 -17
- data/lib/datadog/tracing/contrib/sinatra/ext.rb +22 -19
- data/lib/datadog/tracing/contrib/sinatra/tracer_middleware.rb +3 -14
- data/lib/datadog/tracing/contrib/sneakers/configuration/settings.rb +15 -8
- data/lib/datadog/tracing/contrib/sneakers/ext.rb +2 -0
- data/lib/datadog/tracing/contrib/sneakers/tracer.rb +1 -1
- data/lib/datadog/tracing/contrib/span_attribute_schema.rb +74 -10
- data/lib/datadog/tracing/contrib/stripe/configuration/settings.rb +10 -6
- data/lib/datadog/tracing/contrib/stripe/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/sucker_punch/configuration/settings.rb +10 -6
- data/lib/datadog/tracing/contrib/sucker_punch/ext.rb +16 -13
- data/lib/datadog/tracing/contrib/trilogy/configuration/settings.rb +58 -0
- data/lib/datadog/tracing/contrib/trilogy/ext.rb +27 -0
- data/lib/datadog/tracing/contrib/trilogy/instrumentation.rb +94 -0
- data/lib/datadog/tracing/contrib/trilogy/integration.rb +43 -0
- data/lib/datadog/{ci/contrib/cucumber → tracing/contrib/trilogy}/patcher.rb +10 -6
- data/lib/datadog/tracing/contrib/utils/database.rb +5 -3
- data/lib/datadog/tracing/contrib/utils/quantization/http.rb +11 -11
- data/lib/datadog/tracing/contrib.rb +2 -0
- data/lib/datadog/tracing/correlation.rb +29 -12
- data/lib/datadog/tracing/diagnostics/environment_logger.rb +165 -0
- data/lib/datadog/tracing/diagnostics/ext.rb +21 -19
- data/lib/datadog/tracing/distributed/b3_multi.rb +2 -2
- data/lib/datadog/tracing/distributed/b3_single.rb +1 -1
- data/lib/datadog/tracing/distributed/datadog.rb +0 -1
- data/lib/datadog/tracing/distributed/propagation.rb +35 -34
- data/lib/datadog/tracing/distributed/trace_context.rb +52 -17
- data/lib/datadog/tracing/metadata/ext.rb +9 -6
- data/lib/datadog/tracing/metadata/tagging.rb +3 -3
- data/lib/datadog/tracing/remote.rb +78 -0
- data/lib/datadog/tracing/sampling/matcher.rb +23 -3
- data/lib/datadog/tracing/sampling/rule.rb +7 -2
- data/lib/datadog/tracing/sampling/rule_sampler.rb +31 -0
- data/lib/datadog/tracing/span_operation.rb +3 -15
- data/lib/datadog/tracing/sync_writer.rb +3 -3
- data/lib/datadog/tracing/trace_digest.rb +31 -0
- data/lib/datadog/tracing/trace_operation.rb +17 -5
- data/lib/datadog/tracing/trace_segment.rb +5 -2
- data/lib/datadog/tracing/tracer.rb +12 -1
- data/lib/datadog/{core → tracing}/transport/http/api/instance.rb +1 -1
- data/lib/datadog/{core → tracing}/transport/http/api/spec.rb +1 -1
- data/lib/datadog/tracing/transport/http/api.rb +43 -0
- data/lib/datadog/{core → tracing}/transport/http/builder.rb +13 -68
- data/lib/datadog/tracing/transport/http/client.rb +57 -0
- data/lib/datadog/tracing/transport/http/statistics.rb +47 -0
- data/lib/datadog/tracing/transport/http/traces.rb +152 -0
- data/lib/datadog/tracing/transport/http.rb +125 -0
- data/lib/datadog/tracing/transport/io/client.rb +89 -0
- data/lib/datadog/tracing/transport/io/response.rb +27 -0
- data/lib/datadog/tracing/transport/io/traces.rb +101 -0
- data/lib/datadog/tracing/transport/io.rb +30 -0
- data/lib/datadog/tracing/transport/serializable_trace.rb +126 -0
- data/lib/datadog/tracing/transport/statistics.rb +77 -0
- data/lib/datadog/tracing/transport/trace_formatter.rb +240 -0
- data/lib/datadog/tracing/transport/traces.rb +224 -0
- data/lib/datadog/tracing/workers/trace_writer.rb +6 -4
- data/lib/datadog/tracing/workers.rb +4 -2
- data/lib/datadog/tracing/writer.rb +5 -2
- data/lib/datadog/tracing.rb +8 -2
- data/lib/ddtrace/transport/ext.rb +22 -14
- data/lib/ddtrace/version.rb +9 -12
- data/lib/ddtrace.rb +1 -1
- metadata +157 -139
- data/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +0 -25
- data/ext/ddtrace_profiling_native_extension/ruby_helpers.c +0 -110
- data/ext/ddtrace_profiling_native_extension/stack_recorder.c +0 -591
- data/ext/ddtrace_profiling_native_extension/stack_recorder.h +0 -14
- data/ext/ddtrace_profiling_native_extension/time_helpers.c +0 -17
- data/ext/ddtrace_profiling_native_extension/time_helpers.h +0 -10
- data/lib/datadog/ci/configuration/components.rb +0 -32
- data/lib/datadog/ci/configuration/settings.rb +0 -53
- data/lib/datadog/ci/contrib/cucumber/configuration/settings.rb +0 -33
- data/lib/datadog/ci/contrib/cucumber/ext.rb +0 -20
- data/lib/datadog/ci/contrib/cucumber/formatter.rb +0 -94
- data/lib/datadog/ci/contrib/cucumber/instrumentation.rb +0 -28
- data/lib/datadog/ci/contrib/cucumber/integration.rb +0 -47
- data/lib/datadog/ci/contrib/rspec/configuration/settings.rb +0 -33
- data/lib/datadog/ci/contrib/rspec/example.rb +0 -68
- data/lib/datadog/ci/contrib/rspec/ext.rb +0 -19
- data/lib/datadog/ci/contrib/rspec/integration.rb +0 -48
- data/lib/datadog/ci/contrib/rspec/patcher.rb +0 -27
- data/lib/datadog/ci/ext/app_types.rb +0 -9
- data/lib/datadog/ci/ext/environment.rb +0 -575
- data/lib/datadog/ci/ext/settings.rb +0 -10
- data/lib/datadog/ci/ext/test.rb +0 -35
- data/lib/datadog/ci/extensions.rb +0 -19
- data/lib/datadog/ci/flush.rb +0 -38
- data/lib/datadog/ci/test.rb +0 -81
- data/lib/datadog/ci.rb +0 -20
- data/lib/datadog/core/configuration/dependency_resolver.rb +0 -28
- data/lib/datadog/core/configuration/option_definition_set.rb +0 -22
- data/lib/datadog/core/configuration/option_set.rb +0 -10
- data/lib/datadog/core/telemetry/collector.rb +0 -231
- data/lib/datadog/core/telemetry/v1/app_event.rb +0 -52
- data/lib/datadog/core/telemetry/v1/application.rb +0 -92
- data/lib/datadog/core/telemetry/v1/configuration.rb +0 -25
- data/lib/datadog/core/telemetry/v1/dependency.rb +0 -43
- data/lib/datadog/core/telemetry/v1/host.rb +0 -59
- data/lib/datadog/core/telemetry/v1/integration.rb +0 -64
- data/lib/datadog/core/telemetry/v1/product.rb +0 -36
- data/lib/datadog/core/telemetry/v1/telemetry_request.rb +0 -106
- data/lib/datadog/core/transport/config.rb +0 -58
- data/lib/datadog/core/transport/http/api.rb +0 -57
- data/lib/datadog/core/transport/http/client.rb +0 -45
- data/lib/datadog/core/transport/http/config.rb +0 -268
- data/lib/datadog/core/transport/http/negotiation.rb +0 -144
- data/lib/datadog/core/transport/http.rb +0 -169
- data/lib/datadog/core/utils/object_set.rb +0 -43
- data/lib/datadog/core/utils/string_table.rb +0 -47
- data/lib/datadog/profiling/backtrace_location.rb +0 -34
- data/lib/datadog/profiling/buffer.rb +0 -43
- data/lib/datadog/profiling/collectors/old_stack.rb +0 -301
- data/lib/datadog/profiling/encoding/profile.rb +0 -41
- data/lib/datadog/profiling/event.rb +0 -15
- data/lib/datadog/profiling/events/stack.rb +0 -82
- data/lib/datadog/profiling/old_recorder.rb +0 -107
- data/lib/datadog/profiling/pprof/builder.rb +0 -125
- data/lib/datadog/profiling/pprof/converter.rb +0 -102
- data/lib/datadog/profiling/pprof/message_set.rb +0 -16
- data/lib/datadog/profiling/pprof/payload.rb +0 -20
- data/lib/datadog/profiling/pprof/pprof.proto +0 -212
- data/lib/datadog/profiling/pprof/pprof_pb.rb +0 -81
- data/lib/datadog/profiling/pprof/stack_sample.rb +0 -139
- data/lib/datadog/profiling/pprof/string_table.rb +0 -12
- data/lib/datadog/profiling/pprof/template.rb +0 -118
- data/lib/datadog/profiling/trace_identifiers/ddtrace.rb +0 -43
- data/lib/datadog/profiling/trace_identifiers/helper.rb +0 -45
- data/lib/datadog/tracing/contrib/sinatra/headers.rb +0 -35
- data/lib/ddtrace/transport/http/adapters/net.rb +0 -168
- data/lib/ddtrace/transport/http/adapters/registry.rb +0 -27
- data/lib/ddtrace/transport/http/adapters/test.rb +0 -85
- data/lib/ddtrace/transport/http/adapters/unix_socket.rb +0 -77
- data/lib/ddtrace/transport/http/api/endpoint.rb +0 -29
- data/lib/ddtrace/transport/http/api/fallbacks.rb +0 -24
- data/lib/ddtrace/transport/http/api/instance.rb +0 -35
- data/lib/ddtrace/transport/http/api/map.rb +0 -16
- data/lib/ddtrace/transport/http/api/spec.rb +0 -17
- data/lib/ddtrace/transport/http/api.rb +0 -39
- data/lib/ddtrace/transport/http/builder.rb +0 -176
- data/lib/ddtrace/transport/http/client.rb +0 -52
- data/lib/ddtrace/transport/http/env.rb +0 -58
- data/lib/ddtrace/transport/http/response.rb +0 -58
- data/lib/ddtrace/transport/http/statistics.rb +0 -43
- data/lib/ddtrace/transport/http/traces.rb +0 -144
- data/lib/ddtrace/transport/http.rb +0 -117
- data/lib/ddtrace/transport/io/client.rb +0 -85
- data/lib/ddtrace/transport/io/response.rb +0 -25
- data/lib/ddtrace/transport/io/traces.rb +0 -99
- data/lib/ddtrace/transport/io.rb +0 -28
- data/lib/ddtrace/transport/parcel.rb +0 -20
- data/lib/ddtrace/transport/request.rb +0 -15
- data/lib/ddtrace/transport/response.rb +0 -60
- data/lib/ddtrace/transport/serializable_trace.rb +0 -122
- data/lib/ddtrace/transport/statistics.rb +0 -75
- data/lib/ddtrace/transport/trace_formatter.rb +0 -198
- data/lib/ddtrace/transport/traces.rb +0 -216
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/collectors_idle_sampling_helper.h +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.c +0 -0
- /data/ext/{ddtrace_profiling_native_extension → datadog_profiling_native_extension}/setup_signal_handler.h +0 -0
|
@@ -12,10 +12,16 @@
|
|
|
12
12
|
#include "collectors_thread_context.h"
|
|
13
13
|
#include "collectors_dynamic_sampling_rate.h"
|
|
14
14
|
#include "collectors_idle_sampling_helper.h"
|
|
15
|
+
#include "collectors_discrete_dynamic_sampler.h"
|
|
15
16
|
#include "private_vm_api_access.h"
|
|
16
17
|
#include "setup_signal_handler.h"
|
|
17
18
|
#include "time_helpers.h"
|
|
18
19
|
|
|
20
|
+
#define ERR_CLOCK_FAIL "failed to get clock time"
|
|
21
|
+
|
|
22
|
+
// Maximum allowed value for an allocation weight. Attempts to use higher values will result in clamping.
|
|
23
|
+
unsigned int MAX_ALLOC_WEIGHT = 65535;
|
|
24
|
+
|
|
19
25
|
// Used to trigger the execution of Collectors::ThreadState, which implements all of the sampling logic
|
|
20
26
|
// itself; this class only implements the "when to do it" part.
|
|
21
27
|
//
|
|
@@ -75,19 +81,28 @@
|
|
|
75
81
|
//
|
|
76
82
|
// ---
|
|
77
83
|
|
|
84
|
+
#ifndef NO_POSTPONED_TRIGGER
|
|
85
|
+
// Used to call the rb_postponed_job_trigger from Ruby 3.3+. These get initialized in
|
|
86
|
+
// `collectors_cpu_and_wall_time_worker_init` below and always get reused after that.
|
|
87
|
+
static rb_postponed_job_handle_t sample_from_postponed_job_handle;
|
|
88
|
+
static rb_postponed_job_handle_t after_gc_from_postponed_job_handle;
|
|
89
|
+
#endif
|
|
90
|
+
|
|
78
91
|
// Contains state for a single CpuAndWallTimeWorker instance
|
|
79
92
|
struct cpu_and_wall_time_worker_state {
|
|
80
93
|
// These are immutable after initialization
|
|
81
94
|
|
|
82
95
|
bool gc_profiling_enabled;
|
|
83
|
-
bool allocation_counting_enabled;
|
|
84
96
|
bool no_signals_workaround_enabled;
|
|
85
97
|
bool dynamic_sampling_rate_enabled;
|
|
98
|
+
bool allocation_profiling_enabled;
|
|
99
|
+
bool skip_idle_samples_for_testing;
|
|
86
100
|
VALUE self_instance;
|
|
87
101
|
VALUE thread_context_collector_instance;
|
|
88
102
|
VALUE idle_sampling_helper_instance;
|
|
89
103
|
VALUE owner_thread;
|
|
90
|
-
dynamic_sampling_rate_state
|
|
104
|
+
dynamic_sampling_rate_state cpu_dynamic_sampling_rate;
|
|
105
|
+
discrete_dynamic_sampler allocation_sampler;
|
|
91
106
|
VALUE gc_tracepoint; // Used to get gc start/finish information
|
|
92
107
|
VALUE object_allocation_tracepoint; // Used to get allocation counts and allocation profiling
|
|
93
108
|
|
|
@@ -107,6 +122,7 @@ struct cpu_and_wall_time_worker_state {
|
|
|
107
122
|
bool during_sample;
|
|
108
123
|
|
|
109
124
|
struct stats {
|
|
125
|
+
// # Generic stats
|
|
110
126
|
// How many times we tried to trigger a sample
|
|
111
127
|
unsigned int trigger_sample_attempts;
|
|
112
128
|
// How many times we tried to simulate signal delivery
|
|
@@ -117,25 +133,38 @@ struct cpu_and_wall_time_worker_state {
|
|
|
117
133
|
unsigned int signal_handler_enqueued_sample;
|
|
118
134
|
// How many times the signal handler was called from the wrong thread
|
|
119
135
|
unsigned int signal_handler_wrong_thread;
|
|
120
|
-
// How many times we actually
|
|
121
|
-
unsigned int
|
|
122
|
-
// How many times we skipped a sample because of the dynamic sampling rate mechanism
|
|
123
|
-
unsigned int skipped_sample_because_of_dynamic_sampling_rate;
|
|
136
|
+
// How many times we actually tried to interrupt a thread for sampling
|
|
137
|
+
unsigned int interrupt_thread_attempts;
|
|
124
138
|
|
|
125
|
-
// Stats for the results of calling rb_postponed_job_register_one
|
|
126
|
-
|
|
139
|
+
// # Stats for the results of calling rb_postponed_job_register_one
|
|
140
|
+
// The same function was already waiting to be executed
|
|
127
141
|
unsigned int postponed_job_skipped_already_existed;
|
|
128
|
-
|
|
142
|
+
// The function was added to the queue successfully
|
|
129
143
|
unsigned int postponed_job_success;
|
|
130
|
-
|
|
144
|
+
// The queue was full
|
|
131
145
|
unsigned int postponed_job_full;
|
|
132
|
-
|
|
146
|
+
// The function returned an unknown result code
|
|
133
147
|
unsigned int postponed_job_unknown_result;
|
|
134
148
|
|
|
135
|
-
//
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
149
|
+
// # CPU/Walltime sampling stats
|
|
150
|
+
// How many times we actually CPU/wall sampled
|
|
151
|
+
unsigned int cpu_sampled;
|
|
152
|
+
// How many times we skipped a CPU/wall sample because of the dynamic sampling rate mechanism
|
|
153
|
+
unsigned int cpu_skipped;
|
|
154
|
+
// Min/max/total wall-time spent on CPU/wall sampling
|
|
155
|
+
uint64_t cpu_sampling_time_ns_min;
|
|
156
|
+
uint64_t cpu_sampling_time_ns_max;
|
|
157
|
+
uint64_t cpu_sampling_time_ns_total;
|
|
158
|
+
|
|
159
|
+
// # Allocation sampling stats
|
|
160
|
+
// How many times we actually allocation sampled
|
|
161
|
+
uint64_t allocation_sampled;
|
|
162
|
+
// How many times we skipped an allocation sample because of the dynamic sampling rate mechanism
|
|
163
|
+
uint64_t allocation_skipped;
|
|
164
|
+
// Min/max/total wall-time spent on allocation sampling
|
|
165
|
+
uint64_t allocation_sampling_time_ns_min;
|
|
166
|
+
uint64_t allocation_sampling_time_ns_max;
|
|
167
|
+
uint64_t allocation_sampling_time_ns_total;
|
|
139
168
|
// How many times we saw allocations being done inside a sample
|
|
140
169
|
unsigned int allocations_during_sample;
|
|
141
170
|
} stats;
|
|
@@ -148,14 +177,17 @@ static VALUE _native_initialize(
|
|
|
148
177
|
VALUE thread_context_collector_instance,
|
|
149
178
|
VALUE gc_profiling_enabled,
|
|
150
179
|
VALUE idle_sampling_helper_instance,
|
|
151
|
-
VALUE allocation_counting_enabled,
|
|
152
180
|
VALUE no_signals_workaround_enabled,
|
|
153
|
-
VALUE dynamic_sampling_rate_enabled
|
|
181
|
+
VALUE dynamic_sampling_rate_enabled,
|
|
182
|
+
VALUE dynamic_sampling_rate_overhead_target_percentage,
|
|
183
|
+
VALUE allocation_profiling_enabled,
|
|
184
|
+
VALUE skip_idle_samples_for_testing
|
|
154
185
|
);
|
|
155
186
|
static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
|
|
156
187
|
static VALUE _native_sampling_loop(VALUE self, VALUE instance);
|
|
157
188
|
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE worker_thread);
|
|
158
189
|
static VALUE stop(VALUE self_instance, VALUE optional_exception);
|
|
190
|
+
static void stop_state(struct cpu_and_wall_time_worker_state *state, VALUE optional_exception);
|
|
159
191
|
static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext);
|
|
160
192
|
static void *run_sampling_trigger_loop(void *state_ptr);
|
|
161
193
|
static void interrupt_sampling_trigger_loop(void *state_ptr);
|
|
@@ -178,14 +210,18 @@ static VALUE _native_simulate_sample_from_postponed_job(DDTRACE_UNUSED VALUE sel
|
|
|
178
210
|
static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
179
211
|
static VALUE _native_is_sigprof_blocked_in_current_thread(DDTRACE_UNUSED VALUE self);
|
|
180
212
|
static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
213
|
+
static VALUE _native_stats_reset_not_thread_safe(DDTRACE_UNUSED VALUE self, VALUE instance);
|
|
181
214
|
void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused);
|
|
182
215
|
static void grab_gvl_and_sample(void);
|
|
183
|
-
static void
|
|
216
|
+
static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state);
|
|
184
217
|
static void sleep_for(uint64_t time_ns);
|
|
185
218
|
static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self);
|
|
186
|
-
static void on_newobj_event(
|
|
219
|
+
static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused);
|
|
187
220
|
static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state);
|
|
188
221
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
|
|
222
|
+
static VALUE rescued_sample_allocation(VALUE tracepoint_data);
|
|
223
|
+
static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error);
|
|
224
|
+
static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg);
|
|
189
225
|
|
|
190
226
|
// Note on sampler global state safety:
|
|
191
227
|
//
|
|
@@ -198,6 +234,11 @@ static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
|
|
|
198
234
|
static VALUE active_sampler_instance = Qnil;
|
|
199
235
|
static struct cpu_and_wall_time_worker_state *active_sampler_instance_state = NULL;
|
|
200
236
|
|
|
237
|
+
// See handle_sampling_signal for details on what this does
|
|
238
|
+
#ifdef NO_POSTPONED_TRIGGER
|
|
239
|
+
static void *gc_finalize_deferred_workaround;
|
|
240
|
+
#endif
|
|
241
|
+
|
|
201
242
|
// Used to implement CpuAndWallTimeWorker._native_allocation_count . To be able to use cheap thread-local variables
|
|
202
243
|
// (here with `__thread`, see https://gcc.gnu.org/onlinedocs/gcc/Thread-Local.html), this needs to be global.
|
|
203
244
|
//
|
|
@@ -208,6 +249,18 @@ __thread uint64_t allocation_count = 0;
|
|
|
208
249
|
void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
209
250
|
rb_global_variable(&active_sampler_instance);
|
|
210
251
|
|
|
252
|
+
#ifndef NO_POSTPONED_TRIGGER
|
|
253
|
+
int unused_flags = 0;
|
|
254
|
+
sample_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, sample_from_postponed_job, NULL);
|
|
255
|
+
after_gc_from_postponed_job_handle = rb_postponed_job_preregister(unused_flags, after_gc_from_postponed_job, NULL);
|
|
256
|
+
|
|
257
|
+
if (sample_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID) {
|
|
258
|
+
rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
|
|
259
|
+
}
|
|
260
|
+
#else
|
|
261
|
+
gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround();
|
|
262
|
+
#endif
|
|
263
|
+
|
|
211
264
|
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
|
|
212
265
|
VALUE collectors_cpu_and_wall_time_worker_class = rb_define_class_under(collectors_module, "CpuAndWallTimeWorker", rb_cObject);
|
|
213
266
|
// Hosts methods used for testing the native code using RSpec
|
|
@@ -223,13 +276,16 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
|
223
276
|
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
|
|
224
277
|
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);
|
|
225
278
|
|
|
226
|
-
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize,
|
|
279
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
|
|
227
280
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
|
|
228
281
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
|
|
229
282
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
|
|
230
283
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats", _native_stats, 1);
|
|
284
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stats_reset_not_thread_safe", _native_stats_reset_not_thread_safe, 1);
|
|
231
285
|
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_allocation_count", _native_allocation_count, 0);
|
|
286
|
+
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_is_running?", _native_is_running, 1);
|
|
232
287
|
rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
|
|
288
|
+
// TODO: Remove `_native_is_running` from `testing_module` once `prof-correctness` has been updated to not need it
|
|
233
289
|
rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
|
|
234
290
|
rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0);
|
|
235
291
|
rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0);
|
|
@@ -239,6 +295,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
|
|
|
239
295
|
rb_define_singleton_method(testing_module, "_native_simulate_sample_from_postponed_job", _native_simulate_sample_from_postponed_job, 0);
|
|
240
296
|
rb_define_singleton_method(testing_module, "_native_is_sigprof_blocked_in_current_thread", _native_is_sigprof_blocked_in_current_thread, 0);
|
|
241
297
|
rb_define_singleton_method(testing_module, "_native_with_blocked_sigprof", _native_with_blocked_sigprof, 0);
|
|
298
|
+
rb_define_singleton_method(testing_module, "_native_delayed_error", _native_delayed_error, 2);
|
|
242
299
|
}
|
|
243
300
|
|
|
244
301
|
// This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_worker_state
|
|
@@ -257,14 +314,18 @@ static const rb_data_type_t cpu_and_wall_time_worker_typed_data = {
|
|
|
257
314
|
static VALUE _native_new(VALUE klass) {
|
|
258
315
|
struct cpu_and_wall_time_worker_state *state = ruby_xcalloc(1, sizeof(struct cpu_and_wall_time_worker_state));
|
|
259
316
|
|
|
317
|
+
// Note: Any exceptions raised from this point until the TypedData_Wrap_Struct call will lead to the state memory
|
|
318
|
+
// being leaked.
|
|
319
|
+
|
|
260
320
|
state->gc_profiling_enabled = false;
|
|
261
|
-
state->allocation_counting_enabled = false;
|
|
262
321
|
state->no_signals_workaround_enabled = false;
|
|
263
322
|
state->dynamic_sampling_rate_enabled = true;
|
|
323
|
+
state->allocation_profiling_enabled = false;
|
|
324
|
+
state->skip_idle_samples_for_testing = false;
|
|
264
325
|
state->thread_context_collector_instance = Qnil;
|
|
265
326
|
state->idle_sampling_helper_instance = Qnil;
|
|
266
327
|
state->owner_thread = Qnil;
|
|
267
|
-
dynamic_sampling_rate_init(&state->
|
|
328
|
+
dynamic_sampling_rate_init(&state->cpu_dynamic_sampling_rate);
|
|
268
329
|
state->gc_tracepoint = Qnil;
|
|
269
330
|
state->object_allocation_tracepoint = Qnil;
|
|
270
331
|
|
|
@@ -274,7 +335,15 @@ static VALUE _native_new(VALUE klass) {
|
|
|
274
335
|
|
|
275
336
|
state->during_sample = false;
|
|
276
337
|
|
|
277
|
-
|
|
338
|
+
reset_stats_not_thread_safe(state);
|
|
339
|
+
|
|
340
|
+
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
341
|
+
if (now == 0) {
|
|
342
|
+
ruby_xfree(state);
|
|
343
|
+
rb_raise(rb_eRuntimeError, ERR_CLOCK_FAIL);
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
discrete_dynamic_sampler_init(&state->allocation_sampler, "allocation", now);
|
|
278
347
|
|
|
279
348
|
return state->self_instance = TypedData_Wrap_Struct(klass, &cpu_and_wall_time_worker_typed_data, state);
|
|
280
349
|
}
|
|
@@ -285,22 +354,39 @@ static VALUE _native_initialize(
|
|
|
285
354
|
VALUE thread_context_collector_instance,
|
|
286
355
|
VALUE gc_profiling_enabled,
|
|
287
356
|
VALUE idle_sampling_helper_instance,
|
|
288
|
-
VALUE allocation_counting_enabled,
|
|
289
357
|
VALUE no_signals_workaround_enabled,
|
|
290
|
-
VALUE dynamic_sampling_rate_enabled
|
|
358
|
+
VALUE dynamic_sampling_rate_enabled,
|
|
359
|
+
VALUE dynamic_sampling_rate_overhead_target_percentage,
|
|
360
|
+
VALUE allocation_profiling_enabled,
|
|
361
|
+
VALUE skip_idle_samples_for_testing
|
|
291
362
|
) {
|
|
292
363
|
ENFORCE_BOOLEAN(gc_profiling_enabled);
|
|
293
|
-
ENFORCE_BOOLEAN(allocation_counting_enabled);
|
|
294
364
|
ENFORCE_BOOLEAN(no_signals_workaround_enabled);
|
|
295
365
|
ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
|
|
366
|
+
ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
|
|
367
|
+
ENFORCE_BOOLEAN(allocation_profiling_enabled);
|
|
368
|
+
ENFORCE_BOOLEAN(skip_idle_samples_for_testing)
|
|
296
369
|
|
|
297
370
|
struct cpu_and_wall_time_worker_state *state;
|
|
298
371
|
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
299
372
|
|
|
300
373
|
state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue);
|
|
301
|
-
state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
|
|
302
374
|
state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
|
|
303
375
|
state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
|
|
376
|
+
state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
|
|
377
|
+
state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);
|
|
378
|
+
|
|
379
|
+
double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
|
|
380
|
+
if (!state->allocation_profiling_enabled) {
|
|
381
|
+
dynamic_sampling_rate_set_overhead_target_percentage(&state->cpu_dynamic_sampling_rate, total_overhead_target_percentage);
|
|
382
|
+
} else {
|
|
383
|
+
// TODO: May be nice to offer customization here? Distribute available "overhead" margin with a bias towards one or the other
|
|
384
|
+
// sampler.
|
|
385
|
+
dynamic_sampling_rate_set_overhead_target_percentage(&state->cpu_dynamic_sampling_rate, total_overhead_target_percentage / 2);
|
|
386
|
+
long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
387
|
+
discrete_dynamic_sampler_set_overhead_target_percentage(&state->allocation_sampler, total_overhead_target_percentage / 2, now);
|
|
388
|
+
}
|
|
389
|
+
|
|
304
390
|
state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance);
|
|
305
391
|
state->idle_sampling_helper_instance = idle_sampling_helper_instance;
|
|
306
392
|
state->gc_tracepoint = rb_tracepoint_new(Qnil, RUBY_INTERNAL_EVENT_GC_ENTER | RUBY_INTERNAL_EVENT_GC_EXIT, on_gc_event, NULL /* unused */);
|
|
@@ -327,6 +413,12 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
|
|
|
327
413
|
struct cpu_and_wall_time_worker_state *state;
|
|
328
414
|
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
329
415
|
|
|
416
|
+
// If we already got a delayed exception registered even before starting, raise before starting
|
|
417
|
+
if (state->failure_exception != Qnil) {
|
|
418
|
+
disable_tracepoints(state);
|
|
419
|
+
rb_exc_raise(state->failure_exception);
|
|
420
|
+
}
|
|
421
|
+
|
|
330
422
|
struct cpu_and_wall_time_worker_state *old_state = active_sampler_instance_state;
|
|
331
423
|
if (old_state != NULL) {
|
|
332
424
|
if (is_thread_alive(old_state->owner_thread)) {
|
|
@@ -351,7 +443,9 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
|
|
|
351
443
|
if (state->stop_thread == rb_thread_current()) return Qnil;
|
|
352
444
|
|
|
353
445
|
// Reset the dynamic sampling rate state, if any (reminder: the monotonic clock reference may change after a fork)
|
|
354
|
-
dynamic_sampling_rate_reset(&state->
|
|
446
|
+
dynamic_sampling_rate_reset(&state->cpu_dynamic_sampling_rate);
|
|
447
|
+
long now = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
448
|
+
discrete_dynamic_sampler_reset(&state->allocation_sampler, now);
|
|
355
449
|
|
|
356
450
|
// This write to a global is thread-safe BECAUSE we're still holding on to the global VM lock at this point
|
|
357
451
|
active_sampler_instance_state = state;
|
|
@@ -413,15 +507,19 @@ static VALUE _native_stop(DDTRACE_UNUSED VALUE _self, VALUE self_instance, VALUE
|
|
|
413
507
|
return stop(self_instance, /* optional_exception: */ Qnil);
|
|
414
508
|
}
|
|
415
509
|
|
|
416
|
-
static
|
|
417
|
-
struct cpu_and_wall_time_worker_state *state;
|
|
418
|
-
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
419
|
-
|
|
510
|
+
static void stop_state(struct cpu_and_wall_time_worker_state *state, VALUE optional_exception) {
|
|
420
511
|
atomic_store(&state->should_run, false);
|
|
421
512
|
state->failure_exception = optional_exception;
|
|
422
513
|
|
|
423
514
|
// Disable the tracepoints as soon as possible, so the VM doesn't keep on calling them
|
|
424
515
|
disable_tracepoints(state);
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
static VALUE stop(VALUE self_instance, VALUE optional_exception) {
|
|
519
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
520
|
+
TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
521
|
+
|
|
522
|
+
stop_state(state, optional_exception);
|
|
425
523
|
|
|
426
524
|
return Qtrue;
|
|
427
525
|
}
|
|
@@ -457,20 +555,50 @@ static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED si
|
|
|
457
555
|
|
|
458
556
|
// Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
|
|
459
557
|
// this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
558
|
+
#ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
|
|
559
|
+
rb_postponed_job_trigger(sample_from_postponed_job_handle);
|
|
560
|
+
state->stats.postponed_job_success++; // Always succeeds
|
|
561
|
+
#else
|
|
562
|
+
|
|
563
|
+
// This is a workaround for https://bugs.ruby-lang.org/issues/19991 (for Ruby < 3.3)
|
|
564
|
+
//
|
|
565
|
+
// TL;DR the `rb_postponed_job_register_one` API is not atomic (which is why it got replaced by `rb_postponed_job_trigger`)
|
|
566
|
+
// and in rare cases can cause VM crashes.
|
|
567
|
+
//
|
|
568
|
+
// Specifically, if we're interrupting `rb_postponed_job_flush` (the function that processes postponed jobs), the way
|
|
569
|
+
// that this function reads the jobs is not atomic, and can cause our call to
|
|
570
|
+
// `rb_postponed_job_register(function, arg)` to clobber an existing job that is getting dequeued.
|
|
571
|
+
// Clobbering an existing job is somewhat annoying, but the worst part is that it can happen that we clobber only
|
|
572
|
+
// the existing job's arguments.
|
|
573
|
+
// As surveyed in https://github.com/ruby/ruby/pull/8949#issuecomment-1821441370 clobbering the arguments turns out
|
|
574
|
+
// to not matter in many cases as usually `rb_postponed_job_register` calls in the VM and ecosystem ignore the argument.
|
|
575
|
+
//
|
|
576
|
+
// https://bugs.ruby-lang.org/issues/19991 is the exception: inside Ruby's `gc.c`, when dealing with object
|
|
577
|
+
// finalizers, Ruby calls `gc_finalize_deferred_register` which internally calls
|
|
578
|
+
// `rb_postponed_job_register_one(gc_finalize_deferred, objspace)`.
|
|
579
|
+
// Clobbering this call means that `gc_finalize_deferred` would get called with `NULL`, causing a segmentation fault.
|
|
580
|
+
//
|
|
581
|
+
// Note that this is quite rare: our signal needs to land at exactly the point where the VM has read the function
|
|
582
|
+
// to execute, but has yet to read the arguments. @ivoanjo: I could only reproduce it by manually changing the VM
|
|
583
|
+
// code to simulate this happening.
|
|
584
|
+
//
|
|
585
|
+
// Thus, our workaround is simple: we pass in objspace as our argument, just in case the clobbering happens.
|
|
586
|
+
// In the happy path, we never use this argument so it makes no difference. In the buggy path, we avoid crashing the VM.
|
|
587
|
+
int result = rb_postponed_job_register(0, sample_from_postponed_job, gc_finalize_deferred_workaround /* instead of NULL */);
|
|
588
|
+
|
|
589
|
+
// Officially, the result of rb_postponed_job_register_one is documented as being opaque, but in practice it does not
|
|
590
|
+
// seem to have changed between Ruby 2.3 and 3.2, and so we track it as a debugging mechanism
|
|
591
|
+
switch (result) {
|
|
592
|
+
case 0:
|
|
593
|
+
state->stats.postponed_job_full++; break;
|
|
594
|
+
case 1:
|
|
595
|
+
state->stats.postponed_job_success++; break;
|
|
596
|
+
case 2:
|
|
597
|
+
state->stats.postponed_job_skipped_already_existed++; break;
|
|
598
|
+
default:
|
|
599
|
+
state->stats.postponed_job_unknown_result++;
|
|
600
|
+
}
|
|
601
|
+
#endif
|
|
474
602
|
}
|
|
475
603
|
|
|
476
604
|
// The actual sampling trigger loop always runs **without** the global vm lock.
|
|
@@ -498,17 +626,23 @@ static void *run_sampling_trigger_loop(void *state_ptr) {
|
|
|
498
626
|
// Note that reading the GVL owner and sending them a signal is a race -- the Ruby VM keeps on executing while
|
|
499
627
|
// we're doing this, so we may still not signal the correct thread from time to time, but our signal handler
|
|
500
628
|
// includes a check to see if it got called in the right thread
|
|
629
|
+
state->stats.interrupt_thread_attempts++;
|
|
501
630
|
pthread_kill(owner.owner, SIGPROF);
|
|
502
631
|
} else {
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
632
|
+
if (state->skip_idle_samples_for_testing) {
|
|
633
|
+
// This was added to make sure our tests don't accidentally pass due to idle samples. Specifically, if we
|
|
634
|
+
// comment out the thread interruption code inside `if (owner.valid)` above, our tests should not pass!
|
|
635
|
+
} else {
|
|
636
|
+
// If no thread owns the Global VM Lock, the application is probably idle at the moment. We still want to sample
|
|
637
|
+
// so we "ask a friend" (the IdleSamplingHelper component) to grab the GVL and simulate getting a SIGPROF.
|
|
638
|
+
//
|
|
639
|
+
// In a previous version of the code, we called `grab_gvl_and_sample` directly BUT this was problematic because
|
|
640
|
+
// Ruby may concurrently get busy and so the CpuAndWallTimeWorker would be blocked in line to acquire the GVL
|
|
641
|
+
// for an uncontrolled amount of time. (This can still happen to the IdleSamplingHelper, but the
|
|
642
|
+
// CpuAndWallTimeWorker will still be free to interrupt the Ruby VM and keep sampling for the entire blocking period).
|
|
643
|
+
state->stats.trigger_simulated_signal_delivery_attempts++;
|
|
644
|
+
idle_sampling_helper_request_action(state->idle_sampling_helper_instance, grab_gvl_and_sample);
|
|
645
|
+
}
|
|
512
646
|
}
|
|
513
647
|
}
|
|
514
648
|
|
|
@@ -519,7 +653,7 @@ static void *run_sampling_trigger_loop(void *state_ptr) {
|
|
|
519
653
|
// Note that we deliberately should NOT combine this sleep_for with the one above because the result of
|
|
520
654
|
// `dynamic_sampling_rate_get_sleep` may have changed while the above sleep was ongoing.
|
|
521
655
|
uint64_t extra_sleep =
|
|
522
|
-
dynamic_sampling_rate_get_sleep(&state->
|
|
656
|
+
dynamic_sampling_rate_get_sleep(&state->cpu_dynamic_sampling_rate, monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE));
|
|
523
657
|
if (state->dynamic_sampling_rate_enabled && extra_sleep > 0) sleep_for(extra_sleep);
|
|
524
658
|
}
|
|
525
659
|
|
|
@@ -559,12 +693,12 @@ static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
|
|
|
559
693
|
|
|
560
694
|
long wall_time_ns_before_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
|
|
561
695
|
|
|
562
|
-
if (state->dynamic_sampling_rate_enabled && !dynamic_sampling_rate_should_sample(&state->
|
|
563
|
-
state->stats.
|
|
696
|
+
if (state->dynamic_sampling_rate_enabled && !dynamic_sampling_rate_should_sample(&state->cpu_dynamic_sampling_rate, wall_time_ns_before_sample)) {
|
|
697
|
+
state->stats.cpu_skipped++;
|
|
564
698
|
return Qnil;
|
|
565
699
|
}
|
|
566
700
|
|
|
567
|
-
state->stats.
|
|
701
|
+
state->stats.cpu_sampled++;
|
|
568
702
|
|
|
569
703
|
VALUE profiler_overhead_stack_thread = state->owner_thread; // Used to attribute profiler overhead to a different stack
|
|
570
704
|
thread_context_collector_sample(state->thread_context_collector_instance, wall_time_ns_before_sample, profiler_overhead_stack_thread);
|
|
@@ -575,11 +709,11 @@ static VALUE rescued_sample_from_postponed_job(VALUE self_instance) {
|
|
|
575
709
|
// Guard against wall-time going backwards, see https://github.com/DataDog/dd-trace-rb/pull/2336 for discussion.
|
|
576
710
|
uint64_t sampling_time_ns = delta_ns < 0 ? 0 : delta_ns;
|
|
577
711
|
|
|
578
|
-
state->stats.
|
|
579
|
-
state->stats.
|
|
580
|
-
state->stats.
|
|
712
|
+
state->stats.cpu_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.cpu_sampling_time_ns_min);
|
|
713
|
+
state->stats.cpu_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.cpu_sampling_time_ns_max);
|
|
714
|
+
state->stats.cpu_sampling_time_ns_total += sampling_time_ns;
|
|
581
715
|
|
|
582
|
-
dynamic_sampling_rate_after_sample(&state->
|
|
716
|
+
dynamic_sampling_rate_after_sample(&state->cpu_dynamic_sampling_rate, wall_time_ns_after_sample, sampling_time_ns);
|
|
583
717
|
|
|
584
718
|
// Return a dummy VALUE because we're called from rb_rescue2 which requires it
|
|
585
719
|
return Qnil;
|
|
@@ -617,7 +751,10 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
|
|
|
617
751
|
// because they may raise exceptions.
|
|
618
752
|
install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal");
|
|
619
753
|
if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint);
|
|
620
|
-
if (state->
|
|
754
|
+
if (state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint);
|
|
755
|
+
|
|
756
|
+
// Flag the profiler as running before we release the GVL, in case anyone's waiting to know about it
|
|
757
|
+
rb_funcall(instance, rb_intern("signal_running"), 0);
|
|
621
758
|
|
|
622
759
|
rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state);
|
|
623
760
|
|
|
@@ -699,28 +836,17 @@ static void on_gc_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
|
|
|
699
836
|
if (event == RUBY_INTERNAL_EVENT_GC_ENTER) {
|
|
700
837
|
thread_context_collector_on_gc_start(state->thread_context_collector_instance);
|
|
701
838
|
} else if (event == RUBY_INTERNAL_EVENT_GC_EXIT) {
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
//
|
|
705
|
-
//
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
// making the sampling process allocation-safe (very hard); or separate stack sampling from sample recording,
|
|
714
|
-
// e.g. enabling us to capture the stack in thread_context_collector_on_gc_finish and do the rest later
|
|
715
|
-
// (medium hard).
|
|
716
|
-
|
|
717
|
-
thread_context_collector_on_gc_finish(state->thread_context_collector_instance);
|
|
718
|
-
// We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc after it
|
|
719
|
-
// fully finishes the garbage collection, so that one is allowed to do allocations and throw exceptions as usual.
|
|
720
|
-
//
|
|
721
|
-
// Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as
|
|
722
|
-
// this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3.
|
|
723
|
-
rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
|
|
839
|
+
bool should_flush = thread_context_collector_on_gc_finish(state->thread_context_collector_instance);
|
|
840
|
+
|
|
841
|
+
// We use a postponed job (rb_postponed_job_trigger on Ruby 3.3+, rb_postponed_job_register_one otherwise) to ask Ruby to run thread_context_collector_sample_after_gc when the
|
|
842
|
+
// thread collector flags it's time to flush.
|
|
843
|
+
if (should_flush) {
|
|
844
|
+
#ifndef NO_POSTPONED_TRIGGER // Ruby 3.3+
|
|
845
|
+
rb_postponed_job_trigger(after_gc_from_postponed_job_handle);
|
|
846
|
+
#else
|
|
847
|
+
rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL);
|
|
848
|
+
#endif
|
|
849
|
+
}
|
|
724
850
|
}
|
|
725
851
|
}
|
|
726
852
|
|
|
@@ -787,7 +913,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE instance)
|
|
|
787
913
|
// Disable all tracepoints, so that there are no more attempts to mutate the profile
|
|
788
914
|
disable_tracepoints(state);
|
|
789
915
|
|
|
790
|
-
|
|
916
|
+
reset_stats_not_thread_safe(state);
|
|
791
917
|
|
|
792
918
|
// Remove all state from the `Collectors::ThreadState` and connected downstream components
|
|
793
919
|
rb_funcall(state->thread_context_collector_instance, rb_intern("reset_after_fork"), 0);
|
|
@@ -803,11 +929,15 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
|
803
929
|
struct cpu_and_wall_time_worker_state *state;
|
|
804
930
|
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
805
931
|
|
|
806
|
-
|
|
807
|
-
VALUE
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
932
|
+
unsigned long total_cpu_samples_attempted = state->stats.cpu_sampled + state->stats.cpu_skipped;
|
|
933
|
+
VALUE effective_cpu_sample_rate =
|
|
934
|
+
total_cpu_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.cpu_sampled) / total_cpu_samples_attempted);
|
|
935
|
+
unsigned long total_allocation_samples_attempted = state->stats.allocation_sampled + state->stats.allocation_skipped;
|
|
936
|
+
VALUE effective_allocation_sample_rate =
|
|
937
|
+
total_allocation_samples_attempted == 0 ? Qnil : DBL2NUM(((double) state->stats.allocation_sampled) / total_allocation_samples_attempted);
|
|
938
|
+
|
|
939
|
+
VALUE allocation_sampler_snapshot = state->allocation_profiling_enabled && state->dynamic_sampling_rate_enabled ?
|
|
940
|
+
discrete_dynamic_sampler_state_snapshot(&state->allocation_sampler) : Qnil;
|
|
811
941
|
|
|
812
942
|
VALUE stats_as_hash = rb_hash_new();
|
|
813
943
|
VALUE arguments[] = {
|
|
@@ -816,22 +946,43 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
|
816
946
|
ID2SYM(rb_intern("simulated_signal_delivery")), /* => */ UINT2NUM(state->stats.simulated_signal_delivery),
|
|
817
947
|
ID2SYM(rb_intern("signal_handler_enqueued_sample")), /* => */ UINT2NUM(state->stats.signal_handler_enqueued_sample),
|
|
818
948
|
ID2SYM(rb_intern("signal_handler_wrong_thread")), /* => */ UINT2NUM(state->stats.signal_handler_wrong_thread),
|
|
819
|
-
ID2SYM(rb_intern("sampled")), /* => */ UINT2NUM(state->stats.sampled),
|
|
820
|
-
ID2SYM(rb_intern("skipped_sample_because_of_dynamic_sampling_rate")), /* => */ UINT2NUM(state->stats.skipped_sample_because_of_dynamic_sampling_rate),
|
|
821
949
|
ID2SYM(rb_intern("postponed_job_skipped_already_existed")), /* => */ UINT2NUM(state->stats.postponed_job_skipped_already_existed),
|
|
822
950
|
ID2SYM(rb_intern("postponed_job_success")), /* => */ UINT2NUM(state->stats.postponed_job_success),
|
|
823
951
|
ID2SYM(rb_intern("postponed_job_full")), /* => */ UINT2NUM(state->stats.postponed_job_full),
|
|
824
952
|
ID2SYM(rb_intern("postponed_job_unknown_result")), /* => */ UINT2NUM(state->stats.postponed_job_unknown_result),
|
|
825
|
-
ID2SYM(rb_intern("
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
ID2SYM(rb_intern("
|
|
829
|
-
ID2SYM(rb_intern("
|
|
953
|
+
ID2SYM(rb_intern("interrupt_thread_attempts")), /* => */ UINT2NUM(state->stats.interrupt_thread_attempts),
|
|
954
|
+
|
|
955
|
+
// CPU Stats
|
|
956
|
+
ID2SYM(rb_intern("cpu_sampled")), /* => */ UINT2NUM(state->stats.cpu_sampled),
|
|
957
|
+
ID2SYM(rb_intern("cpu_skipped")), /* => */ UINT2NUM(state->stats.cpu_skipped),
|
|
958
|
+
ID2SYM(rb_intern("cpu_effective_sample_rate")), /* => */ effective_cpu_sample_rate,
|
|
959
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
|
|
960
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_max, > 0, ULL2NUM),
|
|
961
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.cpu_sampling_time_ns_total, > 0, ULL2NUM),
|
|
962
|
+
ID2SYM(rb_intern("cpu_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.cpu_sampling_time_ns_total, state->stats.cpu_sampled),
|
|
963
|
+
|
|
964
|
+
// Allocation stats
|
|
965
|
+
ID2SYM(rb_intern("allocation_sampled")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_sampled) : Qnil,
|
|
966
|
+
ID2SYM(rb_intern("allocation_skipped")), /* => */ state->allocation_profiling_enabled ? ULONG2NUM(state->stats.allocation_skipped) : Qnil,
|
|
967
|
+
ID2SYM(rb_intern("allocation_effective_sample_rate")), /* => */ effective_allocation_sample_rate,
|
|
968
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
|
|
969
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_max, > 0, ULL2NUM),
|
|
970
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.allocation_sampling_time_ns_total, > 0, ULL2NUM),
|
|
971
|
+
ID2SYM(rb_intern("allocation_sampling_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats.allocation_sampling_time_ns_total, state->stats.allocation_sampled),
|
|
972
|
+
ID2SYM(rb_intern("allocation_sampler_snapshot")), /* => */ allocation_sampler_snapshot,
|
|
973
|
+
ID2SYM(rb_intern("allocations_during_sample")), /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,
|
|
830
974
|
};
|
|
831
975
|
for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
|
|
832
976
|
return stats_as_hash;
|
|
833
977
|
}
|
|
834
978
|
|
|
979
|
+
static VALUE _native_stats_reset_not_thread_safe(DDTRACE_UNUSED VALUE self, VALUE instance) {
|
|
980
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
981
|
+
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
982
|
+
reset_stats_not_thread_safe(state);
|
|
983
|
+
return Qnil;
|
|
984
|
+
}
|
|
985
|
+
|
|
835
986
|
void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
|
|
836
987
|
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
|
837
988
|
|
|
@@ -849,9 +1000,17 @@ void *simulate_sampling_signal_delivery(DDTRACE_UNUSED void *_unused) {
|
|
|
849
1000
|
|
|
850
1001
|
static void grab_gvl_and_sample(void) { rb_thread_call_with_gvl(simulate_sampling_signal_delivery, NULL); }
|
|
851
1002
|
|
|
852
|
-
static void
|
|
853
|
-
|
|
854
|
-
|
|
1003
|
+
static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *state) {
|
|
1004
|
+
// NOTE: This is not really thread safe so ongoing sampling operations that are concurrent with a reset can have their stats:
|
|
1005
|
+
// * Lost (writes after stats retrieval but before reset).
|
|
1006
|
+
// * Included in the previous stats window (writes before stats retrieval and reset).
|
|
1007
|
+
// * Included in the following stats window (writes after stats retrieval and reset).
|
|
1008
|
+
// Given the expected infrequency of resetting (~once per 60s profile) and the auxiliary/non-critical nature of these stats
|
|
1009
|
+
// this momentary loss of accuracy is deemed acceptable to keep overhead to a minimum.
|
|
1010
|
+
state->stats = (struct stats) {
|
|
1011
|
+
.cpu_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
|
|
1012
|
+
.allocation_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
|
|
1013
|
+
};
|
|
855
1014
|
}
|
|
856
1015
|
|
|
857
1016
|
static void sleep_for(uint64_t time_ns) {
|
|
@@ -873,14 +1032,14 @@ static void sleep_for(uint64_t time_ns) {
|
|
|
873
1032
|
}
|
|
874
1033
|
|
|
875
1034
|
static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
|
|
876
|
-
bool
|
|
1035
|
+
bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled;
|
|
877
1036
|
|
|
878
|
-
return
|
|
1037
|
+
return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
|
|
879
1038
|
}
|
|
880
1039
|
|
|
881
1040
|
// Implements memory-related profiling events. This function is called by Ruby via the `object_allocation_tracepoint`
|
|
882
1041
|
// when the RUBY_INTERNAL_EVENT_NEWOBJ event is triggered.
|
|
883
|
-
static void on_newobj_event(
|
|
1042
|
+
static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
|
|
884
1043
|
// Update thread-local allocation count
|
|
885
1044
|
if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
|
|
886
1045
|
allocation_count = 0;
|
|
@@ -901,20 +1060,53 @@ static void on_newobj_event(DDTRACE_UNUSED VALUE tracepoint_data, DDTRACE_UNUSED
|
|
|
901
1060
|
return;
|
|
902
1061
|
}
|
|
903
1062
|
|
|
1063
|
+
if (state->dynamic_sampling_rate_enabled) {
|
|
1064
|
+
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
1065
|
+
if (now == 0) {
|
|
1066
|
+
delayed_error(state, ERR_CLOCK_FAIL);
|
|
1067
|
+
return;
|
|
1068
|
+
}
|
|
1069
|
+
if (!discrete_dynamic_sampler_should_sample(&state->allocation_sampler, now)) {
|
|
1070
|
+
state->stats.allocation_skipped++;
|
|
1071
|
+
return;
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1074
|
+
|
|
904
1075
|
// @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous
|
|
905
1076
|
// invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided
|
|
906
1077
|
// to keep this here for consistency -- every call to the thread context (other than the special gc calls which are
|
|
907
1078
|
// defined as not being able to allocate) sets this.
|
|
908
1079
|
state->during_sample = true;
|
|
909
1080
|
|
|
910
|
-
//
|
|
1081
|
+
// Rescue against any exceptions that happen during sampling
|
|
1082
|
+
safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance);
|
|
1083
|
+
|
|
1084
|
+
if (state->dynamic_sampling_rate_enabled) {
|
|
1085
|
+
long now = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
|
|
1086
|
+
if (now == 0) {
|
|
1087
|
+
delayed_error(state, ERR_CLOCK_FAIL);
|
|
1088
|
+
// NOTE: Not short-circuiting here to make sure cleanup happens
|
|
1089
|
+
}
|
|
1090
|
+
uint64_t sampling_time_ns = discrete_dynamic_sampler_after_sample(&state->allocation_sampler, now);
|
|
1091
|
+
// NOTE: To keep things lean when dynamic sampling rate is disabled we skip clock interactions which is
|
|
1092
|
+
// why we're fine with having this inside this conditional.
|
|
1093
|
+
state->stats.allocation_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.allocation_sampling_time_ns_min);
|
|
1094
|
+
state->stats.allocation_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.allocation_sampling_time_ns_max);
|
|
1095
|
+
state->stats.allocation_sampling_time_ns_total += sampling_time_ns;
|
|
1096
|
+
}
|
|
1097
|
+
|
|
1098
|
+
state->stats.allocation_sampled++;
|
|
911
1099
|
|
|
912
1100
|
state->during_sample = false;
|
|
913
1101
|
}
|
|
914
1102
|
|
|
915
1103
|
static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) {
|
|
916
|
-
|
|
917
|
-
|
|
1104
|
+
if (state->gc_tracepoint != Qnil) {
|
|
1105
|
+
rb_tracepoint_disable(state->gc_tracepoint);
|
|
1106
|
+
}
|
|
1107
|
+
if (state->object_allocation_tracepoint != Qnil) {
|
|
1108
|
+
rb_tracepoint_disable(state->object_allocation_tracepoint);
|
|
1109
|
+
}
|
|
918
1110
|
}
|
|
919
1111
|
|
|
920
1112
|
static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
|
|
@@ -929,3 +1121,41 @@ static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self) {
|
|
|
929
1121
|
return result;
|
|
930
1122
|
}
|
|
931
1123
|
}
|
|
1124
|
+
|
|
1125
|
+
static VALUE rescued_sample_allocation(VALUE tracepoint_data) {
|
|
1126
|
+
struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
|
|
1127
|
+
|
|
1128
|
+
// This should not happen in a normal situation because on_newobj_event already checked for this, but just in case...
|
|
1129
|
+
if (state == NULL) return Qnil;
|
|
1130
|
+
|
|
1131
|
+
rb_trace_arg_t *data = rb_tracearg_from_tracepoint(tracepoint_data);
|
|
1132
|
+
VALUE new_object = rb_tracearg_object(data);
|
|
1133
|
+
|
|
1134
|
+
unsigned long allocations_since_last_sample = state->dynamic_sampling_rate_enabled ?
|
|
1135
|
+
// if we're doing dynamic sampling, ask the sampler how many events since last sample
|
|
1136
|
+
discrete_dynamic_sampler_events_since_last_sample(&state->allocation_sampler) :
|
|
1137
|
+
// if we aren't, then we're sampling every event
|
|
1138
|
+
1;
|
|
1139
|
+
// TODO: Signal in the profile that clamping happened?
|
|
1140
|
+
unsigned int weight = allocations_since_last_sample > MAX_ALLOC_WEIGHT ? MAX_ALLOC_WEIGHT : (unsigned int) allocations_since_last_sample;
|
|
1141
|
+
thread_context_collector_sample_allocation(state->thread_context_collector_instance, weight, new_object);
|
|
1142
|
+
|
|
1143
|
+
// Return a dummy VALUE because we're called from rb_rescue2 which requires it
|
|
1144
|
+
return Qnil;
|
|
1145
|
+
}
|
|
1146
|
+
|
|
1147
|
+
static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error) {
|
|
1148
|
+
// If we can't raise an immediate exception at the calling site, use the asynchronous flow through the main worker loop.
|
|
1149
|
+
stop_state(state, rb_exc_new_cstr(rb_eRuntimeError, error));
|
|
1150
|
+
}
|
|
1151
|
+
|
|
1152
|
+
static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg) {
|
|
1153
|
+
ENFORCE_TYPE(error_msg, T_STRING);
|
|
1154
|
+
|
|
1155
|
+
struct cpu_and_wall_time_worker_state *state;
|
|
1156
|
+
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);
|
|
1157
|
+
|
|
1158
|
+
delayed_error(state, rb_string_value_cstr(&error_msg));
|
|
1159
|
+
|
|
1160
|
+
return Qnil;
|
|
1161
|
+
}
|