datadog 2.3.0 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (129) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +37 -1
  3. data/ext/datadog_profiling_loader/datadog_profiling_loader.c +9 -1
  4. data/ext/datadog_profiling_loader/extconf.rb +10 -22
  5. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +148 -30
  6. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +4 -2
  7. data/ext/datadog_profiling_native_extension/collectors_stack.c +89 -46
  8. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +580 -29
  9. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +9 -1
  10. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +0 -27
  11. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +0 -4
  12. data/ext/datadog_profiling_native_extension/extconf.rb +38 -21
  13. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +50 -0
  14. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +75 -0
  15. data/ext/datadog_profiling_native_extension/heap_recorder.c +20 -6
  16. data/ext/datadog_profiling_native_extension/http_transport.c +38 -6
  17. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +52 -1
  18. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +3 -0
  19. data/ext/datadog_profiling_native_extension/profiling.c +1 -1
  20. data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
  21. data/ext/libdatadog_api/crashtracker.c +20 -18
  22. data/ext/libdatadog_api/datadog_ruby_common.c +0 -27
  23. data/ext/libdatadog_api/datadog_ruby_common.h +0 -4
  24. data/ext/libdatadog_extconf_helpers.rb +1 -1
  25. data/lib/datadog/appsec/assets/waf_rules/recommended.json +2184 -108
  26. data/lib/datadog/appsec/assets/waf_rules/strict.json +1430 -2
  27. data/lib/datadog/appsec/component.rb +29 -8
  28. data/lib/datadog/appsec/configuration/settings.rb +2 -2
  29. data/lib/datadog/appsec/contrib/devise/patcher/authenticatable_patch.rb +1 -0
  30. data/lib/datadog/appsec/contrib/devise/patcher/rememberable_patch.rb +21 -0
  31. data/lib/datadog/appsec/contrib/devise/patcher.rb +12 -2
  32. data/lib/datadog/appsec/contrib/graphql/appsec_trace.rb +0 -14
  33. data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +67 -31
  34. data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +18 -15
  35. data/lib/datadog/appsec/contrib/graphql/integration.rb +14 -1
  36. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +2 -5
  37. data/lib/datadog/appsec/event.rb +1 -1
  38. data/lib/datadog/appsec/processor/rule_loader.rb +3 -1
  39. data/lib/datadog/appsec/processor/rule_merger.rb +33 -15
  40. data/lib/datadog/appsec/processor.rb +36 -37
  41. data/lib/datadog/appsec/rate_limiter.rb +25 -40
  42. data/lib/datadog/appsec/remote.rb +7 -3
  43. data/lib/datadog/appsec.rb +2 -2
  44. data/lib/datadog/core/configuration/components.rb +4 -3
  45. data/lib/datadog/core/configuration/settings.rb +84 -5
  46. data/lib/datadog/core/crashtracking/component.rb +1 -1
  47. data/lib/datadog/core/environment/execution.rb +5 -5
  48. data/lib/datadog/core/metrics/client.rb +7 -0
  49. data/lib/datadog/core/rate_limiter.rb +183 -0
  50. data/lib/datadog/core/remote/client/capabilities.rb +4 -3
  51. data/lib/datadog/core/remote/component.rb +4 -2
  52. data/lib/datadog/core/remote/negotiation.rb +4 -4
  53. data/lib/datadog/core/remote/tie.rb +2 -0
  54. data/lib/datadog/core/runtime/metrics.rb +1 -1
  55. data/lib/datadog/core/telemetry/component.rb +2 -0
  56. data/lib/datadog/core/telemetry/event.rb +12 -7
  57. data/lib/datadog/core/telemetry/logger.rb +51 -0
  58. data/lib/datadog/core/telemetry/logging.rb +50 -14
  59. data/lib/datadog/core/telemetry/request.rb +13 -1
  60. data/lib/datadog/core/utils/time.rb +12 -0
  61. data/lib/datadog/di/code_tracker.rb +168 -0
  62. data/lib/datadog/di/configuration/settings.rb +163 -0
  63. data/lib/datadog/di/configuration.rb +11 -0
  64. data/lib/datadog/di/error.rb +31 -0
  65. data/lib/datadog/di/extensions.rb +16 -0
  66. data/lib/datadog/di/probe.rb +133 -0
  67. data/lib/datadog/di/probe_builder.rb +41 -0
  68. data/lib/datadog/di/redactor.rb +188 -0
  69. data/lib/datadog/di/serializer.rb +193 -0
  70. data/lib/datadog/di.rb +14 -0
  71. data/lib/datadog/opentelemetry/sdk/propagator.rb +2 -0
  72. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +12 -10
  73. data/lib/datadog/profiling/collectors/info.rb +12 -3
  74. data/lib/datadog/profiling/collectors/thread_context.rb +26 -0
  75. data/lib/datadog/profiling/component.rb +20 -4
  76. data/lib/datadog/profiling/http_transport.rb +6 -1
  77. data/lib/datadog/profiling/scheduler.rb +2 -0
  78. data/lib/datadog/profiling/stack_recorder.rb +3 -0
  79. data/lib/datadog/single_step_instrument.rb +12 -0
  80. data/lib/datadog/tracing/contrib/action_cable/instrumentation.rb +8 -12
  81. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +5 -0
  82. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +78 -0
  83. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/patcher.rb +33 -0
  84. data/lib/datadog/tracing/contrib/action_pack/patcher.rb +2 -0
  85. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +4 -0
  86. data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +3 -1
  87. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +3 -1
  88. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +5 -1
  89. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +5 -0
  90. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +6 -1
  91. data/lib/datadog/tracing/contrib/faraday/middleware.rb +9 -0
  92. data/lib/datadog/tracing/contrib/grape/endpoint.rb +19 -0
  93. data/lib/datadog/tracing/contrib/graphql/patcher.rb +9 -12
  94. data/lib/datadog/tracing/contrib/graphql/trace_patcher.rb +3 -3
  95. data/lib/datadog/tracing/contrib/graphql/tracing_patcher.rb +3 -3
  96. data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +13 -9
  97. data/lib/datadog/tracing/contrib/graphql/unified_trace_patcher.rb +6 -3
  98. data/lib/datadog/tracing/contrib/http/instrumentation.rb +18 -15
  99. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +6 -5
  100. data/lib/datadog/tracing/contrib/httpclient/patcher.rb +1 -14
  101. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +5 -0
  102. data/lib/datadog/tracing/contrib/httprb/patcher.rb +1 -14
  103. data/lib/datadog/tracing/contrib/lograge/patcher.rb +1 -2
  104. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +2 -0
  105. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +13 -6
  106. data/lib/datadog/tracing/contrib/patcher.rb +2 -1
  107. data/lib/datadog/tracing/contrib/presto/patcher.rb +1 -13
  108. data/lib/datadog/tracing/contrib/rack/middlewares.rb +27 -0
  109. data/lib/datadog/tracing/contrib/redis/tags.rb +4 -0
  110. data/lib/datadog/tracing/contrib/sinatra/tracer.rb +4 -0
  111. data/lib/datadog/tracing/contrib/stripe/request.rb +3 -2
  112. data/lib/datadog/tracing/distributed/propagation.rb +7 -0
  113. data/lib/datadog/tracing/metadata/ext.rb +2 -0
  114. data/lib/datadog/tracing/remote.rb +5 -2
  115. data/lib/datadog/tracing/sampling/matcher.rb +6 -1
  116. data/lib/datadog/tracing/sampling/rate_sampler.rb +1 -1
  117. data/lib/datadog/tracing/sampling/rule.rb +2 -0
  118. data/lib/datadog/tracing/sampling/rule_sampler.rb +9 -5
  119. data/lib/datadog/tracing/sampling/span/ext.rb +1 -1
  120. data/lib/datadog/tracing/sampling/span/rule.rb +2 -2
  121. data/lib/datadog/tracing/trace_operation.rb +26 -2
  122. data/lib/datadog/tracing/tracer.rb +14 -12
  123. data/lib/datadog/tracing/transport/http/client.rb +1 -0
  124. data/lib/datadog/tracing/transport/io/client.rb +1 -0
  125. data/lib/datadog/tracing/workers/trace_writer.rb +1 -1
  126. data/lib/datadog/tracing/workers.rb +1 -1
  127. data/lib/datadog/version.rb +1 -1
  128. metadata +25 -8
  129. data/lib/datadog/tracing/sampling/rate_limiter.rb +0 -185
@@ -1,60 +1,45 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../core/rate_limiter'
4
+
3
5
  module Datadog
4
6
  module AppSec
5
- # Simple per-thread rate limiter
6
- # Since AppSec marks sampling to keep on a security event, this limits the flood of egress traces involving AppSec
7
+ # Per-thread rate limiter based on token bucket rate limiter.
8
+ #
9
+ # Since AppSec marks sampling to keep on a security event, this limits
10
+ # the flood of egress traces involving AppSec
7
11
  class RateLimiter
8
- def initialize(rate)
9
- @rate = rate
10
- @timestamps = []
11
- end
12
-
13
- def limit
14
- now = Time.now.to_f
15
-
16
- loop do
17
- oldest = @timestamps.first
18
-
19
- break if oldest.nil? || now - oldest < 1
20
-
21
- @timestamps.shift
22
- end
23
-
24
- @timestamps << now
25
-
26
- if (count = @timestamps.count) <= @rate
27
- yield
28
- else
29
- Datadog.logger.debug { "Rate limit hit: #{count}/#{@rate} AppSec traces/second" }
30
- end
31
- end
12
+ THREAD_KEY = :datadog_security_appsec_rate_limiter
32
13
 
33
14
  class << self
34
- def limit(name, &block)
35
- rate_limiter(name).limit(&block)
15
+ def thread_local
16
+ rate_limiter = Thread.current.thread_variable_get(THREAD_KEY)
17
+ return rate_limiter unless rate_limiter.nil?
18
+
19
+ Thread.current.thread_variable_set(THREAD_KEY, new(trace_rate_limit))
36
20
  end
37
21
 
38
22
  # reset a rate limiter: used for testing
39
- def reset!(name)
40
- Thread.current[:datadog_security_trace_rate_limiter] = nil
23
+ def reset!
24
+ Thread.current.thread_variable_set(THREAD_KEY, nil)
41
25
  end
42
26
 
43
- protected
44
-
45
- def rate_limiter(name)
46
- case name
47
- when :traces
48
- Thread.current[:datadog_security_trace_rate_limiter] ||= RateLimiter.new(trace_rate_limit)
49
- else
50
- raise "unsupported rate limiter: #{name.inspect}"
51
- end
52
- end
27
+ private
53
28
 
54
29
  def trace_rate_limit
55
30
  Datadog.configuration.appsec.trace_rate_limit
56
31
  end
57
32
  end
33
+
34
+ def initialize(rate)
35
+ @rate_limiter = Core::TokenBucket.new(rate)
36
+ end
37
+
38
+ def limit
39
+ return yield if @rate_limiter.allow?
40
+
41
+ Datadog.logger.debug { "Rate limit hit: #{@rate_limiter.current_window_rate} AppSec traces/second" }
42
+ end
58
43
  end
59
44
  end
60
45
  end
@@ -53,7 +53,7 @@ module Datadog
53
53
  end
54
54
 
55
55
  # rubocop:disable Metrics/MethodLength
56
- def receivers
56
+ def receivers(telemetry)
57
57
  return [] unless remote_features_enabled?
58
58
 
59
59
  matcher = Core::Remote::Dispatcher::Matcher::Product.new(ASM_PRODUCTS)
@@ -86,7 +86,10 @@ module Datadog
86
86
  end
87
87
 
88
88
  if rules.empty?
89
- settings_rules = AppSec::Processor::RuleLoader.load_rules(ruleset: Datadog.configuration.appsec.ruleset)
89
+ settings_rules = AppSec::Processor::RuleLoader.load_rules(
90
+ telemetry: telemetry,
91
+ ruleset: Datadog.configuration.appsec.ruleset
92
+ )
90
93
 
91
94
  raise NoRulesError, 'no default rules available' unless settings_rules
92
95
 
@@ -99,9 +102,10 @@ module Datadog
99
102
  overrides: overrides,
100
103
  exclusions: exclusions,
101
104
  custom_rules: custom_rules,
105
+ telemetry: telemetry
102
106
  )
103
107
 
104
- Datadog::AppSec.reconfigure(ruleset: ruleset, actions: actions)
108
+ Datadog::AppSec.reconfigure(ruleset: ruleset, actions: actions, telemetry: telemetry)
105
109
  end
106
110
 
107
111
  [receiver]
@@ -23,12 +23,12 @@ module Datadog
23
23
  appsec_component.processor if appsec_component
24
24
  end
25
25
 
26
- def reconfigure(ruleset:, actions:)
26
+ def reconfigure(ruleset:, actions:, telemetry:)
27
27
  appsec_component = components.appsec
28
28
 
29
29
  return unless appsec_component
30
30
 
31
- appsec_component.reconfigure(ruleset: ruleset, actions: actions)
31
+ appsec_component.reconfigure(ruleset: ruleset, actions: actions, telemetry: telemetry)
32
32
  end
33
33
 
34
34
  def reconfigure_lock(&block)
@@ -94,7 +94,9 @@ module Datadog
94
94
  # the Core resolver from within your product/component's namespace.
95
95
  agent_settings = AgentSettingsResolver.call(settings, logger: @logger)
96
96
 
97
- @remote = Remote::Component.build(settings, agent_settings)
97
+ @telemetry = self.class.build_telemetry(settings, agent_settings, @logger)
98
+
99
+ @remote = Remote::Component.build(settings, agent_settings, telemetry: telemetry)
98
100
  @tracer = self.class.build_tracer(settings, agent_settings, logger: @logger)
99
101
  @crashtracker = self.class.build_crashtracker(settings, agent_settings, logger: @logger)
100
102
 
@@ -107,8 +109,7 @@ module Datadog
107
109
 
108
110
  @runtime_metrics = self.class.build_runtime_metrics_worker(settings)
109
111
  @health_metrics = self.class.build_health_metrics(settings)
110
- @telemetry = self.class.build_telemetry(settings, agent_settings, logger)
111
- @appsec = Datadog::AppSec::Component.build_appsec_component(settings)
112
+ @appsec = Datadog::AppSec::Component.build_appsec_component(settings, telemetry: telemetry)
112
113
 
113
114
  self.class.configure_tracing(settings)
114
115
  end
@@ -410,10 +410,8 @@ module Datadog
410
410
  # The profiler gathers data by sending `SIGPROF` unix signals to Ruby application threads.
411
411
  #
412
412
  # We've discovered that this can trigger a bug in a number of Ruby APIs in the `Dir` class, as
413
- # described in https://github.com/DataDog/dd-trace-rb/issues/3450 . This workaround prevents the issue
414
- # from happening by monkey patching the affected APIs.
415
- #
416
- # (In the future, once a fix lands upstream, we'll disable this workaround for Rubies that don't need it)
413
+ # described in https://bugs.ruby-lang.org/issues/20586 .
414
+ # This was fixed for Ruby 3.4+, and this setting is a no-op for those versions.
417
415
  #
418
416
  # @default `DD_PROFILING_DIR_INTERRUPTION_WORKAROUND_ENABLED` environment variable as a boolean,
419
417
  # otherwise `true`
@@ -462,6 +460,60 @@ module Datadog
462
460
  end
463
461
  end
464
462
  end
463
+
464
+ # Enables GVL profiling. This will show when threads are waiting for GVL in the timeline view.
465
+ #
466
+ # This is a preview feature and disabled by default. It requires Ruby 3.2+.
467
+ #
468
+ # @default `DD_PROFILING_PREVIEW_GVL_ENABLED` environment variable as a boolean, otherwise `false`
469
+ option :preview_gvl_enabled do |o|
470
+ o.type :bool
471
+ o.env 'DD_PROFILING_PREVIEW_GVL_ENABLED'
472
+ o.default false
473
+ end
474
+
475
+ # Controls the smallest time period the profiler will report a thread waiting for the GVL.
476
+ #
477
+ # The default value was set to minimize overhead. Periods smaller than the set value will not be reported (e.g.
478
+ # the thread will be reported as whatever it was doing before it waited for the GVL).
479
+ #
480
+ # We do not recommend setting this to less than 1ms. Tweaking this value can increase application latency and
481
+ # memory use.
482
+ #
483
+ # @default 10_000_000 (10ms)
484
+ option :waiting_for_gvl_threshold_ns do |o|
485
+ o.type :int
486
+ o.default 10_000_000
487
+ end
488
+
489
+ # Controls if the profiler should attempt to read context from the otel library
490
+ #
491
+ # @default false
492
+ option :preview_otel_context_enabled do |o|
493
+ o.env 'DD_PROFILING_PREVIEW_OTEL_CONTEXT_ENABLED'
494
+ o.default false
495
+ o.env_parser do |value|
496
+ if value
497
+ value = value.strip.downcase
498
+ if ['only', 'both'].include?(value)
499
+ value
500
+ elsif ['true', '1'].include?(value)
501
+ 'both'
502
+ else
503
+ 'false'
504
+ end
505
+ end
506
+ end
507
+ o.setter do |value|
508
+ if value == true
509
+ :both
510
+ elsif ['only', 'both', :only, :both].include?(value)
511
+ value.to_sym
512
+ else
513
+ false
514
+ end
515
+ end
516
+ end
465
517
  end
466
518
 
467
519
  # @public_api
@@ -630,6 +682,33 @@ module Datadog
630
682
  end
631
683
  end
632
684
 
685
+ # The monotonic clock time provider used by Datadog. This option is internal and is used by `datadog-ci`
686
+ # gem to avoid traces' durations being skewed by timecop.
687
+ #
688
+ # It must respect the interface of [Datadog::Core::Utils::Time#get_time] method.
689
+ #
690
+ # For [Timecop](https://rubygems.org/gems/timecop), for example,
691
+ # `->(unit = :float_second) { ::Process.clock_gettime_without_mock(::Process::CLOCK_MONOTONIC, unit) }`
692
+ # allows Datadog features to use the real monotonic time when time is frozen with
693
+ # `Timecop.mock_process_clock = true`.
694
+ #
695
+ # @default `->(unit = :float_second) { ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, unit)}`
696
+ # @return [Proc<Numeric>]
697
+ option :get_time_provider do |o|
698
+ o.default_proc { |unit = :float_second| ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, unit) }
699
+ o.type :proc
700
+
701
+ o.after_set do |get_time_provider|
702
+ Core::Utils::Time.get_time_provider = get_time_provider
703
+ end
704
+
705
+ o.resetter do |_value|
706
+ ->(unit = :float_second) { ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, unit) }.tap do |default|
707
+ Core::Utils::Time.get_time_provider = default
708
+ end
709
+ end
710
+ end
711
+
633
712
  # The `version` tag in Datadog. Use it to enable [Deployment Tracking](https://docs.datadoghq.com/tracing/deployment_tracking/).
634
713
  # @see https://docs.datadoghq.com/getting_started/tagging/unified_service_tagging
635
714
  # @default `DD_VERSION` environment variable, otherwise `nil`
@@ -836,7 +915,7 @@ module Datadog
836
915
  # Enables reporting of information when Ruby VM crashes.
837
916
  option :enabled do |o|
838
917
  o.type :bool
839
- o.default true
918
+ o.default false
840
919
  o.env 'DD_CRASHTRACKING_ENABLED'
841
920
  end
842
921
  end
@@ -95,7 +95,7 @@ module Datadog
95
95
  def start_or_update_on_fork(action:)
96
96
  self.class._native_start_or_update_on_fork(
97
97
  action: action,
98
- exporter_configuration: [:agent, agent_base_url],
98
+ agent_base_url: agent_base_url,
99
99
  path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary,
100
100
  ld_library_path: ld_library_path,
101
101
  tags_as_array: tags.to_a,
@@ -25,9 +25,9 @@ module Datadog
25
25
  # 2. Checking if `Net::HTTP` is referring to the original one
26
26
  # => ::Net::HTTP.equal?(::WebMock::HttpLibAdapters::NetHttpAdapter::OriginalNetHTTP)
27
27
  def webmock_enabled?
28
- defined?(::WebMock::HttpLibAdapters::NetHttpAdapter) &&
28
+ !!(defined?(::WebMock::HttpLibAdapters::NetHttpAdapter) &&
29
29
  defined?(::Net::HTTP) &&
30
- ::Net::HTTP.equal?(::WebMock::HttpLibAdapters::NetHttpAdapter.instance_variable_get(:@webMockNetHTTP))
30
+ ::Net::HTTP.equal?(::WebMock::HttpLibAdapters::NetHttpAdapter.instance_variable_get(:@webMockNetHTTP)))
31
31
  end
32
32
 
33
33
  private
@@ -68,7 +68,7 @@ module Datadog
68
68
 
69
69
  # Check if we are running from `bin/cucumber` or `cucumber/rake/task`.
70
70
  def cucumber?
71
- defined?(::Cucumber::Cli)
71
+ !!defined?(::Cucumber::Cli)
72
72
  end
73
73
 
74
74
  # If this is a Rails application, use different heuristics to detect
@@ -80,7 +80,7 @@ module Datadog
80
80
  # detecting its presence is enough to deduce if this is a development environment.
81
81
  #
82
82
  # @see https://github.com/rails/spring/blob/48b299348ace2188444489a0c216a6f3e9687281/README.md?plain=1#L204-L207
83
- defined?(::Spring) || rails_env_development?
83
+ !!defined?(::Spring) || rails_env_development?
84
84
  end
85
85
 
86
86
  RAILS_ENV_DEVELOPMENT = Set['development', 'test'].freeze
@@ -94,7 +94,7 @@ module Datadog
94
94
  # it's common to have a custom "staging" environment, and such an environment normally wants to run as close
95
95
  # to production as possible.
96
96
  def rails_env_development?
97
- defined?(::Rails.env) && RAILS_ENV_DEVELOPMENT.include?(::Rails.env)
97
+ !!defined?(::Rails.env) && RAILS_ENV_DEVELOPMENT.include?(::Rails.env)
98
98
  end
99
99
  end
100
100
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require_relative '../utils/time'
4
4
  require_relative '../utils/only_once'
5
+ require_relative '../telemetry/logger'
5
6
  require_relative '../configuration/ext'
6
7
 
7
8
  require_relative 'ext'
@@ -100,6 +101,7 @@ module Datadog
100
101
  Datadog.logger.error(
101
102
  "Failed to send count stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}"
102
103
  )
104
+ Telemetry::Logger.report(e, description: 'Failed to send count stat')
103
105
  end
104
106
 
105
107
  def distribution(stat, value = nil, options = nil, &block)
@@ -113,6 +115,7 @@ module Datadog
113
115
  Datadog.logger.error(
114
116
  "Failed to send distribution stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}"
115
117
  )
118
+ Telemetry::Logger.report(e, description: 'Failed to send distribution stat')
116
119
  end
117
120
 
118
121
  def increment(stat, options = nil)
@@ -125,6 +128,7 @@ module Datadog
125
128
  Datadog.logger.error(
126
129
  "Failed to send increment stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}"
127
130
  )
131
+ Telemetry::Logger.report(e, description: 'Failed to send increment stat')
128
132
  end
129
133
 
130
134
  def gauge(stat, value = nil, options = nil, &block)
@@ -138,6 +142,7 @@ module Datadog
138
142
  Datadog.logger.error(
139
143
  "Failed to send gauge stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}"
140
144
  )
145
+ Telemetry::Logger.report(e, description: 'Failed to send gauge stat')
141
146
  end
142
147
 
143
148
  def time(stat, options = nil)
@@ -153,9 +158,11 @@ module Datadog
153
158
  distribution(stat, ((finished - start) * 1000), options)
154
159
  end
155
160
  rescue StandardError => e
161
+ # TODO: Likely to be redundant, since `distribution` handles its own errors.
156
162
  Datadog.logger.error(
157
163
  "Failed to send time stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}"
158
164
  )
165
+ Telemetry::Logger.report(e, description: 'Failed to send time stat')
159
166
  end
160
167
  end
161
168
 
@@ -0,0 +1,183 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'utils/time'
4
+
5
+ module Datadog
6
+ module Core
7
+ # Checks for rate limiting on a resource.
8
+ class RateLimiter
9
+ # Checks if a resource of the specified size
10
+ # conforms with the current limit.
11
+ #
12
+ # Implementations of this method are not guaranteed
13
+ # to be side-effect free.
14
+ #
15
+ # @return [Boolean] whether a resource conforms with the current limit
16
+ def allow?(size = 1); end
17
+
18
+ # The effective rate limiting ratio based on
19
+ # recent calls to `allow?`.
20
+ #
21
+ # @return [Float] recent allowance ratio
22
+ def effective_rate; end
23
+ end
24
+
25
+ # Implementation of the Token Bucket metering algorithm
26
+ # for rate limiting.
27
+ #
28
+ # @see https://en.wikipedia.org/wiki/Token_bucket Token bucket
29
+ class TokenBucket < RateLimiter
30
+ attr_reader :rate, :max_tokens
31
+
32
+ # @param rate [Numeric] Allowance rate, in units per second
33
+ # if rate is negative, always allow
34
+ # if rate is zero, never allow
35
+ # @param max_tokens [Numeric] Limit of available tokens
36
+ def initialize(rate, max_tokens = rate)
37
+ super()
38
+
39
+ raise ArgumentError, "rate must be a number: #{rate}" unless rate.is_a?(Numeric)
40
+ raise ArgumentError, "max_tokens must be a number: #{max_tokens}" unless max_tokens.is_a?(Numeric)
41
+
42
+ @rate = rate
43
+ @max_tokens = max_tokens
44
+
45
+ @tokens = max_tokens
46
+ @total_messages = 0
47
+ @conforming_messages = 0
48
+ @prev_conforming_messages = nil
49
+ @prev_total_messages = nil
50
+ @current_window = nil
51
+
52
+ @last_refill = Core::Utils::Time.get_time
53
+ end
54
+
55
+ # Checks if a message of provided +size+
56
+ # conforms with the current bucket limit.
57
+ #
58
+ # If it does, return +true+ and remove +size+
59
+ # tokens from the bucket.
60
+ # If it does not, return +false+ without affecting
61
+ # the tokens from the bucket.
62
+ #
63
+ # @return [Boolean] +true+ if message conforms with current bucket limit
64
+ def allow?(size = 1)
65
+ allowed = should_allow?(size)
66
+ update_rate_counts(allowed)
67
+ allowed
68
+ end
69
+
70
+ # Ratio of 'conformance' per 'total messages' checked
71
+ # averaged for the past 2 buckets
72
+ #
73
+ # Returns +1.0+ when no messages have been checked yet.
74
+ #
75
+ # @return [Float] Conformance ratio, between +[0,1]+
76
+ def effective_rate
77
+ return 0.0 if @rate.zero?
78
+ return 1.0 if @rate < 0 || @total_messages.zero?
79
+
80
+ return current_window_rate if @prev_conforming_messages.nil? || @prev_total_messages.nil?
81
+
82
+ (@conforming_messages.to_f + @prev_conforming_messages.to_f) / (@total_messages + @prev_total_messages)
83
+ end
84
+
85
+ # Ratio of 'conformance' per 'total messages' checked
86
+ # on this bucket
87
+ #
88
+ # Returns +1.0+ when no messages have been checked yet.
89
+ #
90
+ # @return [Float] Conformance ratio, between +[0,1]+
91
+ def current_window_rate
92
+ return 1.0 if @total_messages.zero?
93
+
94
+ @conforming_messages.to_f / @total_messages
95
+ end
96
+
97
+ # @return [Numeric] number of tokens currently available
98
+ def available_tokens
99
+ @tokens
100
+ end
101
+
102
+ private
103
+
104
+ def refill_since_last_message
105
+ now = Core::Utils::Time.get_time
106
+ elapsed = now - @last_refill
107
+
108
+ # Update the number of available tokens, but ensure we do not exceed the max
109
+ # we return the min of tokens + rate*elapsed, or max tokens
110
+ refill_tokens(@rate * elapsed)
111
+
112
+ @last_refill = now
113
+ end
114
+
115
+ def refill_tokens(size)
116
+ @tokens += size
117
+ @tokens = @max_tokens if @tokens > @max_tokens
118
+ end
119
+
120
+ def increment_total_count
121
+ @total_messages += 1
122
+ end
123
+
124
+ def increment_conforming_count
125
+ @conforming_messages += 1
126
+ end
127
+
128
+ def should_allow?(size = 1)
129
+ # rate limit of 0 blocks everything
130
+ return false if @rate.zero?
131
+
132
+ # negative rate limit disables rate limiting
133
+ return true if @rate < 0
134
+
135
+ refill_since_last_message
136
+
137
+ # if there are fewer tokens available than +size+, we don't allow
138
+ return false if @tokens < size
139
+
140
+ @tokens -= size
141
+
142
+ true
143
+ end
144
+
145
+ # Sets and Updates the past two 1 second windows for which
146
+ # the rate limiter must compute its rate over and updates
147
+ # the total count, and conforming message count if +allowed+
148
+ def update_rate_counts(allowed)
149
+ now = Core::Utils::Time.get_time
150
+
151
+ # No tokens have been seen yet, start a new window
152
+ if @current_window.nil?
153
+ @current_window = now
154
+ # If more than 1 second has passed since the last window, reset
155
+ elsif now - @current_window >= 1
156
+ @prev_conforming_messages = @conforming_messages
157
+ @prev_total_messages = @total_messages
158
+ @conforming_messages = 0
159
+ @total_messages = 0
160
+ @current_window = now
161
+ end
162
+
163
+ increment_conforming_count if allowed
164
+
165
+ increment_total_count
166
+ end
167
+ end
168
+
169
+ # {Datadog::Core::RateLimiter} that accepts all resources,
170
+ # with no limits.
171
+ class UnlimitedLimiter < RateLimiter
172
+ # @return [Boolean] always +true+
173
+ def allow?(_ = 1)
174
+ true
175
+ end
176
+
177
+ # @return [Float] always 100%
178
+ def effective_rate
179
+ 1.0
180
+ end
181
+ end
182
+ end
183
+ end
@@ -12,10 +12,11 @@ module Datadog
12
12
  class Capabilities
13
13
  attr_reader :products, :capabilities, :receivers, :base64_capabilities
14
14
 
15
- def initialize(settings)
15
+ def initialize(settings, telemetry)
16
16
  @capabilities = []
17
17
  @products = []
18
18
  @receivers = []
19
+ @telemetry = telemetry
19
20
 
20
21
  register(settings)
21
22
 
@@ -28,12 +29,12 @@ module Datadog
28
29
  if settings.respond_to?(:appsec) && settings.appsec.enabled
29
30
  register_capabilities(Datadog::AppSec::Remote.capabilities)
30
31
  register_products(Datadog::AppSec::Remote.products)
31
- register_receivers(Datadog::AppSec::Remote.receivers)
32
+ register_receivers(Datadog::AppSec::Remote.receivers(@telemetry))
32
33
  end
33
34
 
34
35
  register_capabilities(Datadog::Tracing::Remote.capabilities)
35
36
  register_products(Datadog::Tracing::Remote.products)
36
- register_receivers(Datadog::Tracing::Remote.receivers)
37
+ register_receivers(Datadog::Tracing::Remote.receivers(@telemetry))
37
38
  end
38
39
 
39
40
  def register_capabilities(capabilities)
@@ -39,6 +39,7 @@ module Datadog
39
39
  @client.sync
40
40
  @healthy ||= true
41
41
  rescue Client::SyncError => e
42
+ # Transient errors due to network or agent. Log the error, but do not report it via telemetry
42
43
  Datadog.logger.error do
43
44
  "remote worker client sync error: #{e.message} location: #{Array(e.backtrace).first}. skipping sync"
44
45
  end
@@ -48,6 +49,7 @@ module Datadog
48
49
  # negotiation object stores error logging state that should be reset.
49
50
  negotiation = Negotiation.new(settings, agent_settings)
50
51
 
52
+ # Transient errors due to network or agent. Log the error, but do not report it via telemetry
51
53
  Datadog.logger.error do
52
54
  "remote worker error: #{e.class.name} #{e.message} location: #{Array(e.backtrace).first}. "\
53
55
  'reseting client state'
@@ -150,10 +152,10 @@ module Datadog
150
152
  #
151
153
  # Those checks are instead performed inside the worker loop.
152
154
  # This allows users to upgrade their agent while keeping their application running.
153
- def build(settings, agent_settings)
155
+ def build(settings, agent_settings, telemetry:)
154
156
  return unless settings.remote.enabled
155
157
 
156
- new(settings, Client::Capabilities.new(settings), agent_settings)
158
+ new(settings, Client::Capabilities.new(settings, telemetry), agent_settings)
157
159
  end
158
160
  end
159
161
  end
@@ -20,7 +20,7 @@ module Datadog
20
20
 
21
21
  if res.internal_error? && network_error?(res.error)
22
22
  unless @logged[:agent_unreachable]
23
- Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" }
23
+ Datadog.logger.warn { "agent unreachable: cannot negotiate #{path}" }
24
24
  @logged[:agent_unreachable] = true
25
25
  end
26
26
 
@@ -29,7 +29,7 @@ module Datadog
29
29
 
30
30
  if res.not_found?
31
31
  unless @logged[:no_info_endpoint]
32
- Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" }
32
+ Datadog.logger.warn { "agent reachable but has no /info endpoint: cannot negotiate #{path}" }
33
33
  @logged[:no_info_endpoint] = true
34
34
  end
35
35
 
@@ -38,7 +38,7 @@ module Datadog
38
38
 
39
39
  unless res.ok?
40
40
  unless @logged[:unexpected_response]
41
- Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" }
41
+ Datadog.logger.warn { "agent reachable but unexpected response: cannot negotiate #{path}" }
42
42
  @logged[:unexpected_response] = true
43
43
  end
44
44
 
@@ -47,7 +47,7 @@ module Datadog
47
47
 
48
48
  unless res.endpoints.include?(path)
49
49
  unless @logged[:no_config_endpoint]
50
- Datadog.logger.error { "agent reachable but does not report #{path}" }
50
+ Datadog.logger.warn { "agent reachable but does not report #{path}" }
51
51
  @logged[:no_config_endpoint] = true
52
52
  end
53
53
 
@@ -19,6 +19,8 @@ module Datadog
19
19
  barrier = Datadog::Core::Remote.active_remote.barrier(:once)
20
20
  end
21
21
 
22
+ # steep does not permit the next line due to
23
+ # https://github.com/soutaro/steep/issues/1231
22
24
  Boot.new(barrier, t)
23
25
  end
24
26
  end
@@ -94,7 +94,7 @@ module Datadog
94
94
  def try_flush
95
95
  yield
96
96
  rescue StandardError => e
97
- Datadog.logger.error("Error while sending runtime metric. Cause: #{e.class.name} #{e.message}")
97
+ Datadog.logger.warn("Error while sending runtime metric. Cause: #{e.class.name} #{e.message}")
98
98
  end
99
99
 
100
100
  def default_metric_options