datadog 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +37 -1
  3. data/ext/datadog_profiling_loader/datadog_profiling_loader.c +9 -1
  4. data/ext/datadog_profiling_loader/extconf.rb +10 -22
  5. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +148 -30
  6. data/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +4 -2
  7. data/ext/datadog_profiling_native_extension/collectors_stack.c +89 -46
  8. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +580 -29
  9. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +9 -1
  10. data/ext/datadog_profiling_native_extension/datadog_ruby_common.c +0 -27
  11. data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +0 -4
  12. data/ext/datadog_profiling_native_extension/extconf.rb +38 -21
  13. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.c +50 -0
  14. data/ext/datadog_profiling_native_extension/gvl_profiling_helper.h +75 -0
  15. data/ext/datadog_profiling_native_extension/heap_recorder.c +20 -6
  16. data/ext/datadog_profiling_native_extension/http_transport.c +38 -6
  17. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +52 -1
  18. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +3 -0
  19. data/ext/datadog_profiling_native_extension/profiling.c +1 -1
  20. data/ext/datadog_profiling_native_extension/stack_recorder.h +1 -0
  21. data/ext/libdatadog_api/crashtracker.c +20 -18
  22. data/ext/libdatadog_api/datadog_ruby_common.c +0 -27
  23. data/ext/libdatadog_api/datadog_ruby_common.h +0 -4
  24. data/ext/libdatadog_extconf_helpers.rb +1 -1
  25. data/lib/datadog/appsec/assets/waf_rules/recommended.json +2184 -108
  26. data/lib/datadog/appsec/assets/waf_rules/strict.json +1430 -2
  27. data/lib/datadog/appsec/component.rb +29 -8
  28. data/lib/datadog/appsec/configuration/settings.rb +2 -2
  29. data/lib/datadog/appsec/contrib/devise/patcher/authenticatable_patch.rb +1 -0
  30. data/lib/datadog/appsec/contrib/devise/patcher/rememberable_patch.rb +21 -0
  31. data/lib/datadog/appsec/contrib/devise/patcher.rb +12 -2
  32. data/lib/datadog/appsec/contrib/graphql/appsec_trace.rb +0 -14
  33. data/lib/datadog/appsec/contrib/graphql/gateway/multiplex.rb +67 -31
  34. data/lib/datadog/appsec/contrib/graphql/gateway/watcher.rb +18 -15
  35. data/lib/datadog/appsec/contrib/graphql/integration.rb +14 -1
  36. data/lib/datadog/appsec/contrib/rack/gateway/request.rb +2 -5
  37. data/lib/datadog/appsec/event.rb +1 -1
  38. data/lib/datadog/appsec/processor/rule_loader.rb +3 -1
  39. data/lib/datadog/appsec/processor/rule_merger.rb +33 -15
  40. data/lib/datadog/appsec/processor.rb +36 -37
  41. data/lib/datadog/appsec/rate_limiter.rb +25 -40
  42. data/lib/datadog/appsec/remote.rb +7 -3
  43. data/lib/datadog/appsec.rb +2 -2
  44. data/lib/datadog/core/configuration/components.rb +4 -3
  45. data/lib/datadog/core/configuration/settings.rb +84 -5
  46. data/lib/datadog/core/crashtracking/component.rb +1 -1
  47. data/lib/datadog/core/environment/execution.rb +5 -5
  48. data/lib/datadog/core/metrics/client.rb +7 -0
  49. data/lib/datadog/core/rate_limiter.rb +183 -0
  50. data/lib/datadog/core/remote/client/capabilities.rb +4 -3
  51. data/lib/datadog/core/remote/component.rb +4 -2
  52. data/lib/datadog/core/remote/negotiation.rb +4 -4
  53. data/lib/datadog/core/remote/tie.rb +2 -0
  54. data/lib/datadog/core/runtime/metrics.rb +1 -1
  55. data/lib/datadog/core/telemetry/component.rb +2 -0
  56. data/lib/datadog/core/telemetry/event.rb +12 -7
  57. data/lib/datadog/core/telemetry/logger.rb +51 -0
  58. data/lib/datadog/core/telemetry/logging.rb +50 -14
  59. data/lib/datadog/core/telemetry/request.rb +13 -1
  60. data/lib/datadog/core/utils/time.rb +12 -0
  61. data/lib/datadog/di/code_tracker.rb +168 -0
  62. data/lib/datadog/di/configuration/settings.rb +163 -0
  63. data/lib/datadog/di/configuration.rb +11 -0
  64. data/lib/datadog/di/error.rb +31 -0
  65. data/lib/datadog/di/extensions.rb +16 -0
  66. data/lib/datadog/di/probe.rb +133 -0
  67. data/lib/datadog/di/probe_builder.rb +41 -0
  68. data/lib/datadog/di/redactor.rb +188 -0
  69. data/lib/datadog/di/serializer.rb +193 -0
  70. data/lib/datadog/di.rb +14 -0
  71. data/lib/datadog/opentelemetry/sdk/propagator.rb +2 -0
  72. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +12 -10
  73. data/lib/datadog/profiling/collectors/info.rb +12 -3
  74. data/lib/datadog/profiling/collectors/thread_context.rb +26 -0
  75. data/lib/datadog/profiling/component.rb +20 -4
  76. data/lib/datadog/profiling/http_transport.rb +6 -1
  77. data/lib/datadog/profiling/scheduler.rb +2 -0
  78. data/lib/datadog/profiling/stack_recorder.rb +3 -0
  79. data/lib/datadog/single_step_instrument.rb +12 -0
  80. data/lib/datadog/tracing/contrib/action_cable/instrumentation.rb +8 -12
  81. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +5 -0
  82. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +78 -0
  83. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/patcher.rb +33 -0
  84. data/lib/datadog/tracing/contrib/action_pack/patcher.rb +2 -0
  85. data/lib/datadog/tracing/contrib/active_record/configuration/resolver.rb +4 -0
  86. data/lib/datadog/tracing/contrib/active_record/events/instantiation.rb +3 -1
  87. data/lib/datadog/tracing/contrib/active_record/events/sql.rb +3 -1
  88. data/lib/datadog/tracing/contrib/active_support/cache/events/cache.rb +5 -1
  89. data/lib/datadog/tracing/contrib/aws/instrumentation.rb +5 -0
  90. data/lib/datadog/tracing/contrib/elasticsearch/patcher.rb +6 -1
  91. data/lib/datadog/tracing/contrib/faraday/middleware.rb +9 -0
  92. data/lib/datadog/tracing/contrib/grape/endpoint.rb +19 -0
  93. data/lib/datadog/tracing/contrib/graphql/patcher.rb +9 -12
  94. data/lib/datadog/tracing/contrib/graphql/trace_patcher.rb +3 -3
  95. data/lib/datadog/tracing/contrib/graphql/tracing_patcher.rb +3 -3
  96. data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +13 -9
  97. data/lib/datadog/tracing/contrib/graphql/unified_trace_patcher.rb +6 -3
  98. data/lib/datadog/tracing/contrib/http/instrumentation.rb +18 -15
  99. data/lib/datadog/tracing/contrib/httpclient/instrumentation.rb +6 -5
  100. data/lib/datadog/tracing/contrib/httpclient/patcher.rb +1 -14
  101. data/lib/datadog/tracing/contrib/httprb/instrumentation.rb +5 -0
  102. data/lib/datadog/tracing/contrib/httprb/patcher.rb +1 -14
  103. data/lib/datadog/tracing/contrib/lograge/patcher.rb +1 -2
  104. data/lib/datadog/tracing/contrib/mongodb/subscribers.rb +2 -0
  105. data/lib/datadog/tracing/contrib/opensearch/patcher.rb +13 -6
  106. data/lib/datadog/tracing/contrib/patcher.rb +2 -1
  107. data/lib/datadog/tracing/contrib/presto/patcher.rb +1 -13
  108. data/lib/datadog/tracing/contrib/rack/middlewares.rb +27 -0
  109. data/lib/datadog/tracing/contrib/redis/tags.rb +4 -0
  110. data/lib/datadog/tracing/contrib/sinatra/tracer.rb +4 -0
  111. data/lib/datadog/tracing/contrib/stripe/request.rb +3 -2
  112. data/lib/datadog/tracing/distributed/propagation.rb +7 -0
  113. data/lib/datadog/tracing/metadata/ext.rb +2 -0
  114. data/lib/datadog/tracing/remote.rb +5 -2
  115. data/lib/datadog/tracing/sampling/matcher.rb +6 -1
  116. data/lib/datadog/tracing/sampling/rate_sampler.rb +1 -1
  117. data/lib/datadog/tracing/sampling/rule.rb +2 -0
  118. data/lib/datadog/tracing/sampling/rule_sampler.rb +9 -5
  119. data/lib/datadog/tracing/sampling/span/ext.rb +1 -1
  120. data/lib/datadog/tracing/sampling/span/rule.rb +2 -2
  121. data/lib/datadog/tracing/trace_operation.rb +26 -2
  122. data/lib/datadog/tracing/tracer.rb +14 -12
  123. data/lib/datadog/tracing/transport/http/client.rb +1 -0
  124. data/lib/datadog/tracing/transport/io/client.rb +1 -0
  125. data/lib/datadog/tracing/workers/trace_writer.rb +1 -1
  126. data/lib/datadog/tracing/workers.rb +1 -1
  127. data/lib/datadog/version.rb +1 -1
  128. metadata +25 -8
  129. data/lib/datadog/tracing/sampling/rate_limiter.rb +0 -185
@@ -1,60 +1,45 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../core/rate_limiter'
4
+
3
5
  module Datadog
4
6
  module AppSec
5
- # Simple per-thread rate limiter
6
- # Since AppSec marks sampling to keep on a security event, this limits the flood of egress traces involving AppSec
7
+ # Per-thread rate limiter based on token bucket rate limiter.
8
+ #
9
+ # Since AppSec marks sampling to keep on a security event, this limits
10
+ # the flood of egress traces involving AppSec
7
11
  class RateLimiter
8
- def initialize(rate)
9
- @rate = rate
10
- @timestamps = []
11
- end
12
-
13
- def limit
14
- now = Time.now.to_f
15
-
16
- loop do
17
- oldest = @timestamps.first
18
-
19
- break if oldest.nil? || now - oldest < 1
20
-
21
- @timestamps.shift
22
- end
23
-
24
- @timestamps << now
25
-
26
- if (count = @timestamps.count) <= @rate
27
- yield
28
- else
29
- Datadog.logger.debug { "Rate limit hit: #{count}/#{@rate} AppSec traces/second" }
30
- end
31
- end
12
+ THREAD_KEY = :datadog_security_appsec_rate_limiter
32
13
 
33
14
  class << self
34
- def limit(name, &block)
35
- rate_limiter(name).limit(&block)
15
+ def thread_local
16
+ rate_limiter = Thread.current.thread_variable_get(THREAD_KEY)
17
+ return rate_limiter unless rate_limiter.nil?
18
+
19
+ Thread.current.thread_variable_set(THREAD_KEY, new(trace_rate_limit))
36
20
  end
37
21
 
38
22
  # reset a rate limiter: used for testing
39
- def reset!(name)
40
- Thread.current[:datadog_security_trace_rate_limiter] = nil
23
+ def reset!
24
+ Thread.current.thread_variable_set(THREAD_KEY, nil)
41
25
  end
42
26
 
43
- protected
44
-
45
- def rate_limiter(name)
46
- case name
47
- when :traces
48
- Thread.current[:datadog_security_trace_rate_limiter] ||= RateLimiter.new(trace_rate_limit)
49
- else
50
- raise "unsupported rate limiter: #{name.inspect}"
51
- end
52
- end
27
+ private
53
28
 
54
29
  def trace_rate_limit
55
30
  Datadog.configuration.appsec.trace_rate_limit
56
31
  end
57
32
  end
33
+
34
+ def initialize(rate)
35
+ @rate_limiter = Core::TokenBucket.new(rate)
36
+ end
37
+
38
+ def limit
39
+ return yield if @rate_limiter.allow?
40
+
41
+ Datadog.logger.debug { "Rate limit hit: #{@rate_limiter.current_window_rate} AppSec traces/second" }
42
+ end
58
43
  end
59
44
  end
60
45
  end
@@ -53,7 +53,7 @@ module Datadog
53
53
  end
54
54
 
55
55
  # rubocop:disable Metrics/MethodLength
56
- def receivers
56
+ def receivers(telemetry)
57
57
  return [] unless remote_features_enabled?
58
58
 
59
59
  matcher = Core::Remote::Dispatcher::Matcher::Product.new(ASM_PRODUCTS)
@@ -86,7 +86,10 @@ module Datadog
86
86
  end
87
87
 
88
88
  if rules.empty?
89
- settings_rules = AppSec::Processor::RuleLoader.load_rules(ruleset: Datadog.configuration.appsec.ruleset)
89
+ settings_rules = AppSec::Processor::RuleLoader.load_rules(
90
+ telemetry: telemetry,
91
+ ruleset: Datadog.configuration.appsec.ruleset
92
+ )
90
93
 
91
94
  raise NoRulesError, 'no default rules available' unless settings_rules
92
95
 
@@ -99,9 +102,10 @@ module Datadog
99
102
  overrides: overrides,
100
103
  exclusions: exclusions,
101
104
  custom_rules: custom_rules,
105
+ telemetry: telemetry
102
106
  )
103
107
 
104
- Datadog::AppSec.reconfigure(ruleset: ruleset, actions: actions)
108
+ Datadog::AppSec.reconfigure(ruleset: ruleset, actions: actions, telemetry: telemetry)
105
109
  end
106
110
 
107
111
  [receiver]
@@ -23,12 +23,12 @@ module Datadog
23
23
  appsec_component.processor if appsec_component
24
24
  end
25
25
 
26
- def reconfigure(ruleset:, actions:)
26
+ def reconfigure(ruleset:, actions:, telemetry:)
27
27
  appsec_component = components.appsec
28
28
 
29
29
  return unless appsec_component
30
30
 
31
- appsec_component.reconfigure(ruleset: ruleset, actions: actions)
31
+ appsec_component.reconfigure(ruleset: ruleset, actions: actions, telemetry: telemetry)
32
32
  end
33
33
 
34
34
  def reconfigure_lock(&block)
@@ -94,7 +94,9 @@ module Datadog
94
94
  # the Core resolver from within your product/component's namespace.
95
95
  agent_settings = AgentSettingsResolver.call(settings, logger: @logger)
96
96
 
97
- @remote = Remote::Component.build(settings, agent_settings)
97
+ @telemetry = self.class.build_telemetry(settings, agent_settings, @logger)
98
+
99
+ @remote = Remote::Component.build(settings, agent_settings, telemetry: telemetry)
98
100
  @tracer = self.class.build_tracer(settings, agent_settings, logger: @logger)
99
101
  @crashtracker = self.class.build_crashtracker(settings, agent_settings, logger: @logger)
100
102
 
@@ -107,8 +109,7 @@ module Datadog
107
109
 
108
110
  @runtime_metrics = self.class.build_runtime_metrics_worker(settings)
109
111
  @health_metrics = self.class.build_health_metrics(settings)
110
- @telemetry = self.class.build_telemetry(settings, agent_settings, logger)
111
- @appsec = Datadog::AppSec::Component.build_appsec_component(settings)
112
+ @appsec = Datadog::AppSec::Component.build_appsec_component(settings, telemetry: telemetry)
112
113
 
113
114
  self.class.configure_tracing(settings)
114
115
  end
@@ -410,10 +410,8 @@ module Datadog
410
410
  # The profiler gathers data by sending `SIGPROF` unix signals to Ruby application threads.
411
411
  #
412
412
  # We've discovered that this can trigger a bug in a number of Ruby APIs in the `Dir` class, as
413
- # described in https://github.com/DataDog/dd-trace-rb/issues/3450 . This workaround prevents the issue
414
- # from happening by monkey patching the affected APIs.
415
- #
416
- # (In the future, once a fix lands upstream, we'll disable this workaround for Rubies that don't need it)
413
+ # described in https://bugs.ruby-lang.org/issues/20586 .
414
+ # This was fixed for Ruby 3.4+, and this setting is a no-op for those versions.
417
415
  #
418
416
  # @default `DD_PROFILING_DIR_INTERRUPTION_WORKAROUND_ENABLED` environment variable as a boolean,
419
417
  # otherwise `true`
@@ -462,6 +460,60 @@ module Datadog
462
460
  end
463
461
  end
464
462
  end
463
+
464
+ # Enables GVL profiling. This will show when threads are waiting for GVL in the timeline view.
465
+ #
466
+ # This is a preview feature and disabled by default. It requires Ruby 3.2+.
467
+ #
468
+ # @default `DD_PROFILING_PREVIEW_GVL_ENABLED` environment variable as a boolean, otherwise `false`
469
+ option :preview_gvl_enabled do |o|
470
+ o.type :bool
471
+ o.env 'DD_PROFILING_PREVIEW_GVL_ENABLED'
472
+ o.default false
473
+ end
474
+
475
+ # Controls the smallest time period the profiler will report a thread waiting for the GVL.
476
+ #
477
+ # The default value was set to minimize overhead. Periods smaller than the set value will not be reported (e.g.
478
+ # the thread will be reported as whatever it was doing before it waited for the GVL).
479
+ #
480
+ # We do not recommend setting this to less than 1ms. Tweaking this value can increase application latency and
481
+ # memory use.
482
+ #
483
+ # @default 10_000_000 (10ms)
484
+ option :waiting_for_gvl_threshold_ns do |o|
485
+ o.type :int
486
+ o.default 10_000_000
487
+ end
488
+
489
+ # Controls if the profiler should attempt to read context from the otel library
490
+ #
491
+ # @default false
492
+ option :preview_otel_context_enabled do |o|
493
+ o.env 'DD_PROFILING_PREVIEW_OTEL_CONTEXT_ENABLED'
494
+ o.default false
495
+ o.env_parser do |value|
496
+ if value
497
+ value = value.strip.downcase
498
+ if ['only', 'both'].include?(value)
499
+ value
500
+ elsif ['true', '1'].include?(value)
501
+ 'both'
502
+ else
503
+ 'false'
504
+ end
505
+ end
506
+ end
507
+ o.setter do |value|
508
+ if value == true
509
+ :both
510
+ elsif ['only', 'both', :only, :both].include?(value)
511
+ value.to_sym
512
+ else
513
+ false
514
+ end
515
+ end
516
+ end
465
517
  end
466
518
 
467
519
  # @public_api
@@ -630,6 +682,33 @@ module Datadog
630
682
  end
631
683
  end
632
684
 
685
+ # The monotonic clock time provider used by Datadog. This option is internal and is used by `datadog-ci`
686
+ # gem to avoid traces' durations being skewed by timecop.
687
+ #
688
+ # It must respect the interface of [Datadog::Core::Utils::Time#get_time] method.
689
+ #
690
+ # For [Timecop](https://rubygems.org/gems/timecop), for example,
691
+ # `->(unit = :float_second) { ::Process.clock_gettime_without_mock(::Process::CLOCK_MONOTONIC, unit) }`
692
+ # allows Datadog features to use the real monotonic time when time is frozen with
693
+ # `Timecop.mock_process_clock = true`.
694
+ #
695
+ # @default `->(unit = :float_second) { ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, unit)}`
696
+ # @return [Proc<Numeric>]
697
+ option :get_time_provider do |o|
698
+ o.default_proc { |unit = :float_second| ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, unit) }
699
+ o.type :proc
700
+
701
+ o.after_set do |get_time_provider|
702
+ Core::Utils::Time.get_time_provider = get_time_provider
703
+ end
704
+
705
+ o.resetter do |_value|
706
+ ->(unit = :float_second) { ::Process.clock_gettime(::Process::CLOCK_MONOTONIC, unit) }.tap do |default|
707
+ Core::Utils::Time.get_time_provider = default
708
+ end
709
+ end
710
+ end
711
+
633
712
  # The `version` tag in Datadog. Use it to enable [Deployment Tracking](https://docs.datadoghq.com/tracing/deployment_tracking/).
634
713
  # @see https://docs.datadoghq.com/getting_started/tagging/unified_service_tagging
635
714
  # @default `DD_VERSION` environment variable, otherwise `nil`
@@ -836,7 +915,7 @@ module Datadog
836
915
  # Enables reporting of information when Ruby VM crashes.
837
916
  option :enabled do |o|
838
917
  o.type :bool
839
- o.default true
918
+ o.default false
840
919
  o.env 'DD_CRASHTRACKING_ENABLED'
841
920
  end
842
921
  end
@@ -95,7 +95,7 @@ module Datadog
95
95
  def start_or_update_on_fork(action:)
96
96
  self.class._native_start_or_update_on_fork(
97
97
  action: action,
98
- exporter_configuration: [:agent, agent_base_url],
98
+ agent_base_url: agent_base_url,
99
99
  path_to_crashtracking_receiver_binary: path_to_crashtracking_receiver_binary,
100
100
  ld_library_path: ld_library_path,
101
101
  tags_as_array: tags.to_a,
@@ -25,9 +25,9 @@ module Datadog
25
25
  # 2. Checking if `Net::HTTP` is referring to the original one
26
26
  # => ::Net::HTTP.equal?(::WebMock::HttpLibAdapters::NetHttpAdapter::OriginalNetHTTP)
27
27
  def webmock_enabled?
28
- defined?(::WebMock::HttpLibAdapters::NetHttpAdapter) &&
28
+ !!(defined?(::WebMock::HttpLibAdapters::NetHttpAdapter) &&
29
29
  defined?(::Net::HTTP) &&
30
- ::Net::HTTP.equal?(::WebMock::HttpLibAdapters::NetHttpAdapter.instance_variable_get(:@webMockNetHTTP))
30
+ ::Net::HTTP.equal?(::WebMock::HttpLibAdapters::NetHttpAdapter.instance_variable_get(:@webMockNetHTTP)))
31
31
  end
32
32
 
33
33
  private
@@ -68,7 +68,7 @@ module Datadog
68
68
 
69
69
  # Check if we are running from `bin/cucumber` or `cucumber/rake/task`.
70
70
  def cucumber?
71
- defined?(::Cucumber::Cli)
71
+ !!defined?(::Cucumber::Cli)
72
72
  end
73
73
 
74
74
  # If this is a Rails application, use different heuristics to detect
@@ -80,7 +80,7 @@ module Datadog
80
80
  # detecting its presence is enough to deduce if this is a development environment.
81
81
  #
82
82
  # @see https://github.com/rails/spring/blob/48b299348ace2188444489a0c216a6f3e9687281/README.md?plain=1#L204-L207
83
- defined?(::Spring) || rails_env_development?
83
+ !!defined?(::Spring) || rails_env_development?
84
84
  end
85
85
 
86
86
  RAILS_ENV_DEVELOPMENT = Set['development', 'test'].freeze
@@ -94,7 +94,7 @@ module Datadog
94
94
  # it's common to have a custom "staging" environment, and such environment normally want to run as close
95
95
  # to production as possible.
96
96
  def rails_env_development?
97
- defined?(::Rails.env) && RAILS_ENV_DEVELOPMENT.include?(::Rails.env)
97
+ !!defined?(::Rails.env) && RAILS_ENV_DEVELOPMENT.include?(::Rails.env)
98
98
  end
99
99
  end
100
100
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require_relative '../utils/time'
4
4
  require_relative '../utils/only_once'
5
+ require_relative '../telemetry/logger'
5
6
  require_relative '../configuration/ext'
6
7
 
7
8
  require_relative 'ext'
@@ -100,6 +101,7 @@ module Datadog
100
101
  Datadog.logger.error(
101
102
  "Failed to send count stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}"
102
103
  )
104
+ Telemetry::Logger.report(e, description: 'Failed to send count stat')
103
105
  end
104
106
 
105
107
  def distribution(stat, value = nil, options = nil, &block)
@@ -113,6 +115,7 @@ module Datadog
113
115
  Datadog.logger.error(
114
116
  "Failed to send distribution stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}"
115
117
  )
118
+ Telemetry::Logger.report(e, description: 'Failed to send distribution stat')
116
119
  end
117
120
 
118
121
  def increment(stat, options = nil)
@@ -125,6 +128,7 @@ module Datadog
125
128
  Datadog.logger.error(
126
129
  "Failed to send increment stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}"
127
130
  )
131
+ Telemetry::Logger.report(e, description: 'Failed to send increment stat')
128
132
  end
129
133
 
130
134
  def gauge(stat, value = nil, options = nil, &block)
@@ -138,6 +142,7 @@ module Datadog
138
142
  Datadog.logger.error(
139
143
  "Failed to send gauge stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}"
140
144
  )
145
+ Telemetry::Logger.report(e, description: 'Failed to send gauge stat')
141
146
  end
142
147
 
143
148
  def time(stat, options = nil)
@@ -153,9 +158,11 @@ module Datadog
153
158
  distribution(stat, ((finished - start) * 1000), options)
154
159
  end
155
160
  rescue StandardError => e
161
+ # TODO: Likely to be redundant, since `distribution` handles its own errors.
156
162
  Datadog.logger.error(
157
163
  "Failed to send time stat. Cause: #{e.class.name} #{e.message} Source: #{Array(e.backtrace).first}"
158
164
  )
165
+ Telemetry::Logger.report(e, description: 'Failed to send time stat')
159
166
  end
160
167
  end
161
168
 
@@ -0,0 +1,183 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'utils/time'
4
+
5
+ module Datadog
6
+ module Core
7
+ # Checks for rate limiting on a resource.
8
+ class RateLimiter
9
+ # Checks if resource of specified size can be
10
+ # conforms with the current limit.
11
+ #
12
+ # Implementations of this method are not guaranteed
13
+ # to be side-effect free.
14
+ #
15
+ # @return [Boolean] whether a resource conforms with the current limit
16
+ def allow?(size = 1); end
17
+
18
+ # The effective rate limiting ratio based on
19
+ # recent calls to `allow?`.
20
+ #
21
+ # @return [Float] recent allowance ratio
22
+ def effective_rate; end
23
+ end
24
+
25
+ # Implementation of the Token Bucket metering algorithm
26
+ # for rate limiting.
27
+ #
28
+ # @see https://en.wikipedia.org/wiki/Token_bucket Token bucket
29
+ class TokenBucket < RateLimiter
30
+ attr_reader :rate, :max_tokens
31
+
32
+ # @param rate [Numeric] Allowance rate, in units per second
33
+ # if rate is negative, always allow
34
+ # if rate is zero, never allow
35
+ # @param max_tokens [Numeric] Limit of available tokens
36
+ def initialize(rate, max_tokens = rate)
37
+ super()
38
+
39
+ raise ArgumentError, "rate must be a number: #{rate}" unless rate.is_a?(Numeric)
40
+ raise ArgumentError, "max_tokens must be a number: #{max_tokens}" unless max_tokens.is_a?(Numeric)
41
+
42
+ @rate = rate
43
+ @max_tokens = max_tokens
44
+
45
+ @tokens = max_tokens
46
+ @total_messages = 0
47
+ @conforming_messages = 0
48
+ @prev_conforming_messages = nil
49
+ @prev_total_messages = nil
50
+ @current_window = nil
51
+
52
+ @last_refill = Core::Utils::Time.get_time
53
+ end
54
+
55
+ # Checks if a message of provided +size+
56
+ # conforms with the current bucket limit.
57
+ #
58
+ # If it does, return +true+ and remove +size+
59
+ # tokens from the bucket.
60
+ # If it does not, return +false+ without affecting
61
+ # the tokens from the bucket.
62
+ #
63
+ # @return [Boolean] +true+ if message conforms with current bucket limit
64
+ def allow?(size = 1)
65
+ allowed = should_allow?(size)
66
+ update_rate_counts(allowed)
67
+ allowed
68
+ end
69
+
70
+ # Ratio of 'conformance' per 'total messages' checked
71
+ # averaged for the past 2 buckets
72
+ #
73
+ # Returns +1.0+ when no messages have been checked yet.
74
+ #
75
+ # @return [Float] Conformance ratio, between +[0,1]+
76
+ def effective_rate
77
+ return 0.0 if @rate.zero?
78
+ return 1.0 if @rate < 0 || @total_messages.zero?
79
+
80
+ return current_window_rate if @prev_conforming_messages.nil? || @prev_total_messages.nil?
81
+
82
+ (@conforming_messages.to_f + @prev_conforming_messages.to_f) / (@total_messages + @prev_total_messages)
83
+ end
84
+
85
+ # Ratio of 'conformance' per 'total messages' checked
86
+ # on this bucket
87
+ #
88
+ # Returns +1.0+ when no messages have been checked yet.
89
+ #
90
+ # @return [Float] Conformance ratio, between +[0,1]+
91
+ def current_window_rate
92
+ return 1.0 if @total_messages.zero?
93
+
94
+ @conforming_messages.to_f / @total_messages
95
+ end
96
+
97
+ # @return [Numeric] number of tokens currently available
98
+ def available_tokens
99
+ @tokens
100
+ end
101
+
102
+ private
103
+
104
+ def refill_since_last_message
105
+ now = Core::Utils::Time.get_time
106
+ elapsed = now - @last_refill
107
+
108
+ # Update the number of available tokens, but ensure we do not exceed the max
109
+ # we return the min of tokens + rate*elapsed, or max tokens
110
+ refill_tokens(@rate * elapsed)
111
+
112
+ @last_refill = now
113
+ end
114
+
115
+ def refill_tokens(size)
116
+ @tokens += size
117
+ @tokens = @max_tokens if @tokens > @max_tokens
118
+ end
119
+
120
+ def increment_total_count
121
+ @total_messages += 1
122
+ end
123
+
124
+ def increment_conforming_count
125
+ @conforming_messages += 1
126
+ end
127
+
128
+ def should_allow?(size = 1)
129
+ # rate limit of 0 blocks everything
130
+ return false if @rate.zero?
131
+
132
+ # negative rate limit disables rate limiting
133
+ return true if @rate < 0
134
+
135
+ refill_since_last_message
136
+
137
+ # if fewer than +size+ tokens are available, we don't allow
138
+ return false if @tokens < size
139
+
140
+ @tokens -= size
141
+
142
+ true
143
+ end
144
+
145
+ # Sets and Updates the past two 1 second windows for which
146
+ # the rate limiter must compute its rate over and updates
147
+ # the total count, and conforming message count if +allowed+
148
+ def update_rate_counts(allowed)
149
+ now = Core::Utils::Time.get_time
150
+
151
+ # No tokens have been seen yet, start a new window
152
+ if @current_window.nil?
153
+ @current_window = now
154
+ # If more than 1 second has passed since the last window, reset
155
+ elsif now - @current_window >= 1
156
+ @prev_conforming_messages = @conforming_messages
157
+ @prev_total_messages = @total_messages
158
+ @conforming_messages = 0
159
+ @total_messages = 0
160
+ @current_window = now
161
+ end
162
+
163
+ increment_conforming_count if allowed
164
+
165
+ increment_total_count
166
+ end
167
+ end
168
+
169
+ # {Datadog::Core::RateLimiter} that accepts all resources,
170
+ # with no limits.
171
+ class UnlimitedLimiter < RateLimiter
172
+ # @return [Boolean] always +true+
173
+ def allow?(_ = 1)
174
+ true
175
+ end
176
+
177
+ # @return [Float] always 100%
178
+ def effective_rate
179
+ 1.0
180
+ end
181
+ end
182
+ end
183
+ end
@@ -12,10 +12,11 @@ module Datadog
12
12
  class Capabilities
13
13
  attr_reader :products, :capabilities, :receivers, :base64_capabilities
14
14
 
15
- def initialize(settings)
15
+ def initialize(settings, telemetry)
16
16
  @capabilities = []
17
17
  @products = []
18
18
  @receivers = []
19
+ @telemetry = telemetry
19
20
 
20
21
  register(settings)
21
22
 
@@ -28,12 +29,12 @@ module Datadog
28
29
  if settings.respond_to?(:appsec) && settings.appsec.enabled
29
30
  register_capabilities(Datadog::AppSec::Remote.capabilities)
30
31
  register_products(Datadog::AppSec::Remote.products)
31
- register_receivers(Datadog::AppSec::Remote.receivers)
32
+ register_receivers(Datadog::AppSec::Remote.receivers(@telemetry))
32
33
  end
33
34
 
34
35
  register_capabilities(Datadog::Tracing::Remote.capabilities)
35
36
  register_products(Datadog::Tracing::Remote.products)
36
- register_receivers(Datadog::Tracing::Remote.receivers)
37
+ register_receivers(Datadog::Tracing::Remote.receivers(@telemetry))
37
38
  end
38
39
 
39
40
  def register_capabilities(capabilities)
@@ -39,6 +39,7 @@ module Datadog
39
39
  @client.sync
40
40
  @healthy ||= true
41
41
  rescue Client::SyncError => e
42
+ # Transient errors due to network or agent. Log the error, but do not report it via telemetry
42
43
  Datadog.logger.error do
43
44
  "remote worker client sync error: #{e.message} location: #{Array(e.backtrace).first}. skipping sync"
44
45
  end
@@ -48,6 +49,7 @@ module Datadog
48
49
  # negotiation object stores error logging state that should be reset.
49
50
  negotiation = Negotiation.new(settings, agent_settings)
50
51
 
52
+ # Transient errors due to network or agent. Log the error, but do not report it via telemetry
51
53
  Datadog.logger.error do
52
54
  "remote worker error: #{e.class.name} #{e.message} location: #{Array(e.backtrace).first}. "\
53
55
  'reseting client state'
@@ -150,10 +152,10 @@ module Datadog
150
152
  #
151
153
  # Those checks are instead performed inside the worker loop.
152
154
  # This allows users to upgrade their agent while keeping their application running.
153
- def build(settings, agent_settings)
155
+ def build(settings, agent_settings, telemetry:)
154
156
  return unless settings.remote.enabled
155
157
 
156
- new(settings, Client::Capabilities.new(settings), agent_settings)
158
+ new(settings, Client::Capabilities.new(settings, telemetry), agent_settings)
157
159
  end
158
160
  end
159
161
  end
@@ -20,7 +20,7 @@ module Datadog
20
20
 
21
21
  if res.internal_error? && network_error?(res.error)
22
22
  unless @logged[:agent_unreachable]
23
- Datadog.logger.error { "agent unreachable: cannot negotiate #{path}" }
23
+ Datadog.logger.warn { "agent unreachable: cannot negotiate #{path}" }
24
24
  @logged[:agent_unreachable] = true
25
25
  end
26
26
 
@@ -29,7 +29,7 @@ module Datadog
29
29
 
30
30
  if res.not_found?
31
31
  unless @logged[:no_info_endpoint]
32
- Datadog.logger.error { "agent reachable but has no /info endpoint: cannot negotiate #{path}" }
32
+ Datadog.logger.warn { "agent reachable but has no /info endpoint: cannot negotiate #{path}" }
33
33
  @logged[:no_info_endpoint] = true
34
34
  end
35
35
 
@@ -38,7 +38,7 @@ module Datadog
38
38
 
39
39
  unless res.ok?
40
40
  unless @logged[:unexpected_response]
41
- Datadog.logger.error { "agent reachable but unexpected response: cannot negotiate #{path}" }
41
+ Datadog.logger.warn { "agent reachable but unexpected response: cannot negotiate #{path}" }
42
42
  @logged[:unexpected_response] = true
43
43
  end
44
44
 
@@ -47,7 +47,7 @@ module Datadog
47
47
 
48
48
  unless res.endpoints.include?(path)
49
49
  unless @logged[:no_config_endpoint]
50
- Datadog.logger.error { "agent reachable but does not report #{path}" }
50
+ Datadog.logger.warn { "agent reachable but does not report #{path}" }
51
51
  @logged[:no_config_endpoint] = true
52
52
  end
53
53
 
@@ -19,6 +19,8 @@ module Datadog
19
19
  barrier = Datadog::Core::Remote.active_remote.barrier(:once)
20
20
  end
21
21
 
22
+ # steep does not permit the next line due to
23
+ # https://github.com/soutaro/steep/issues/1231
22
24
  Boot.new(barrier, t)
23
25
  end
24
26
  end
@@ -94,7 +94,7 @@ module Datadog
94
94
  def try_flush
95
95
  yield
96
96
  rescue StandardError => e
97
- Datadog.logger.error("Error while sending runtime metric. Cause: #{e.class.name} #{e.message}")
97
+ Datadog.logger.warn("Error while sending runtime metric. Cause: #{e.class.name} #{e.message}")
98
98
  end
99
99
 
100
100
  def default_metric_options