datadog 2.17.0 → 2.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +44 -1
  3. data/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +12 -46
  4. data/ext/datadog_profiling_native_extension/collectors_stack.c +227 -49
  5. data/ext/datadog_profiling_native_extension/collectors_stack.h +19 -3
  6. data/ext/datadog_profiling_native_extension/collectors_thread_context.c +63 -12
  7. data/ext/datadog_profiling_native_extension/collectors_thread_context.h +1 -0
  8. data/ext/datadog_profiling_native_extension/extconf.rb +7 -0
  9. data/ext/datadog_profiling_native_extension/heap_recorder.c +239 -363
  10. data/ext/datadog_profiling_native_extension/heap_recorder.h +4 -6
  11. data/ext/datadog_profiling_native_extension/libdatadog_helpers.c +22 -0
  12. data/ext/datadog_profiling_native_extension/libdatadog_helpers.h +8 -5
  13. data/ext/datadog_profiling_native_extension/private_vm_api_access.c +1 -0
  14. data/ext/datadog_profiling_native_extension/private_vm_api_access.h +6 -3
  15. data/ext/datadog_profiling_native_extension/ruby_helpers.c +1 -13
  16. data/ext/datadog_profiling_native_extension/ruby_helpers.h +2 -10
  17. data/ext/datadog_profiling_native_extension/stack_recorder.c +154 -57
  18. data/ext/libdatadog_api/extconf.rb +2 -2
  19. data/ext/libdatadog_api/library_config.c +54 -12
  20. data/ext/libdatadog_api/library_config.h +6 -0
  21. data/ext/libdatadog_api/process_discovery.c +2 -7
  22. data/ext/libdatadog_extconf_helpers.rb +1 -1
  23. data/lib/datadog/appsec/api_security/lru_cache.rb +9 -2
  24. data/lib/datadog/appsec/api_security/route_extractor.rb +65 -0
  25. data/lib/datadog/appsec/api_security/sampler.rb +59 -0
  26. data/lib/datadog/appsec/api_security.rb +14 -0
  27. data/lib/datadog/appsec/assets/waf_rules/recommended.json +257 -85
  28. data/lib/datadog/appsec/assets/waf_rules/strict.json +10 -78
  29. data/lib/datadog/appsec/component.rb +30 -54
  30. data/lib/datadog/appsec/configuration/settings.rb +60 -2
  31. data/lib/datadog/appsec/context.rb +6 -6
  32. data/lib/datadog/appsec/contrib/devise/tracking_middleware.rb +1 -1
  33. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +27 -16
  34. data/lib/datadog/appsec/processor/rule_loader.rb +5 -6
  35. data/lib/datadog/appsec/remote.rb +15 -55
  36. data/lib/datadog/appsec/security_engine/engine.rb +194 -0
  37. data/lib/datadog/appsec/security_engine/runner.rb +10 -11
  38. data/lib/datadog/appsec.rb +4 -7
  39. data/lib/datadog/core/configuration/agent_settings.rb +52 -0
  40. data/lib/datadog/core/configuration/agent_settings_resolver.rb +1 -43
  41. data/lib/datadog/core/configuration/components.rb +2 -4
  42. data/lib/datadog/core/configuration/option.rb +9 -9
  43. data/lib/datadog/core/configuration/settings.rb +22 -10
  44. data/lib/datadog/core/configuration/stable_config.rb +1 -2
  45. data/lib/datadog/core/crashtracking/tag_builder.rb +4 -22
  46. data/lib/datadog/core/process_discovery/tracer_memfd.rb +15 -0
  47. data/lib/datadog/core/process_discovery.rb +5 -1
  48. data/lib/datadog/core/remote/configuration/repository.rb +12 -0
  49. data/lib/datadog/core/tag_builder.rb +56 -0
  50. data/lib/datadog/core/telemetry/event/app_client_configuration_change.rb +1 -0
  51. data/lib/datadog/core/telemetry/event/app_started.rb +129 -39
  52. data/lib/datadog/core/telemetry/logger.rb +5 -4
  53. data/lib/datadog/core/telemetry/logging.rb +11 -5
  54. data/lib/datadog/core/transport/http/adapters/net.rb +17 -2
  55. data/lib/datadog/core/transport/http/builder.rb +2 -2
  56. data/lib/datadog/core/transport/http/env.rb +8 -0
  57. data/lib/datadog/core/utils.rb +7 -0
  58. data/lib/datadog/di/instrumenter.rb +52 -2
  59. data/lib/datadog/di/probe_notification_builder.rb +31 -41
  60. data/lib/datadog/di/probe_notifier_worker.rb +9 -1
  61. data/lib/datadog/di/serializer.rb +6 -2
  62. data/lib/datadog/di/transport/http/input.rb +10 -0
  63. data/lib/datadog/di/transport/input.rb +10 -2
  64. data/lib/datadog/profiling/collectors/code_provenance.rb +17 -8
  65. data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +4 -0
  66. data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -0
  67. data/lib/datadog/profiling/collectors/thread_context.rb +16 -1
  68. data/lib/datadog/profiling/component.rb +7 -9
  69. data/lib/datadog/profiling/ext.rb +0 -12
  70. data/lib/datadog/profiling/http_transport.rb +2 -2
  71. data/lib/datadog/profiling/profiler.rb +2 -0
  72. data/lib/datadog/profiling/scheduler.rb +2 -1
  73. data/lib/datadog/profiling/stack_recorder.rb +5 -5
  74. data/lib/datadog/profiling/tag_builder.rb +5 -37
  75. data/lib/datadog/profiling/tasks/setup.rb +2 -0
  76. data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +15 -0
  77. data/lib/datadog/tracing/contrib/action_pack/action_dispatch/instrumentation.rb +19 -12
  78. data/lib/datadog/tracing/contrib/action_pack/ext.rb +2 -0
  79. data/lib/datadog/tracing/contrib/lograge/patcher.rb +4 -2
  80. data/lib/datadog/tracing/contrib/sidekiq/ext.rb +1 -0
  81. data/lib/datadog/tracing/contrib/sidekiq/server_tracer.rb +5 -2
  82. data/lib/datadog/tracing/sync_writer.rb +1 -1
  83. data/lib/datadog/tracing/trace_operation.rb +12 -4
  84. data/lib/datadog/tracing/tracer.rb +6 -2
  85. data/lib/datadog/version.rb +1 -1
  86. metadata +12 -10
  87. data/lib/datadog/appsec/assets/waf_rules/processors.json +0 -321
  88. data/lib/datadog/appsec/assets/waf_rules/scanners.json +0 -1023
  89. data/lib/datadog/appsec/processor/rule_merger.rb +0 -171
  90. data/lib/datadog/appsec/processor.rb +0 -107
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'environment/socket'
4
+ require_relative 'environment/identity'
5
+ require_relative 'environment/git'
6
+
7
+ module Datadog
8
+ module Core
9
+ # This module builds a hash of tags.
10
+ #
11
+ # When changing or adding the tags, make sure they are kept in sync with
12
+ # https://docs.google.com/spreadsheets/d/1LOGMf4c4Avbtn36uZ2SWvhIGKRPLM1BoWkUP4JYj7hA/
13
+ # (Datadog internal link).
14
+ #
15
+ # @api private
16
+ module TagBuilder
17
+ def self.fixed_environment_tags
18
+ @fixed_environment_tags ||= {
19
+ 'language' => Environment::Identity.lang,
20
+ 'runtime' => Environment::Identity.lang, # Known to be duplicated from language above
21
+ 'runtime_engine' => Environment::Identity.lang_engine,
22
+ 'runtime_platform' => Environment::Identity.lang_platform,
23
+ 'runtime_version' => Environment::Identity.lang_version,
24
+ 'library_version' => Environment::Identity.gem_datadog_version,
25
+ 'git.repository_url' => Environment::Git.git_repository_url,
26
+ 'git.commit.sha' => Environment::Git.git_commit_sha,
27
+ }.compact.freeze
28
+ end
29
+
30
+ def self.tags(settings)
31
+ # Note that user tags get overwritten by our tags, and also
32
+ # that user tags do not get compacted (nil values are sent as
33
+ # empty strings).
34
+ settings.tags.merge(fixed_environment_tags).merge({
35
+ # Hostname can possibly change during application runtime.
36
+ 'host' => Environment::Socket.hostname,
37
+ # Runtime ID changes upon a fork.
38
+ 'runtime-id' => Environment::Identity.id,
39
+ # Process ID changes upon a fork.
40
+ 'process_id' => Process.pid.to_s,
41
+ # Unified service tagging.
42
+ 'env' => settings.env,
43
+ 'service' => settings.service,
44
+ 'version' => settings.version,
45
+ }.compact)
46
+ end
47
+
48
+ def self.serialize_tags(tags)
49
+ # DEV: Should there be some sort of escaping done here?
50
+ tags.map do |key, value|
51
+ "#{key}:#{value}"
52
+ end.join(',')
53
+ end
54
+ end
55
+ end
56
+ end
@@ -37,6 +37,7 @@ module Datadog
37
37
  }
38
38
  end
39
39
 
40
+ # DEV: This seems unnecessary (we send the state of sca_enabled for each remote config change)
40
41
  unless config.dig('appsec', 'sca_enabled').nil?
41
42
  res << {
42
43
  name: 'appsec.sca_enabled',
@@ -25,7 +25,7 @@ module Datadog
25
25
  private
26
26
 
27
27
  def products
28
- # @type var products: Hash[Symbol, Hash[Symbol, Object]]
28
+ # @type var products: Hash[Symbol, Hash[Symbol, Hash[Symbol, String | Integer] | bool | nil]]
29
29
  products = {
30
30
  appsec: {
31
31
  enabled: Datadog::AppSec.enabled?,
@@ -64,6 +64,7 @@ module Datadog
64
64
  tracing.partial_flush.min_spans_threshold
65
65
  tracing.report_hostname
66
66
  tracing.sampling.rate_limit
67
+ apm.tracing.enabled
67
68
  ].freeze
68
69
 
69
70
  # standard:disable Metrics/AbcSize
@@ -72,23 +73,70 @@ module Datadog
72
73
  config = Datadog.configuration
73
74
  seq_id = Event.configuration_sequence.next
74
75
 
76
+ # tracing.writer_options.buffer_size and tracing.writer_options.flush_interval have the same origin.
77
+ writer_option_origin = get_telemetry_origin(config, 'tracing.writer_options')
78
+
75
79
  list = [
80
+ # Only set using env var as of June 2025
76
81
  conf_value('DD_GIT_REPOSITORY_URL', Core::Environment::Git.git_repository_url, seq_id, 'env_var'),
77
82
  conf_value('DD_GIT_COMMIT_SHA', Core::Environment::Git.git_commit_sha, seq_id, 'env_var'),
78
83
 
79
- conf_value('DD_AGENT_HOST', config.agent.host, seq_id),
80
- conf_value('DD_AGENT_TRANSPORT', agent_transport(config), seq_id),
81
- conf_value('DD_TRACE_SAMPLE_RATE', to_value(config.tracing.sampling.default_rate), seq_id),
84
+ # Set by the customer application (eg. `require 'datadog/auto_instrument'`)
85
+ conf_value(
86
+ 'tracing.auto_instrument.enabled',
87
+ !defined?(Datadog::AutoInstrument::LOADED).nil?,
88
+ seq_id,
89
+ 'code'
90
+ ),
91
+ conf_value(
92
+ 'tracing.opentelemetry.enabled',
93
+ !defined?(Datadog::OpenTelemetry::LOADED).nil?,
94
+ seq_id,
95
+ 'code'
96
+ ),
97
+
98
+ # Mix of env var, programmatic and default config, so we use unknown
99
+ conf_value('DD_AGENT_TRANSPORT', agent_transport(config), seq_id, 'unknown'),
100
+
101
+ # writer_options is defined as an option that has a Hash value.
102
+ conf_value(
103
+ 'tracing.writer_options.buffer_size',
104
+ to_value(config.tracing.writer_options[:buffer_size]),
105
+ seq_id,
106
+ writer_option_origin
107
+ ),
108
+ conf_value(
109
+ 'tracing.writer_options.flush_interval',
110
+ to_value(config.tracing.writer_options[:flush_interval]),
111
+ seq_id,
112
+ writer_option_origin
113
+ ),
114
+
115
+ conf_value('DD_AGENT_HOST', config.agent.host, seq_id, get_telemetry_origin(config, 'agent.host')),
116
+ conf_value(
117
+ 'DD_TRACE_SAMPLE_RATE',
118
+ to_value(config.tracing.sampling.default_rate),
119
+ seq_id,
120
+ get_telemetry_origin(config, 'tracing.sampling.default_rate')
121
+ ),
82
122
  conf_value(
83
123
  'DD_TRACE_REMOVE_INTEGRATION_SERVICE_NAMES_ENABLED',
84
124
  config.tracing.contrib.global_default_service_name.enabled,
85
- seq_id
125
+ seq_id,
126
+ get_telemetry_origin(config, 'tracing.contrib.global_default_service_name.enabled')
86
127
  ),
87
128
  conf_value(
88
129
  'DD_TRACE_PEER_SERVICE_DEFAULTS_ENABLED',
89
130
  config.tracing.contrib.peer_service_defaults,
90
- seq_id
131
+ seq_id,
132
+ get_telemetry_origin(config, 'tracing.contrib.peer_service_defaults')
91
133
  ),
134
+ conf_value(
135
+ 'DD_TRACE_DEBUG',
136
+ config.diagnostics.debug,
137
+ seq_id,
138
+ get_telemetry_origin(config, 'diagnostics.debug')
139
+ )
92
140
  ]
93
141
 
94
142
  peer_service_mapping_str = ''
@@ -96,39 +144,55 @@ module Datadog
96
144
  peer_service_mapping = config.tracing.contrib.peer_service_mapping
97
145
  peer_service_mapping_str = peer_service_mapping.map { |key, value| "#{key}:#{value}" }.join(',')
98
146
  end
99
- list << conf_value('DD_TRACE_PEER_SERVICE_MAPPING', peer_service_mapping_str, seq_id)
147
+ list << conf_value(
148
+ 'DD_TRACE_PEER_SERVICE_MAPPING',
149
+ peer_service_mapping_str,
150
+ seq_id,
151
+ get_telemetry_origin(config, 'tracing.contrib.peer_service_mapping')
152
+ )
100
153
 
101
154
  # Whitelist of configuration options to send in additional payload object
102
- TARGET_OPTIONS.each do |option|
103
- split_option = option.split('.')
104
- list << conf_value(option, to_value(config.dig(*split_option)), seq_id)
155
+ TARGET_OPTIONS.each do |option_path|
156
+ split_option = option_path.split('.')
157
+ list << conf_value(
158
+ option_path,
159
+ to_value(config.dig(*split_option)),
160
+ seq_id,
161
+ get_telemetry_origin(config, option_path)
162
+ )
105
163
  end
106
164
 
107
165
  # Add some more custom additional payload values here
108
- list.push(
109
- conf_value('tracing.auto_instrument.enabled', !defined?(Datadog::AutoInstrument::LOADED).nil?, seq_id),
110
- conf_value(
111
- 'tracing.writer_options.buffer_size',
112
- to_value(config.tracing.writer_options[:buffer_size]),
113
- seq_id
114
- ),
115
- conf_value(
116
- 'tracing.writer_options.flush_interval',
117
- to_value(config.tracing.writer_options[:flush_interval]),
118
- seq_id
119
- ),
120
- conf_value(
121
- 'tracing.opentelemetry.enabled',
122
- !defined?(Datadog::OpenTelemetry::LOADED).nil?,
123
- seq_id
124
- ),
125
- )
126
- list << conf_value('logger.instance', config.logger.instance.class.to_s, seq_id) if config.logger.instance
166
+ if config.logger.instance
167
+ list << conf_value(
168
+ 'logger.instance',
169
+ config.logger.instance.class.to_s,
170
+ seq_id,
171
+ get_telemetry_origin(config, 'logger.instance')
172
+ )
173
+ end
127
174
  if config.respond_to?('appsec')
128
- list << conf_value('appsec.enabled', config.dig('appsec', 'enabled'), seq_id)
129
- list << conf_value('appsec.sca_enabled', config.dig('appsec', 'sca_enabled'), seq_id)
175
+ list << conf_value(
176
+ 'appsec.enabled',
177
+ config.dig('appsec', 'enabled'),
178
+ seq_id,
179
+ get_telemetry_origin(config, 'appsec.enabled')
180
+ )
181
+ list << conf_value(
182
+ 'appsec.sca_enabled',
183
+ config.dig('appsec', 'sca_enabled'),
184
+ seq_id,
185
+ get_telemetry_origin(config, 'appsec.sca_enabled')
186
+ )
187
+ end
188
+ if config.respond_to?('ci')
189
+ list << conf_value(
190
+ 'ci.enabled',
191
+ config.dig('ci', 'enabled'),
192
+ seq_id,
193
+ get_telemetry_origin(config, 'ci.enabled')
194
+ )
130
195
  end
131
- list << conf_value('ci.enabled', config.dig('ci', 'enabled'), seq_id) if config.respond_to?('ci')
132
196
 
133
197
  list.reject! { |entry| entry[:value].nil? }
134
198
  list
@@ -145,13 +209,27 @@ module Datadog
145
209
  end
146
210
  end
147
211
 
148
- def conf_value(name, value, seq_id, origin = 'code')
149
- {
150
- name: name,
151
- value: value,
152
- origin: origin,
153
- seq_id: seq_id,
154
- }
212
+ # `origin`: Source of the configuration. One of:
213
+ # - `fleet_stable_config`: configuration is set via the fleet automation Datadog UI
214
+ # - `local_stable_config`: configuration set via a user-managed file
215
+ # - `env_var`: configurations that are set through environment variables
216
+ # - `jvm_prop`: JVM system properties passed on the command line
217
+ # - `code`: configurations that are set through the customer application
218
+ # - `dd_config`: set by the dd.yaml file or json
219
+ # - `remote_config`: values that are set using remote config
220
+ # - `app.config`: only applies to .NET
221
+ # - `default`: set when the user has not set any configuration for the key (defaults to a value)
222
+ # - `unknown`: set for cases where it is difficult/not possible to determine the source of a config.
223
+ def conf_value(name, value, seq_id, origin)
224
+ result = {name: name, value: value, origin: origin, seq_id: seq_id}
225
+ if origin == 'fleet_stable_config'
226
+ fleet_id = Core::Configuration::StableConfig.configuration.dig(:fleet, :id)
227
+ result[:config_id] = fleet_id if fleet_id
228
+ elsif origin == 'local_stable_config'
229
+ local_id = Core::Configuration::StableConfig.configuration.dig(:local, :id)
230
+ result[:config_id] = local_id if local_id
231
+ end
232
+ result
155
233
  end
156
234
 
157
235
  def to_value(value)
@@ -172,6 +250,18 @@ module Datadog
172
250
  install_time: config.dig('telemetry', 'install_time'),
173
251
  }
174
252
  end
253
+
254
+ def get_telemetry_origin(config, config_path)
255
+ split_option = config_path.split('.')
256
+ option_name = split_option.pop
257
+ return 'unknown' if option_name.nil?
258
+
259
+ # @type var parent_setting: Core::Configuration::Options
260
+ # @type var option: Core::Configuration::Option
261
+ parent_setting = config.dig(*split_option)
262
+ option = parent_setting.send(:resolve_option, option_name.to_sym)
263
+ option.precedence_set&.origin || 'unknown'
264
+ end
175
265
  end
176
266
  end
177
267
  end
@@ -14,8 +14,8 @@ module Datadog
14
14
  # read: lib/datadog/core/telemetry/logging.rb
15
15
  module Logger
16
16
  class << self
17
- def report(exception, level: :error, description: nil)
18
- instance&.report(exception, level: level, description: description)
17
+ def report(exception, level: :error, description: nil, pii_safe: false)
18
+ instance&.report(exception, level: level, description: description, pii_safe: pii_safe)
19
19
  end
20
20
 
21
21
  def error(description)
@@ -34,9 +34,10 @@ module Datadog
34
34
  #
35
35
  # The downside is: this leaves us unable to report telemetry during component initialization.
36
36
  components = Datadog.send(:components, allow_initialization: false)
37
+ telemetry = components&.telemetry
37
38
 
38
- if components&.telemetry
39
- components.telemetry
39
+ if telemetry
40
+ telemetry
40
41
  else
41
42
  Datadog.logger.warn(
42
43
  'Failed to send telemetry before components initialization or within components lifecycle'
@@ -45,11 +45,17 @@ module Datadog
45
45
  end
46
46
  end
47
47
 
48
- def report(exception, level: :error, description: nil)
49
- # Annoymous exceptions to be logged as <Class:0x00007f8b1c0b3b40>
50
- message = +''
51
- message << (exception.class.name || exception.class.inspect)
52
- message << ': ' << description if description
48
+ def report(exception, level: :error, description: nil, pii_safe: false)
49
+ # Anonymous exceptions to be logged as <Class:0x00007f8b1c0b3b40>
50
+ message = +"#{exception.class.name || exception.class.inspect}" # standard:disable Style/RedundantInterpolation
51
+
52
+ exception_message = pii_safe ? exception.message : nil
53
+
54
+ if description || exception_message
55
+ message << ':'
56
+ message << " #{description}" if description
57
+ message << " (#{exception.message})" if exception_message
58
+ end
53
59
 
54
60
  event = Event::Log.new(
55
61
  message: message,
@@ -48,7 +48,7 @@ module Datadog
48
48
  end
49
49
 
50
50
  def get(env)
51
- get = ::Net::HTTP::Get.new(env.path, env.headers)
51
+ get = ::Net::HTTP::Get.new(net_http_path_from_env(env), env.headers)
52
52
 
53
53
  # Connect and send the request
54
54
  http_response = open do |http|
@@ -63,7 +63,7 @@ module Datadog
63
63
  post = nil
64
64
 
65
65
  if env.form.nil? || env.form.empty?
66
- post = ::Net::HTTP::Post.new(env.path, env.headers)
66
+ post = ::Net::HTTP::Post.new(net_http_path_from_env(env), env.headers)
67
67
  post.body = env.body
68
68
  else
69
69
  post = ::Datadog::Core::Vendor::Net::HTTP::Post::Multipart.new(
@@ -86,6 +86,21 @@ module Datadog
86
86
  "http://#{hostname}:#{port}?timeout=#{timeout}"
87
87
  end
88
88
 
89
+ def net_http_path_from_env(env)
90
+ path = env.path
91
+ case query = env.query
92
+ when Hash
93
+ path = path + '?' + URI.encode_www_form(query)
94
+ when String
95
+ path = path + '?' + query
96
+ when nil
97
+ # Nothing
98
+ else
99
+ raise ArgumentError, "Invalid type for query: #{query}"
100
+ end
101
+ path
102
+ end
103
+
89
104
  # Raised when called with an unknown HTTP method
90
105
  class UnknownHTTPMethod < StandardError
91
106
  attr_reader :verb
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative '../../configuration/agent_settings_resolver'
3
+ require_relative '../../configuration/agent_settings'
4
4
  require_relative 'adapters/registry'
5
5
  require_relative 'api/map'
6
6
 
@@ -41,7 +41,7 @@ module Datadog
41
41
 
42
42
  def adapter(config, *args, **kwargs)
43
43
  @default_adapter = case config
44
- when Core::Configuration::AgentSettingsResolver::AgentSettings
44
+ when Core::Configuration::AgentSettings
45
45
  registry_klass = REGISTRY.get(config.adapter)
46
46
  raise UnknownAdapterError, config.adapter if registry_klass.nil?
47
47
 
@@ -32,6 +32,14 @@ module Datadog
32
32
  self[:path] = value
33
33
  end
34
34
 
35
+ def query
36
+ self[:query]
37
+ end
38
+
39
+ def query=(value)
40
+ self[:query] = value
41
+ end
42
+
35
43
  def body
36
44
  self[:body]
37
45
  end
@@ -66,6 +66,13 @@ module Datadog
66
66
  placeholder
67
67
  end
68
68
 
69
+ def self.encode_tags(hash)
70
+ # Make sure everything is a UTF-8 string, to avoid encoding issues downstream
71
+ hash.each_with_object({}) do |(key, value), h|
72
+ h[utf8_encode(key)] = utf8_encode(value)
73
+ end
74
+ end
75
+
69
76
  # @!visibility private
70
77
  def self.without_warnings
71
78
  # This is typically used when monkey patching functions such as
@@ -52,6 +52,17 @@ module Datadog
52
52
  # (however, Probe instances can be replaced by OpenStruct instances
53
53
  # providing the same interface with not much effort).
54
54
  #
55
+ # Instrumenter (this class) is responsible for building snapshots.
56
+ # This is because to capture values on method entry, those values need to
57
+ # be duplicated or serialized into immutable values to prevent their
58
+ # modification by the instrumented method. Therefore this class must
59
+ # do at least some serialization/snapshot building and to keep the code
60
+ # well-encapsulated, all serialization/snapshot building should thus be
61
+ # initiated from this class rather than downstream code.
62
+ #
63
+ # As a consequence of Instrumenter building snapshots, it should not
64
+ # expose TracePoint objects to any downstream code.
65
+ #
55
66
  # @api private
56
67
  class Instrumenter
57
68
  def initialize(settings, serializer, logger, code_tracker: nil, telemetry: nil)
@@ -111,7 +122,8 @@ module Datadog
111
122
  # Arguments may be mutated by the method, therefore
112
123
  # they need to be serialized prior to method invocation.
113
124
  entry_args = if probe.capture_snapshot?
114
- serializer.serialize_args(args, kwargs,
125
+ instance_vars = Instrumenter.get_instance_variables(self)
126
+ serializer.serialize_args(args, kwargs, instance_vars,
115
127
  depth: probe.max_capture_depth || settings.dynamic_instrumentation.max_capture_depth,
116
128
  attribute_count: probe.max_capture_attribute_count || settings.dynamic_instrumentation.max_capture_attribute_count)
117
129
  end
@@ -149,6 +161,7 @@ module Datadog
149
161
  # TODO capture arguments at exit
150
162
  # & is to stop steep complaints, block is always present here.
151
163
  block&.call(probe: probe, rv: rv, duration: duration, caller_locations: caller_locs,
164
+ instance_vars: probe.capture_snapshot? ? Instrumenter.get_instance_variables(self) : nil,
152
165
  serialized_entry_args: entry_args)
153
166
  rv
154
167
  else
@@ -298,8 +311,20 @@ module Datadog
298
311
  probe.file == tp.path || probe.file_matches?(tp.path)
299
312
  )
300
313
  if rate_limiter.nil? || rate_limiter.allow?
314
+ locals = if probe.capture_snapshot?
315
+ serializer.serialize_vars(Instrumenter.get_local_variables(tp),
316
+ depth: probe.max_capture_depth || settings.dynamic_instrumentation.max_capture_depth,
317
+ attribute_count: probe.max_capture_attribute_count || settings.dynamic_instrumentation.max_capture_attribute_count,)
318
+ end
319
+ instance_vars = if probe.capture_snapshot?
320
+ serializer.serialize_vars(Instrumenter.get_instance_variables(tp.self),
321
+ depth: probe.max_capture_depth || settings.dynamic_instrumentation.max_capture_depth,
322
+ attribute_count: probe.max_capture_attribute_count || settings.dynamic_instrumentation.max_capture_attribute_count,)
323
+ end
301
324
  # & is to stop steep complaints, block is always present here.
302
- block&.call(probe: probe, trace_point: tp, caller_locations: caller_locations)
325
+ block&.call(probe: probe,
326
+ locals: locals, instance_vars: instance_vars,
327
+ path: tp.path, caller_locations: caller_locations)
303
328
  end
304
329
  end
305
330
  rescue => exc
@@ -371,6 +396,31 @@ module Datadog
371
396
  end
372
397
  end
373
398
 
399
+ class << self
400
+ def get_instance_variables(object)
401
+ {}.tap do |hash|
402
+ object.instance_variables.each do |var|
403
+ hash[var] = object.instance_variable_get(var)
404
+ end
405
+ end
406
+ end
407
+
408
+ def get_local_variables(trace_point)
409
+ # binding appears to be constructed on access, therefore
410
+ # 1) we should attempt to cache it and
411
+ # 2) we should not call +binding+ until we actually need variable values.
412
+ binding = trace_point.binding
413
+
414
+ # steep hack - should never happen
415
+ return {} unless binding
416
+
417
+ binding.local_variables.each_with_object({}) do |name, map|
418
+ value = binding.local_variable_get(name)
419
+ map[name] = value
420
+ end
421
+ end
422
+ end
423
+
374
424
  private
375
425
 
376
426
  attr_reader :lock
@@ -39,39 +39,50 @@ module Datadog
39
39
  end
40
40
 
41
41
  # Duration is in seconds.
42
+ # path is the actual path of the instrumented file.
42
43
  def build_executed(probe,
43
- trace_point: nil, rv: nil, duration: nil, caller_locations: nil,
44
- args: nil, kwargs: nil, serialized_entry_args: nil)
45
- snapshot = if probe.line? && probe.capture_snapshot?
46
- if trace_point.nil?
47
- raise "Cannot create snapshot because there is no trace point"
48
- end
49
- get_local_variables(trace_point)
50
- end
51
- # TODO check how many stack frames we should be keeping/sending,
52
- # this should be all frames for enriched probes and no frames for
53
- # non-enriched probes?
54
- build_snapshot(probe, rv: rv, snapshot: snapshot,
44
+ path: nil, rv: nil, duration: nil, caller_locations: nil,
45
+ locals: nil, args: nil, kwargs: nil, instance_vars: nil,
46
+ serialized_entry_args: nil)
47
+ build_snapshot(probe, rv: rv, locals: locals,
55
48
  # Actual path of the instrumented file.
56
- path: trace_point&.path,
57
- duration: duration, caller_locations: caller_locations, args: args, kwargs: kwargs,
49
+ path: path,
50
+ duration: duration,
51
+ # TODO check how many stack frames we should be keeping/sending,
52
+ # this should be all frames for enriched probes and no frames for
53
+ # non-enriched probes?
54
+ caller_locations: caller_locations,
55
+ args: args, kwargs: kwargs, instance_vars: instance_vars,
58
56
  serialized_entry_args: serialized_entry_args)
59
57
  end
60
58
 
61
- def build_snapshot(probe, rv: nil, snapshot: nil, path: nil,
62
- duration: nil, caller_locations: nil, args: nil, kwargs: nil,
59
+ def build_snapshot(probe, rv: nil, locals: nil, path: nil,
60
+ duration: nil, caller_locations: nil,
61
+ args: nil, kwargs: nil, instance_vars: nil,
63
62
  serialized_entry_args: nil)
64
63
  # TODO also verify that non-capturing probe does not pass
65
64
  # snapshot or vars/args into this method
66
65
  captures = if probe.capture_snapshot?
67
66
  if probe.method?
67
+ return_arguments = {
68
+ "@return": serializer.serialize_value(rv,
69
+ depth: probe.max_capture_depth || settings.dynamic_instrumentation.max_capture_depth,
70
+ attribute_count: probe.max_capture_attribute_count || settings.dynamic_instrumentation.max_capture_attribute_count),
71
+ }
72
+ if instance_vars
73
+ return_arguments.update(
74
+ serializer.serialize_vars(instance_vars,
75
+ depth: probe.max_capture_depth || settings.dynamic_instrumentation.max_capture_depth,
76
+ attribute_count: probe.max_capture_attribute_count || settings.dynamic_instrumentation.max_capture_attribute_count,)
77
+ )
78
+ end
68
79
  {
69
80
  entry: {
70
81
  # standard:disable all
71
82
  arguments: if serialized_entry_args
72
83
  serialized_entry_args
73
84
  else
74
- (args || kwargs) && serializer.serialize_args(args, kwargs,
85
+ (args || kwargs) && serializer.serialize_args(args, kwargs, instance_vars,
75
86
  depth: probe.max_capture_depth || settings.dynamic_instrumentation.max_capture_depth,
76
87
  attribute_count: probe.max_capture_attribute_count || settings.dynamic_instrumentation.max_capture_attribute_count)
77
88
  end,
@@ -79,20 +90,14 @@ module Datadog
79
90
  # standard:enable all
80
91
  },
81
92
  return: {
82
- arguments: {
83
- "@return": serializer.serialize_value(rv,
84
- depth: probe.max_capture_depth || settings.dynamic_instrumentation.max_capture_depth,
85
- attribute_count: probe.max_capture_attribute_count || settings.dynamic_instrumentation.max_capture_attribute_count),
86
- },
93
+ arguments: return_arguments,
87
94
  throwable: nil,
88
95
  },
89
96
  }
90
97
  elsif probe.line?
91
98
  {
92
- lines: snapshot && {
93
- probe.line_no => {locals: serializer.serialize_vars(snapshot,
94
- depth: probe.max_capture_depth || settings.dynamic_instrumentation.max_capture_depth,
95
- attribute_count: probe.max_capture_attribute_count || settings.dynamic_instrumentation.max_capture_attribute_count,)},
99
+ lines: locals && {
100
+ probe.line_no => {locals: locals.merge(instance_vars || {})},
96
101
  },
97
102
  }
98
103
  end
@@ -194,21 +199,6 @@ module Datadog
194
199
  (Core::Utils::Time.now.to_f * 1000).to_i
195
200
  end
196
201
 
197
- def get_local_variables(trace_point)
198
- # binding appears to be constructed on access, therefore
199
- # 1) we should attempt to cache it and
200
- # 2) we should not call +binding+ until we actually need variable values.
201
- binding = trace_point.binding
202
-
203
- # steep hack - should never happen
204
- return {} unless binding
205
-
206
- binding.local_variables.each_with_object({}) do |name, map|
207
- value = binding.local_variable_get(name)
208
- map[name] = value
209
- end
210
- end
211
-
212
202
  def active_trace
213
203
  if defined?(Datadog::Tracing)
214
204
  Datadog::Tracing.active_trace
@@ -183,7 +183,15 @@ module Datadog
183
183
  end
184
184
 
185
185
  def do_send_snapshot(batch)
186
- snapshot_transport.send_input(batch)
186
+ snapshot_transport.send_input(batch, tags)
187
+ end
188
+
189
+ def tags
190
+ # DEV: The tags could be cached but they need to be recreated
191
+ # when process forks (since the child receives new runtime IDs).
192
+ Core::TagBuilder.tags(settings).merge(
193
+ 'debugger_version' => Core::Environment::Identity.gem_datadog_version,
194
+ )
187
195
  end
188
196
 
189
197
  [