ddtrace 1.4.2 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -3
  3. data/LICENSE-3rdparty.csv +1 -0
  4. data/ext/ddtrace_profiling_native_extension/native_extension_helpers.rb +0 -2
  5. data/lib/datadog/appsec/assets/waf_rules/recommended.json +1169 -275
  6. data/lib/datadog/appsec/assets/waf_rules/risky.json +78 -78
  7. data/lib/datadog/appsec/assets/waf_rules/strict.json +278 -88
  8. data/lib/datadog/appsec/configuration/settings.rb +0 -2
  9. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +25 -20
  10. data/lib/datadog/appsec/contrib/rack/reactive/request.rb +11 -11
  11. data/lib/datadog/appsec/contrib/rack/reactive/request_body.rb +11 -11
  12. data/lib/datadog/appsec/contrib/rack/reactive/response.rb +11 -11
  13. data/lib/datadog/appsec/contrib/rack/request.rb +3 -0
  14. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +2 -1
  15. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +7 -6
  16. data/lib/datadog/appsec/contrib/rails/integration.rb +1 -1
  17. data/lib/datadog/appsec/contrib/rails/reactive/action.rb +11 -11
  18. data/lib/datadog/appsec/contrib/rails/request.rb +3 -0
  19. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +14 -12
  20. data/lib/datadog/appsec/contrib/sinatra/reactive/routed.rb +11 -11
  21. data/lib/datadog/appsec/event.rb +0 -8
  22. data/lib/datadog/appsec/instrumentation/gateway.rb +16 -2
  23. data/lib/datadog/appsec/processor.rb +18 -2
  24. data/lib/datadog/ci/ext/environment.rb +16 -4
  25. data/lib/datadog/core/configuration/agent_settings_resolver.rb +0 -3
  26. data/lib/datadog/core/configuration/components.rb +8 -2
  27. data/lib/datadog/core/configuration/settings.rb +71 -4
  28. data/lib/datadog/core/configuration.rb +1 -1
  29. data/lib/datadog/core/header_collection.rb +41 -0
  30. data/lib/datadog/core/telemetry/collector.rb +0 -2
  31. data/lib/datadog/core/workers/async.rb +0 -2
  32. data/lib/datadog/profiling/collectors/old_stack.rb +1 -1
  33. data/lib/datadog/profiling.rb +1 -1
  34. data/lib/datadog/tracing/client_ip.rb +153 -0
  35. data/lib/datadog/tracing/configuration/ext.rb +12 -0
  36. data/lib/datadog/tracing/contrib/aws/services.rb +0 -2
  37. data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +0 -2
  38. data/lib/datadog/tracing/contrib/ext.rb +19 -0
  39. data/lib/datadog/tracing/contrib/faraday/middleware.rb +1 -2
  40. data/lib/datadog/tracing/contrib/grape/endpoint.rb +0 -2
  41. data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +1 -1
  42. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +5 -4
  43. data/lib/datadog/tracing/contrib/rack/header_collection.rb +35 -0
  44. data/lib/datadog/tracing/contrib/rack/middlewares.rb +92 -38
  45. data/lib/datadog/tracing/contrib/utils/quantization/http.rb +92 -10
  46. data/lib/datadog/tracing/flush.rb +57 -35
  47. data/lib/datadog/tracing/metadata/ext.rb +3 -9
  48. data/lib/datadog/tracing/metadata/tagging.rb +9 -0
  49. data/lib/datadog/tracing/sampling/rate_limiter.rb +3 -0
  50. data/lib/datadog/tracing/sampling/rate_sampler.rb +10 -0
  51. data/lib/datadog/tracing/sampling/span/ext.rb +29 -0
  52. data/lib/datadog/tracing/sampling/span/matcher.rb +9 -0
  53. data/lib/datadog/tracing/sampling/span/rule.rb +82 -0
  54. data/lib/datadog/tracing/sampling/span/rule_parser.rb +104 -0
  55. data/lib/datadog/tracing/sampling/span/sampler.rb +64 -0
  56. data/lib/datadog/tracing/span_operation.rb +0 -2
  57. data/lib/datadog/tracing/trace_operation.rb +22 -3
  58. data/lib/datadog/tracing/trace_segment.rb +1 -2
  59. data/lib/datadog/tracing/tracer.rb +31 -5
  60. data/lib/ddtrace/transport/traces.rb +2 -0
  61. data/lib/ddtrace/version.rb +2 -2
  62. metadata +21 -7
@@ -3,12 +3,14 @@
3
3
  require 'date'
4
4
 
5
5
  require_relative '../../../core/environment/variable_helpers'
6
+ require_relative '../../client_ip'
6
7
  require_relative '../../metadata/ext'
7
8
  require_relative '../../propagation/http'
8
9
  require_relative '../analytics'
10
+ require_relative '../utils/quantization/http'
9
11
  require_relative 'ext'
12
+ require_relative 'header_collection'
10
13
  require_relative 'request_queue'
11
- require_relative '../utils/quantization/http'
12
14
 
13
15
  module Datadog
14
16
  module Tracing
@@ -121,20 +123,13 @@ module Datadog
121
123
  # rubocop:disable Metrics/PerceivedComplexity
122
124
  # rubocop:disable Metrics/MethodLength
123
125
  def set_request_tags!(trace, request_span, env, status, headers, response, original_env)
124
- # http://www.rubydoc.info/github/rack/rack/file/SPEC
125
- # The source of truth in Rack is the PATH_INFO key that holds the
126
- # URL for the current request; but some frameworks may override that
127
- # value, especially during exception handling.
128
- #
129
- # Because of this, we prefer to use REQUEST_URI, if available, which is the
130
- # relative path + query string, and doesn't mutate.
131
- #
132
- # REQUEST_URI is only available depending on what web server is running though.
133
- # So when its not available, we want the original, unmutated PATH_INFO, which
134
- # is just the relative path without query strings.
135
- url = env['REQUEST_URI'] || original_env['PATH_INFO']
136
- request_headers = parse_request_headers(env)
137
- response_headers = parse_response_headers(headers || {})
126
+ request_header_collection = Header::RequestHeaderCollection.new(env)
127
+ request_headers_tags = parse_request_headers(request_header_collection)
128
+ response_headers_tags = parse_response_headers(headers || {})
129
+
130
+ # request_headers is subject to filtering and configuration so we
131
+ # get the user agent separately
132
+ user_agent = parse_user_agent_header(request_header_collection)
138
133
 
139
134
  # The priority
140
135
  # 1. User overrides span.resource
@@ -169,8 +164,14 @@ module Datadog
169
164
  request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_METHOD, env['REQUEST_METHOD'])
170
165
  end
171
166
 
167
+ url = parse_url(env, original_env)
168
+
172
169
  if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_URL).nil?
173
- options = configuration[:quantize]
170
+ options = configuration[:quantize] || {}
171
+
172
+ # Quantization::HTTP.url base defaults to :show, but we are transitioning
173
+ options[:base] ||= :exclude
174
+
174
175
  request_span.set_tag(
175
176
  Tracing::Metadata::Ext::HTTP::TAG_URL,
176
177
  Contrib::Utils::Quantization::HTTP.url(url, options)
@@ -178,29 +179,43 @@ module Datadog
178
179
  end
179
180
 
180
181
  if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_BASE_URL).nil?
181
- request_obj = ::Rack::Request.new(env)
182
+ options = configuration[:quantize]
182
183
 
183
- base_url = if request_obj.respond_to?(:base_url)
184
- request_obj.base_url
185
- else
186
- # Compatibility for older Rack versions
187
- request_obj.url.chomp(request_obj.fullpath)
188
- end
184
+ unless options[:base] == :show
185
+ base_url = Contrib::Utils::Quantization::HTTP.base_url(url)
189
186
 
190
- request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_BASE_URL, base_url)
187
+ unless base_url.empty?
188
+ request_span.set_tag(
189
+ Tracing::Metadata::Ext::HTTP::TAG_BASE_URL,
190
+ base_url
191
+ )
192
+ end
193
+ end
194
+ end
195
+
196
+ if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_CLIENT_IP).nil?
197
+ Tracing::ClientIp.set_client_ip_tag(
198
+ request_span,
199
+ headers: request_header_collection,
200
+ remote_ip: env['REMOTE_ADDR']
201
+ )
191
202
  end
192
203
 
193
204
  if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_STATUS_CODE).nil? && status
194
205
  request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_STATUS_CODE, status)
195
206
  end
196
207
 
208
+ if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_USER_AGENT).nil? && user_agent
209
+ request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_USER_AGENT, user_agent)
210
+ end
211
+
197
212
  # Request headers
198
- request_headers.each do |name, value|
213
+ request_headers_tags.each do |name, value|
199
214
  request_span.set_tag(name, value) if request_span.get_tag(name).nil?
200
215
  end
201
216
 
202
217
  # Response headers
203
- response_headers.each do |name, value|
218
+ response_headers_tags.each do |name, value|
204
219
  request_span.set_tag(name, value) if request_span.get_tag(name).nil?
205
220
  end
206
221
 
@@ -219,14 +234,57 @@ module Datadog
219
234
  Datadog.configuration.tracing[:rack]
220
235
  end
221
236
 
222
- def parse_request_headers(env)
223
- {}.tap do |result|
224
- whitelist = configuration[:headers][:request] || []
225
- whitelist.each do |header|
226
- rack_header = header_to_rack_header(header)
227
- if env.key?(rack_header)
228
- result[Tracing::Metadata::Ext::HTTP::RequestHeaders.to_tag(header)] = env[rack_header]
229
- end
237
+ def parse_url(env, original_env)
238
+ request_obj = ::Rack::Request.new(env)
239
+
240
+ # scheme, host, and port
241
+ base_url = if request_obj.respond_to?(:base_url)
242
+ request_obj.base_url
243
+ else
244
+ # Compatibility for older Rack versions
245
+ request_obj.url.chomp(request_obj.fullpath)
246
+ end
247
+
248
+ # https://github.com/rack/rack/blob/main/SPEC.rdoc
249
+ #
250
+ # The source of truth in Rack is the PATH_INFO key that holds the
251
+ # URL for the current request; but some frameworks may override that
252
+ # value, especially during exception handling.
253
+ #
254
+ # Because of this, we prefer to use REQUEST_URI, if available, which is the
255
+ # relative path + query string, and doesn't mutate.
256
+ #
257
+ # REQUEST_URI is only available depending on what web server is running though.
258
+ # So when it's not available, we want the original, unmutated PATH_INFO, which
259
+ # is just the relative path without query strings.
260
+ #
261
+ # SCRIPT_NAME is the first part of the request URL path, so that
262
+ # the application can know its virtual location. It should be
263
+ # prepended to PATH_INFO to reflect the correct user visible path.
264
+ request_uri = env['REQUEST_URI'].to_s
265
+ fullpath = if request_uri.empty?
266
+ query_string = original_env['QUERY_STRING'].to_s
267
+ path = original_env['SCRIPT_NAME'].to_s + original_env['PATH_INFO'].to_s
268
+
269
+ query_string.empty? ? path : "#{path}?#{query_string}"
270
+ else
271
+ request_uri
272
+ end
273
+
274
+ ::URI.join(base_url, fullpath).to_s
275
+ end
276
+
277
+ def parse_user_agent_header(headers)
278
+ headers.get(Tracing::Metadata::Ext::HTTP::HEADER_USER_AGENT)
279
+ end
280
+
281
+ def parse_request_headers(headers)
282
+ whitelist = configuration[:headers][:request] || []
283
+ whitelist.each_with_object({}) do |header, result|
284
+ header_value = headers.get(header)
285
+ unless header_value.nil?
286
+ header_tag = Tracing::Metadata::Ext::HTTP::RequestHeaders.to_tag(header)
287
+ result[header_tag] = header_value
230
288
  end
231
289
  end
232
290
  end
@@ -248,10 +306,6 @@ module Datadog
248
306
  end
249
307
  end
250
308
  end
251
-
252
- def header_to_rack_header(name)
253
- "HTTP_#{name.to_s.upcase.gsub(/[-\s]/, '_')}"
254
- end
255
309
  end
256
310
  end
257
311
  end
@@ -14,12 +14,28 @@ module Datadog
14
14
 
15
15
  PLACEHOLDER = '?'.freeze
16
16
 
17
+ # taken from Ruby https://github.com/ruby/uri/blob/ffbab83de6d8748c9454414e02db5317609166eb/lib/uri/rfc3986_parser.rb
18
+ # but adjusted to parse only <scheme>://<host>:<port>/ components
19
+ # and stop there, since we don't care about the path, query string,
20
+ # and fragment components
21
+ RFC3986_URL_BASE = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*))(?::(?<port>\d*))?)))(?:\/|\z)/.freeze # rubocop:disable Style/RegexpLiteral, Layout/LineLength
22
+
17
23
  module_function
18
24
 
19
25
  def url(url, options = {})
20
26
  url!(url, options)
21
27
  rescue StandardError
22
- options[:placeholder] || PLACEHOLDER
28
+ placeholder = options[:placeholder] || PLACEHOLDER
29
+
30
+ options[:base] == :exclude ? placeholder : "#{base_url(url)}/#{placeholder}"
31
+ end
32
+
33
+ def base_url(url, options = {})
34
+ if (m = RFC3986_URL_BASE.match(url))
35
+ m[1]
36
+ else
37
+ ''
38
+ end
23
39
  end
24
40
 
25
41
  def url!(url, options = {})
@@ -32,8 +48,14 @@ module Datadog
32
48
  uri.query = (!query.nil? && query.empty? ? nil : query)
33
49
  end
34
50
 
35
- # Remove any URI framents
51
+ # Remove any URI fragments
36
52
  uri.fragment = nil unless options[:fragment] == :show
53
+
54
+ if options[:base] == :exclude
55
+ uri.host = nil
56
+ uri.port = nil
57
+ uri.scheme = nil
58
+ end
37
59
  end.to_s
38
60
  end
39
61
 
@@ -45,22 +67,26 @@ module Datadog
45
67
 
46
68
  def query!(query, options = {})
47
69
  options ||= {}
48
- options[:show] = options[:show] || []
70
+ options[:obfuscate] = {} if options[:obfuscate] == :internal
71
+ options[:show] = options[:show] || (options[:obfuscate] ? :all : [])
49
72
  options[:exclude] = options[:exclude] || []
50
73
 
51
74
  # Short circuit if query string is meant to exclude everything
52
75
  # or if the query string is meant to include everything
53
76
  return '' if options[:exclude] == :all
54
- return query if options[:show] == :all
55
77
 
56
- collect_query(query, uniq: true) do |key, value|
57
- if options[:exclude].include?(key)
58
- [nil, nil]
59
- else
60
- value = options[:show].include?(key) ? value : nil
61
- [key, value]
78
+ unless options[:show] == :all && !(options[:obfuscate] && options[:exclude])
79
+ query = collect_query(query, uniq: true) do |key, value|
80
+ if options[:exclude].include?(key)
81
+ [nil, nil]
82
+ else
83
+ value = options[:show] == :all || options[:show].include?(key) ? value : nil
84
+ [key, value]
85
+ end
62
86
  end
63
87
  end
88
+
89
+ options[:obfuscate] ? obfuscate_query(query, options[:obfuscate]) : query
64
90
  end
65
91
 
66
92
  # Iterate over each key value pair, yielding to the block given.
@@ -91,6 +117,62 @@ module Datadog
91
117
  end
92
118
 
93
119
  private_class_method :collect_query
120
+
121
+ # Scans over the query string and obfuscates sensitive data by
122
+ # replacing matches with an opaque value
123
+ def obfuscate_query(query, options = {})
124
+ options[:regex] = nil if options[:regex] == :internal
125
+ re = options[:regex] || OBFUSCATOR_REGEX
126
+ with = options[:with] || OBFUSCATOR_WITH
127
+
128
+ query.gsub(re, with)
129
+ end
130
+
131
+ private_class_method :obfuscate_query
132
+
133
+ OBFUSCATOR_WITH = '<redacted>'.freeze
134
+
135
+ # rubocop:disable Layout/LineLength
136
+ OBFUSCATOR_REGEX = %r{
137
+ (?: # JSON-ish leading quote
138
+ (?:"|%22)?
139
+ )
140
+ (?: # common keys
141
+ (?:old_?|new_?)?p(?:ass)?w(?:or)?d(?:1|2)? # pw, password variants
142
+ |pass(?:_?phrase)? # pass, passphrase variants
143
+ |secret
144
+ |(?: # key, key_id variants
145
+ api_?
146
+ |private_?
147
+ |public_?
148
+ |access_?
149
+ |secret_?
150
+ )key(?:_?id)?
151
+ |token
152
+ |consumer_?(?:id|key|secret)
153
+ |sign(?:ed|ature)?
154
+ |auth(?:entication|orization)?
155
+ )
156
+ (?:
157
+ # '=' query string separator, plus value til next '&' separator
158
+ (?:\s|%20)*(?:=|%3D)[^&]+
159
+ # JSON-ish '": "somevalue"', key being handled with case above, without the opening '"'
160
+ |(?:"|%22) # closing '"' at end of key
161
+ (?:\s|%20)*(?::|%3A)(?:\s|%20)* # ':' key-value separator, with surrounding spaces
162
+ (?:"|%22) # opening '"' at start of value
163
+ (?:%2[^2]|%[^2]|[^"%])+ # value
164
+ (?:"|%22) # closing '"' at end of value
165
+ )
166
+ |(?: # other common secret values
167
+ bearer(?:\s|%20)+[a-z0-9._\-]+
168
+ |token(?::|%3A)[a-z0-9]{13}
169
+ |gh[opsu]_[0-9a-zA-Z]{36}
170
+ |ey[I-L](?:[\w=-]|%3D)+\.ey[I-L](?:[\w=-]|%3D)+(?:\.(?:[\w.+/=-]|%3D|%2F|%2B)+)?
171
+ |-{5}BEGIN(?:[a-z\s]|%20)+PRIVATE(?:\s|%20)KEY-{5}[^\-]+-{5}END(?:[a-z\s]|%20)+PRIVATE(?:\s|%20)KEY(?:-{5})?(?:\n|%0A)?
172
+ |(?:ssh-(?:rsa|dss)|ecdsa-[a-z0-9]+-[a-z0-9]+)(?:\s|%20)*(?:[a-z0-9/.+]|%2F|%5C|%2B){100,}(?:=|%3D)*(?:(?:\s+)[a-z0-9._-]+)?
173
+ )
174
+ }ix.freeze
175
+ # rubocop:enable Layout/LineLength
94
176
  end
95
177
  end
96
178
  end
@@ -3,71 +3,93 @@
3
3
  module Datadog
4
4
  module Tracing
5
5
  module Flush
6
- # Consumes only completed traces (where all spans have finished)
7
- class Finished
8
- # Consumes and returns completed traces (where all spans have finished)
9
- # from the provided \trace_op, if any.
6
+ # Consumes and returns a {TraceSegment} to be flushed, from
7
+ # the provided {TraceOperation}.
8
+ #
9
+ # Only finished spans are consumed. Any spans consumed are
10
+ # removed from +trace_op+ as a side effect. Unfinished spans are
11
+ # unaffected.
12
+ #
13
+ # @abstract
14
+ class Base
15
+ # Consumes and returns a {TraceSegment} to be flushed, from
16
+ # the provided {TraceOperation}.
10
17
  #
11
- # Any traces consumed are removed from +trace_op+ as a side effect.
18
+ # Only finished spans are consumed. Any spans consumed are
19
+ # removed from +trace_op+ as a side effect. Unfinished spans are
20
+ # unaffected.
12
21
  #
22
+ # @param [TraceOperation] trace_op
13
23
  # @return [TraceSegment] trace to be flushed, or +nil+ if the trace is not finished
14
24
  def consume!(trace_op)
15
- return unless full_flush?(trace_op)
25
+ return unless flush?(trace_op)
16
26
 
17
27
  get_trace(trace_op)
18
28
  end
19
29
 
20
- def full_flush?(trace_op)
21
- trace_op && trace_op.sampled? && trace_op.finished?
30
+ # Should we consume spans from the +trace_op+?
31
+ # @abstract
32
+ def flush?(trace_op)
33
+ raise NotImplementedError
22
34
  end
23
35
 
24
36
  protected
25
37
 
38
+ # Consumes all finished spans from trace.
39
+ # @return [TraceSegment]
26
40
  def get_trace(trace_op)
27
- trace_op.flush!
41
+ trace_op.flush! do |spans|
42
+ spans.select! { |span| single_sampled?(span) } unless trace_op.sampled?
43
+
44
+ spans
45
+ end
46
+ end
47
+
48
+ # Single Span Sampling has chosen to keep this span
49
+ # regardless of the trace-level sampling decision
50
+ def single_sampled?(span)
51
+ span.get_metric(Sampling::Span::Ext::TAG_MECHANISM) == Sampling::Span::Ext::MECHANISM_SPAN_SAMPLING_RATE
52
+ end
53
+ end
54
+
55
+ # Consumes and returns completed traces (where all spans have finished),
56
+ # if any, from the provided +trace_op+.
57
+ #
58
+ # Spans consumed are removed from +trace_op+ as a side effect.
59
+ class Finished < Base
60
+ # Are all spans finished?
61
+ def flush?(trace_op)
62
+ trace_op && trace_op.finished?
28
63
  end
29
64
  end
30
65
 
31
- # Performs partial trace flushing to avoid large traces residing in memory for too long
32
- class Partial
66
+ # Consumes and returns completed or partially completed
67
+ # traces from the provided +trace_op+, if any.
68
+ #
69
+ # Partial trace flushing avoids large traces residing in memory for too long.
70
+ #
71
+ # Partially completed traces, where not all spans have finished,
72
+ # will only be returned if there are at least
73
+ # +@min_spans_for_partial+ finished spans.
74
+ #
75
+ # Spans consumed are removed from +trace_op+ as a side effect.
76
+ class Partial < Base
33
77
  # Start flushing partial trace after this many active spans in one trace
34
78
  DEFAULT_MIN_SPANS_FOR_PARTIAL_FLUSH = 500
35
79
 
36
80
  attr_reader :min_spans_for_partial
37
81
 
38
82
  def initialize(options = {})
83
+ super()
39
84
  @min_spans_for_partial = options.fetch(:min_spans_before_partial_flush, DEFAULT_MIN_SPANS_FOR_PARTIAL_FLUSH)
40
85
  end
41
86
 
42
- # Consumes and returns completed or partially completed
43
- # traces from the provided +trace_op+, if any.
44
- #
45
- # Partially completed traces, where not all spans have finished,
46
- # will only be returned if there are at least
47
- # +@min_spans_for_partial+ finished spans.
48
- #
49
- # Any spans consumed are removed from +trace_op+ as a side effect.
50
- #
51
- # @return [TraceSegment] partial or complete trace to be flushed, or +nil+ if no spans are finished
52
- def consume!(trace_op)
53
- return unless partial_flush?(trace_op)
54
-
55
- get_trace(trace_op)
56
- end
57
-
58
- def partial_flush?(trace_op)
59
- return false unless trace_op.sampled?
87
+ def flush?(trace_op)
60
88
  return true if trace_op.finished?
61
89
  return false if trace_op.finished_span_count < @min_spans_for_partial
62
90
 
63
91
  true
64
92
  end
65
-
66
- protected
67
-
68
- def get_trace(trace_op)
69
- trace_op.flush!
70
- end
71
93
  end
72
94
  end
73
95
  end
@@ -63,11 +63,14 @@ module Datadog
63
63
  TAG_BASE_URL = 'http.base_url'
64
64
  TAG_METHOD = 'http.method'
65
65
  TAG_STATUS_CODE = 'http.status_code'
66
+ TAG_USER_AGENT = 'http.useragent'
66
67
  TAG_URL = 'http.url'
67
68
  TYPE_INBOUND = AppTypes::TYPE_WEB.freeze
68
69
  TYPE_OUTBOUND = 'http'
69
70
  TYPE_PROXY = 'proxy'
70
71
  TYPE_TEMPLATE = 'template'
72
+ TAG_CLIENT_IP = 'http.client_ip'
73
+ HEADER_USER_AGENT = 'User-Agent'
71
74
 
72
75
  # General header functionality
73
76
  module Headers
@@ -153,15 +156,6 @@ module Datadog
153
156
  TAG_QUERY = 'sql.query'
154
157
  end
155
158
 
156
- # @public_api
157
- module DB
158
- TAG_INSTANCE = 'db.instance'
159
- TAG_USER = 'db.user'
160
- TAG_SYSTEM = 'db.system'
161
- TAG_STATEMENT = 'db.statement'
162
- TAG_ROW_COUNT = 'db.row_count'
163
- end
164
-
165
159
  # @public_api
166
160
  module SpanKind
167
161
  TAG_SERVER = 'server'
@@ -65,6 +65,15 @@ module Datadog
65
65
  tags.each { |k, v| set_tag(k, v) }
66
66
  end
67
67
 
68
+ # Returns true if the provided `tag` was set to a non-nil value.
69
+ # False otherwise.
70
+ #
71
+ # @param [String] tag the tag or metric to check for presence
72
+ # @return [Boolean] if the tag is present and not nil
73
+ def has_tag?(tag) # rubocop:disable Naming/PredicateName
74
+ !get_tag(tag).nil? # nil is considered not present, thus we can't use `Hash#has_key?`
75
+ end
76
+
68
77
  # This method removes a tag for the given key.
69
78
  def clear_tag(key)
70
79
  meta.delete(key)
@@ -39,6 +39,9 @@ module Datadog
39
39
  def initialize(rate, max_tokens = rate)
40
40
  super()
41
41
 
42
+ raise ArgumentError, "rate must be a number: #{rate}" unless rate.is_a?(Numeric)
43
+ raise ArgumentError, "max_tokens must be a number: #{max_tokens}" unless max_tokens.is_a?(Numeric)
44
+
42
45
  @rate = rate
43
46
  @max_tokens = max_tokens
44
47
 
@@ -20,6 +20,16 @@ module Datadog
20
20
  # * +sample_rate+: the sample rate as a {Float} between 0.0 and 1.0. 0.0
21
21
  # means that no trace will be sampled; 1.0 means that all traces will be
22
22
  # sampled.
23
+ #
24
+ # DEV-2.0: Allow for `sample_rate` zero (drop all) to be allowed. This eases
25
+ # DEV-2.0: usage for all internal users of the {RateSampler} class: both
26
+ # DEV-2.0: RuleSampler and Single Span Sampling leverage the RateSampler, but want
27
+ # DEV-2.0: `sample_rate` zero to mean "drop all". They work around this by hard-
28
+ # DEV-2.0: setting the `sample_rate` to zero like so:
29
+ # DEV-2.0: ```
30
+ # DEV-2.0: sampler = RateSampler.new
31
+ # DEV-2.0: sampler.sample_rate = sample_rate
32
+ # DEV-2.0: ```
23
33
  def initialize(sample_rate = 1.0)
24
34
  super()
25
35
 
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Datadog
4
+ module Tracing
5
+ module Sampling
6
+ module Span
7
+ # Single Span Sampling constants.
8
+ module Ext
9
+ # Accept all spans (100% retention).
10
+ DEFAULT_SAMPLE_RATE = 1.0
11
+ # Unlimited.
12
+ # @see Datadog::Tracing::Sampling::TokenBucket
13
+ DEFAULT_MAX_PER_SECOND = -1
14
+
15
+ # Sampling decision method used to come to the sampling decision for this span
16
+ TAG_MECHANISM = '_dd.span_sampling.mechanism'
17
+ # Sampling rate applied to this span, if a rule applies
18
+ TAG_RULE_RATE = '_dd.span_sampling.rule_rate'
19
+ # Rate limit configured for this span, if a rule applies
20
+ TAG_MAX_PER_SECOND = '_dd.span_sampling.max_per_second'
21
+
22
+ # This span was sampled on account of a Span Sampling Rule
23
+ # @see Datadog::Tracing::Sampling::Span::Rule
24
+ MECHANISM_SPAN_SAMPLING_RATE = 8
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -6,6 +6,8 @@ module Datadog
6
6
  module Span
7
7
  # Checks if a span conforms to a matching criteria.
8
8
  class Matcher
9
+ attr_reader :name, :service
10
+
9
11
  # Pattern that matches any string
10
12
  MATCH_ALL_PATTERN = '*'
11
13
 
@@ -54,6 +56,13 @@ module Datadog
54
56
  end
55
57
  end
56
58
 
59
+ def ==(other)
60
+ return super unless other.is_a?(Matcher)
61
+
62
+ name == other.name &&
63
+ service == other.service
64
+ end
65
+
57
66
  private
58
67
 
59
68
  # @param pattern [String]
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ext'
4
+
5
+ module Datadog
6
+ module Tracing
7
+ module Sampling
8
+ module Span
9
+ # Span sampling rule that applies a sampling rate if the span
10
+ # matches the provided {Matcher}.
11
+ # Additionally, a rate limiter is also applied.
12
+ #
13
+ # If a span does not conform to the matcher, no changes are made.
14
+ class Rule
15
+ attr_reader :matcher, :sample_rate, :rate_limit
16
+
17
+ # Creates a new span sampling rule.
18
+ #
19
+ # @param [Sampling::Span::Matcher] matcher whether this rule applies to a specific span
20
+ # @param [Float] sample_rate span sampling ratio, between 0.0 (0%) and 1.0 (100%).
21
+ # @param [Numeric] rate_limit maximum number of spans sampled per second. Negative numbers mean unlimited spans.
22
+ def initialize(
23
+ matcher,
24
+ sample_rate: Span::Ext::DEFAULT_SAMPLE_RATE,
25
+ rate_limit: Span::Ext::DEFAULT_MAX_PER_SECOND
26
+ )
27
+
28
+ @matcher = matcher
29
+ @sample_rate = sample_rate
30
+ @rate_limit = rate_limit
31
+
32
+ @sampler = Sampling::RateSampler.new
33
+ # Set the sample_rate outside of the initializer to allow for
34
+ # zero to be a "drop all".
35
+ # The RateSampler initializer enforces non-zero, falling back to 100% sampling
36
+ # if zero is provided.
37
+ @sampler.sample_rate = sample_rate
38
+ @rate_limiter = Sampling::TokenBucket.new(rate_limit)
39
+ end
40
+
41
+ # This method should only be invoked for spans that are part
42
+ # of a trace that has been dropped by trace-level sampling.
43
+ # Invoking it for other spans will cause incorrect sampling
44
+ # metrics to be reported by the Datadog App.
45
+ #
46
+ # Returns `:kept` if the provided span is sampled.
47
+ # If the span is dropped due to sampling rate or rate limiting,
48
+ # it returns `:rejected`.
49
+ #
50
+ # Returns `:not_matched` if the span did not meet the matching criteria by the
51
+ # provided matcher.
52
+ #
53
+ # This method modifies the `span` if it matches the provided matcher.
54
+ #
55
+ # @param [Datadog::Tracing::SpanOperation] span_op span to be sampled
56
+ # @return [:kept,:rejected] should this span be sampled?
57
+ # @return [:not_matched] span did not satisfy the matcher, no changes are made to the span
58
+ def sample!(span_op)
59
+ return :not_matched unless @matcher.match?(span_op)
60
+
61
+ if @sampler.sample?(span_op) && @rate_limiter.allow?(1)
62
+ span_op.set_metric(Span::Ext::TAG_MECHANISM, Span::Ext::MECHANISM_SPAN_SAMPLING_RATE)
63
+ span_op.set_metric(Span::Ext::TAG_RULE_RATE, @sample_rate)
64
+ span_op.set_metric(Span::Ext::TAG_MAX_PER_SECOND, @rate_limit)
65
+ :kept
66
+ else
67
+ :rejected
68
+ end
69
+ end
70
+
71
+ def ==(other)
72
+ return super unless other.is_a?(Rule)
73
+
74
+ matcher == other.matcher &&
75
+ sample_rate == other.sample_rate &&
76
+ rate_limit == other.rate_limit
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end