ddtrace 1.4.2 → 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -3
  3. data/LICENSE-3rdparty.csv +1 -0
  4. data/ext/ddtrace_profiling_native_extension/native_extension_helpers.rb +0 -2
  5. data/lib/datadog/appsec/assets/waf_rules/recommended.json +1169 -275
  6. data/lib/datadog/appsec/assets/waf_rules/risky.json +78 -78
  7. data/lib/datadog/appsec/assets/waf_rules/strict.json +278 -88
  8. data/lib/datadog/appsec/configuration/settings.rb +0 -2
  9. data/lib/datadog/appsec/contrib/rack/gateway/watcher.rb +25 -20
  10. data/lib/datadog/appsec/contrib/rack/reactive/request.rb +11 -11
  11. data/lib/datadog/appsec/contrib/rack/reactive/request_body.rb +11 -11
  12. data/lib/datadog/appsec/contrib/rack/reactive/response.rb +11 -11
  13. data/lib/datadog/appsec/contrib/rack/request.rb +3 -0
  14. data/lib/datadog/appsec/contrib/rack/request_middleware.rb +2 -1
  15. data/lib/datadog/appsec/contrib/rails/gateway/watcher.rb +7 -6
  16. data/lib/datadog/appsec/contrib/rails/integration.rb +1 -1
  17. data/lib/datadog/appsec/contrib/rails/reactive/action.rb +11 -11
  18. data/lib/datadog/appsec/contrib/rails/request.rb +3 -0
  19. data/lib/datadog/appsec/contrib/sinatra/gateway/watcher.rb +14 -12
  20. data/lib/datadog/appsec/contrib/sinatra/reactive/routed.rb +11 -11
  21. data/lib/datadog/appsec/event.rb +0 -8
  22. data/lib/datadog/appsec/instrumentation/gateway.rb +16 -2
  23. data/lib/datadog/appsec/processor.rb +18 -2
  24. data/lib/datadog/ci/ext/environment.rb +16 -4
  25. data/lib/datadog/core/configuration/agent_settings_resolver.rb +0 -3
  26. data/lib/datadog/core/configuration/components.rb +8 -2
  27. data/lib/datadog/core/configuration/settings.rb +71 -4
  28. data/lib/datadog/core/configuration.rb +1 -1
  29. data/lib/datadog/core/header_collection.rb +41 -0
  30. data/lib/datadog/core/telemetry/collector.rb +0 -2
  31. data/lib/datadog/core/workers/async.rb +0 -2
  32. data/lib/datadog/profiling/collectors/old_stack.rb +1 -1
  33. data/lib/datadog/profiling.rb +1 -1
  34. data/lib/datadog/tracing/client_ip.rb +153 -0
  35. data/lib/datadog/tracing/configuration/ext.rb +12 -0
  36. data/lib/datadog/tracing/contrib/aws/services.rb +0 -2
  37. data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +0 -2
  38. data/lib/datadog/tracing/contrib/ext.rb +19 -0
  39. data/lib/datadog/tracing/contrib/faraday/middleware.rb +1 -2
  40. data/lib/datadog/tracing/contrib/grape/endpoint.rb +0 -2
  41. data/lib/datadog/tracing/contrib/graphql/configuration/settings.rb +1 -1
  42. data/lib/datadog/tracing/contrib/pg/instrumentation.rb +5 -4
  43. data/lib/datadog/tracing/contrib/rack/header_collection.rb +35 -0
  44. data/lib/datadog/tracing/contrib/rack/middlewares.rb +92 -38
  45. data/lib/datadog/tracing/contrib/utils/quantization/http.rb +92 -10
  46. data/lib/datadog/tracing/flush.rb +57 -35
  47. data/lib/datadog/tracing/metadata/ext.rb +3 -9
  48. data/lib/datadog/tracing/metadata/tagging.rb +9 -0
  49. data/lib/datadog/tracing/sampling/rate_limiter.rb +3 -0
  50. data/lib/datadog/tracing/sampling/rate_sampler.rb +10 -0
  51. data/lib/datadog/tracing/sampling/span/ext.rb +29 -0
  52. data/lib/datadog/tracing/sampling/span/matcher.rb +9 -0
  53. data/lib/datadog/tracing/sampling/span/rule.rb +82 -0
  54. data/lib/datadog/tracing/sampling/span/rule_parser.rb +104 -0
  55. data/lib/datadog/tracing/sampling/span/sampler.rb +64 -0
  56. data/lib/datadog/tracing/span_operation.rb +0 -2
  57. data/lib/datadog/tracing/trace_operation.rb +22 -3
  58. data/lib/datadog/tracing/trace_segment.rb +1 -2
  59. data/lib/datadog/tracing/tracer.rb +31 -5
  60. data/lib/ddtrace/transport/traces.rb +2 -0
  61. data/lib/ddtrace/version.rb +2 -2
  62. metadata +21 -7
@@ -3,12 +3,14 @@
3
3
  require 'date'
4
4
 
5
5
  require_relative '../../../core/environment/variable_helpers'
6
+ require_relative '../../client_ip'
6
7
  require_relative '../../metadata/ext'
7
8
  require_relative '../../propagation/http'
8
9
  require_relative '../analytics'
10
+ require_relative '../utils/quantization/http'
9
11
  require_relative 'ext'
12
+ require_relative 'header_collection'
10
13
  require_relative 'request_queue'
11
- require_relative '../utils/quantization/http'
12
14
 
13
15
  module Datadog
14
16
  module Tracing
@@ -121,20 +123,13 @@ module Datadog
121
123
  # rubocop:disable Metrics/PerceivedComplexity
122
124
  # rubocop:disable Metrics/MethodLength
123
125
  def set_request_tags!(trace, request_span, env, status, headers, response, original_env)
124
- # http://www.rubydoc.info/github/rack/rack/file/SPEC
125
- # The source of truth in Rack is the PATH_INFO key that holds the
126
- # URL for the current request; but some frameworks may override that
127
- # value, especially during exception handling.
128
- #
129
- # Because of this, we prefer to use REQUEST_URI, if available, which is the
130
- # relative path + query string, and doesn't mutate.
131
- #
132
- # REQUEST_URI is only available depending on what web server is running though.
133
- # So when its not available, we want the original, unmutated PATH_INFO, which
134
- # is just the relative path without query strings.
135
- url = env['REQUEST_URI'] || original_env['PATH_INFO']
136
- request_headers = parse_request_headers(env)
137
- response_headers = parse_response_headers(headers || {})
126
+ request_header_collection = Header::RequestHeaderCollection.new(env)
127
+ request_headers_tags = parse_request_headers(request_header_collection)
128
+ response_headers_tags = parse_response_headers(headers || {})
129
+
130
+ # request_headers is subject to filtering and configuration so we
131
+ # get the user agent separately
132
+ user_agent = parse_user_agent_header(request_header_collection)
138
133
 
139
134
  # The priority
140
135
  # 1. User overrides span.resource
@@ -169,8 +164,14 @@ module Datadog
169
164
  request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_METHOD, env['REQUEST_METHOD'])
170
165
  end
171
166
 
167
+ url = parse_url(env, original_env)
168
+
172
169
  if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_URL).nil?
173
- options = configuration[:quantize]
170
+ options = configuration[:quantize] || {}
171
+
172
+ # Quantization::HTTP.url base defaults to :show, but we are transitioning to :exclude
173
+ options[:base] ||= :exclude
174
+
174
175
  request_span.set_tag(
175
176
  Tracing::Metadata::Ext::HTTP::TAG_URL,
176
177
  Contrib::Utils::Quantization::HTTP.url(url, options)
@@ -178,29 +179,43 @@ module Datadog
178
179
  end
179
180
 
180
181
  if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_BASE_URL).nil?
181
- request_obj = ::Rack::Request.new(env)
182
+ options = configuration[:quantize]
182
183
 
183
- base_url = if request_obj.respond_to?(:base_url)
184
- request_obj.base_url
185
- else
186
- # Compatibility for older Rack versions
187
- request_obj.url.chomp(request_obj.fullpath)
188
- end
184
+ unless options[:base] == :show
185
+ base_url = Contrib::Utils::Quantization::HTTP.base_url(url)
189
186
 
190
- request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_BASE_URL, base_url)
187
+ unless base_url.empty?
188
+ request_span.set_tag(
189
+ Tracing::Metadata::Ext::HTTP::TAG_BASE_URL,
190
+ base_url
191
+ )
192
+ end
193
+ end
194
+ end
195
+
196
+ if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_CLIENT_IP).nil?
197
+ Tracing::ClientIp.set_client_ip_tag(
198
+ request_span,
199
+ headers: request_header_collection,
200
+ remote_ip: env['REMOTE_ADDR']
201
+ )
191
202
  end
192
203
 
193
204
  if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_STATUS_CODE).nil? && status
194
205
  request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_STATUS_CODE, status)
195
206
  end
196
207
 
208
+ if request_span.get_tag(Tracing::Metadata::Ext::HTTP::TAG_USER_AGENT).nil? && user_agent
209
+ request_span.set_tag(Tracing::Metadata::Ext::HTTP::TAG_USER_AGENT, user_agent)
210
+ end
211
+
197
212
  # Request headers
198
- request_headers.each do |name, value|
213
+ request_headers_tags.each do |name, value|
199
214
  request_span.set_tag(name, value) if request_span.get_tag(name).nil?
200
215
  end
201
216
 
202
217
  # Response headers
203
- response_headers.each do |name, value|
218
+ response_headers_tags.each do |name, value|
204
219
  request_span.set_tag(name, value) if request_span.get_tag(name).nil?
205
220
  end
206
221
 
@@ -219,14 +234,57 @@ module Datadog
219
234
  Datadog.configuration.tracing[:rack]
220
235
  end
221
236
 
222
- def parse_request_headers(env)
223
- {}.tap do |result|
224
- whitelist = configuration[:headers][:request] || []
225
- whitelist.each do |header|
226
- rack_header = header_to_rack_header(header)
227
- if env.key?(rack_header)
228
- result[Tracing::Metadata::Ext::HTTP::RequestHeaders.to_tag(header)] = env[rack_header]
229
- end
237
+ def parse_url(env, original_env)
238
+ request_obj = ::Rack::Request.new(env)
239
+
240
+ # scheme, host, and port
241
+ base_url = if request_obj.respond_to?(:base_url)
242
+ request_obj.base_url
243
+ else
244
+ # Compatibility for older Rack versions
245
+ request_obj.url.chomp(request_obj.fullpath)
246
+ end
247
+
248
+ # https://github.com/rack/rack/blob/main/SPEC.rdoc
249
+ #
250
+ # The source of truth in Rack is the PATH_INFO key that holds the
251
+ # URL for the current request; but some frameworks may override that
252
+ # value, especially during exception handling.
253
+ #
254
+ # Because of this, we prefer to use REQUEST_URI, if available, which is the
255
+ # relative path + query string, and doesn't mutate.
256
+ #
257
+ # REQUEST_URI is only available depending on what web server is running though.
258
+ # So when it's not available, we want the original, unmutated PATH_INFO, which
259
+ # is just the relative path without query strings.
260
+ #
261
+ # SCRIPT_NAME is the first part of the request URL path, so that
262
+ # the application can know its virtual location. It should be
263
+ # prepended to PATH_INFO to reflect the correct user visible path.
264
+ request_uri = env['REQUEST_URI'].to_s
265
+ fullpath = if request_uri.empty?
266
+ query_string = original_env['QUERY_STRING'].to_s
267
+ path = original_env['SCRIPT_NAME'].to_s + original_env['PATH_INFO'].to_s
268
+
269
+ query_string.empty? ? path : "#{path}?#{query_string}"
270
+ else
271
+ request_uri
272
+ end
273
+
274
+ ::URI.join(base_url, fullpath).to_s
275
+ end
276
+
277
+ def parse_user_agent_header(headers)
278
+ headers.get(Tracing::Metadata::Ext::HTTP::HEADER_USER_AGENT)
279
+ end
280
+
281
+ def parse_request_headers(headers)
282
+ whitelist = configuration[:headers][:request] || []
283
+ whitelist.each_with_object({}) do |header, result|
284
+ header_value = headers.get(header)
285
+ unless header_value.nil?
286
+ header_tag = Tracing::Metadata::Ext::HTTP::RequestHeaders.to_tag(header)
287
+ result[header_tag] = header_value
230
288
  end
231
289
  end
232
290
  end
@@ -248,10 +306,6 @@ module Datadog
248
306
  end
249
307
  end
250
308
  end
251
-
252
- def header_to_rack_header(name)
253
- "HTTP_#{name.to_s.upcase.gsub(/[-\s]/, '_')}"
254
- end
255
309
  end
256
310
  end
257
311
  end
@@ -14,12 +14,28 @@ module Datadog
14
14
 
15
15
  PLACEHOLDER = '?'.freeze
16
16
 
17
+ # taken from Ruby https://github.com/ruby/uri/blob/ffbab83de6d8748c9454414e02db5317609166eb/lib/uri/rfc3986_parser.rb
18
+ # but adjusted to parse only <scheme>://<host>:<port>/ components
19
+ # and stop there, since we don't care about the path, query string,
20
+ # and fragment components
21
+ RFC3986_URL_BASE = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*))(?::(?<port>\d*))?)))(?:\/|\z)/.freeze # rubocop:disable Style/RegexpLiteral, Layout/LineLength
22
+
17
23
  module_function
18
24
 
19
25
  def url(url, options = {})
20
26
  url!(url, options)
21
27
  rescue StandardError
22
- options[:placeholder] || PLACEHOLDER
28
+ placeholder = options[:placeholder] || PLACEHOLDER
29
+
30
+ options[:base] == :exclude ? placeholder : "#{base_url(url)}/#{placeholder}"
31
+ end
32
+
33
+ def base_url(url, options = {})
34
+ if (m = RFC3986_URL_BASE.match(url))
35
+ m[1]
36
+ else
37
+ ''
38
+ end
23
39
  end
24
40
 
25
41
  def url!(url, options = {})
@@ -32,8 +48,14 @@ module Datadog
32
48
  uri.query = (!query.nil? && query.empty? ? nil : query)
33
49
  end
34
50
 
35
- # Remove any URI framents
51
+ # Remove any URI fragments
36
52
  uri.fragment = nil unless options[:fragment] == :show
53
+
54
+ if options[:base] == :exclude
55
+ uri.host = nil
56
+ uri.port = nil
57
+ uri.scheme = nil
58
+ end
37
59
  end.to_s
38
60
  end
39
61
 
@@ -45,22 +67,26 @@ module Datadog
45
67
 
46
68
  def query!(query, options = {})
47
69
  options ||= {}
48
- options[:show] = options[:show] || []
70
+ options[:obfuscate] = {} if options[:obfuscate] == :internal
71
+ options[:show] = options[:show] || (options[:obfuscate] ? :all : [])
49
72
  options[:exclude] = options[:exclude] || []
50
73
 
51
74
  # Short circuit if query string is meant to exclude everything
52
75
  # or if the query string is meant to include everything
53
76
  return '' if options[:exclude] == :all
54
- return query if options[:show] == :all
55
77
 
56
- collect_query(query, uniq: true) do |key, value|
57
- if options[:exclude].include?(key)
58
- [nil, nil]
59
- else
60
- value = options[:show].include?(key) ? value : nil
61
- [key, value]
78
+ unless options[:show] == :all && !(options[:obfuscate] && options[:exclude])
79
+ query = collect_query(query, uniq: true) do |key, value|
80
+ if options[:exclude].include?(key)
81
+ [nil, nil]
82
+ else
83
+ value = options[:show] == :all || options[:show].include?(key) ? value : nil
84
+ [key, value]
85
+ end
62
86
  end
63
87
  end
88
+
89
+ options[:obfuscate] ? obfuscate_query(query, options[:obfuscate]) : query
64
90
  end
65
91
 
66
92
  # Iterate over each key value pair, yielding to the block given.
@@ -91,6 +117,62 @@ module Datadog
91
117
  end
92
118
 
93
119
  private_class_method :collect_query
120
+
121
+ # Scans over the query string and obfuscates sensitive data by
122
+ # replacing matches with an opaque value
123
+ def obfuscate_query(query, options = {})
124
+ options[:regex] = nil if options[:regex] == :internal
125
+ re = options[:regex] || OBFUSCATOR_REGEX
126
+ with = options[:with] || OBFUSCATOR_WITH
127
+
128
+ query.gsub(re, with)
129
+ end
130
+
131
+ private_class_method :obfuscate_query
132
+
133
+ OBFUSCATOR_WITH = '<redacted>'.freeze
134
+
135
+ # rubocop:disable Layout/LineLength
136
+ OBFUSCATOR_REGEX = %r{
137
+ (?: # JSON-ish leading quote
138
+ (?:"|%22)?
139
+ )
140
+ (?: # common keys
141
+ (?:old_?|new_?)?p(?:ass)?w(?:or)?d(?:1|2)? # pw, password variants
142
+ |pass(?:_?phrase)? # pass, passphrase variants
143
+ |secret
144
+ |(?: # key, key_id variants
145
+ api_?
146
+ |private_?
147
+ |public_?
148
+ |access_?
149
+ |secret_?
150
+ )key(?:_?id)?
151
+ |token
152
+ |consumer_?(?:id|key|secret)
153
+ |sign(?:ed|ature)?
154
+ |auth(?:entication|orization)?
155
+ )
156
+ (?:
157
+ # '=' query string separator, plus value til next '&' separator
158
+ (?:\s|%20)*(?:=|%3D)[^&]+
159
+ # JSON-ish '": "somevalue"', key being handled with case above, without the opening '"'
160
+ |(?:"|%22) # closing '"' at end of key
161
+ (?:\s|%20)*(?::|%3A)(?:\s|%20)* # ':' key-value separator, with surrounding spaces
162
+ (?:"|%22) # opening '"' at start of value
163
+ (?:%2[^2]|%[^2]|[^"%])+ # value
164
+ (?:"|%22) # closing '"' at end of value
165
+ )
166
+ |(?: # other common secret values
167
+ bearer(?:\s|%20)+[a-z0-9._\-]+
168
+ |token(?::|%3A)[a-z0-9]{13}
169
+ |gh[opsu]_[0-9a-zA-Z]{36}
170
+ |ey[I-L](?:[\w=-]|%3D)+\.ey[I-L](?:[\w=-]|%3D)+(?:\.(?:[\w.+/=-]|%3D|%2F|%2B)+)?
171
+ |-{5}BEGIN(?:[a-z\s]|%20)+PRIVATE(?:\s|%20)KEY-{5}[^\-]+-{5}END(?:[a-z\s]|%20)+PRIVATE(?:\s|%20)KEY(?:-{5})?(?:\n|%0A)?
172
+ |(?:ssh-(?:rsa|dss)|ecdsa-[a-z0-9]+-[a-z0-9]+)(?:\s|%20)*(?:[a-z0-9/.+]|%2F|%5C|%2B){100,}(?:=|%3D)*(?:(?:\s+)[a-z0-9._-]+)?
173
+ )
174
+ }ix.freeze
175
+ # rubocop:enable Layout/LineLength
94
176
  end
95
177
  end
96
178
  end
@@ -3,71 +3,93 @@
3
3
  module Datadog
4
4
  module Tracing
5
5
  module Flush
6
- # Consumes only completed traces (where all spans have finished)
7
- class Finished
8
- # Consumes and returns completed traces (where all spans have finished)
9
- # from the provided \trace_op, if any.
6
+ # Consumes and returns a {TraceSegment} to be flushed, from
7
+ # the provided {TraceOperation}.
8
+ #
9
+ # Only finished spans are consumed. Any spans consumed are
10
+ # removed from +trace_op+ as a side effect. Unfinished spans are
11
+ # unaffected.
12
+ #
13
+ # @abstract
14
+ class Base
15
+ # Consumes and returns a {TraceSegment} to be flushed, from
16
+ # the provided {TraceOperation}.
10
17
  #
11
- # Any traces consumed are removed from +trace_op+ as a side effect.
18
+ # Only finished spans are consumed. Any spans consumed are
19
+ # removed from +trace_op+ as a side effect. Unfinished spans are
20
+ # unaffected.
12
21
  #
22
+ # @param [TraceOperation] trace_op
13
23
  # @return [TraceSegment] trace to be flushed, or +nil+ if the trace is not finished
14
24
  def consume!(trace_op)
15
- return unless full_flush?(trace_op)
25
+ return unless flush?(trace_op)
16
26
 
17
27
  get_trace(trace_op)
18
28
  end
19
29
 
20
- def full_flush?(trace_op)
21
- trace_op && trace_op.sampled? && trace_op.finished?
30
+ # Should we consume spans from the +trace_op+?
31
+ # @abstract
32
+ def flush?(trace_op)
33
+ raise NotImplementedError
22
34
  end
23
35
 
24
36
  protected
25
37
 
38
+ # Consumes all finished spans from trace.
39
+ # @return [TraceSegment]
26
40
  def get_trace(trace_op)
27
- trace_op.flush!
41
+ trace_op.flush! do |spans|
42
+ spans.select! { |span| single_sampled?(span) } unless trace_op.sampled?
43
+
44
+ spans
45
+ end
46
+ end
47
+
48
+ # Single Span Sampling has chosen to keep this span
49
+ # regardless of the trace-level sampling decision
50
+ def single_sampled?(span)
51
+ span.get_metric(Sampling::Span::Ext::TAG_MECHANISM) == Sampling::Span::Ext::MECHANISM_SPAN_SAMPLING_RATE
52
+ end
53
+ end
54
+
55
+ # Consumes and returns completed traces (where all spans have finished),
56
+ # if any, from the provided +trace_op+.
57
+ #
58
+ # Spans consumed are removed from +trace_op+ as a side effect.
59
+ class Finished < Base
60
+ # Are all spans finished?
61
+ def flush?(trace_op)
62
+ trace_op && trace_op.finished?
28
63
  end
29
64
  end
30
65
 
31
- # Performs partial trace flushing to avoid large traces residing in memory for too long
32
- class Partial
66
+ # Consumes and returns completed or partially completed
67
+ # traces from the provided +trace_op+, if any.
68
+ #
69
+ # Partial trace flushing avoids large traces residing in memory for too long.
70
+ #
71
+ # Partially completed traces, where not all spans have finished,
72
+ # will only be returned if there are at least
73
+ # +@min_spans_for_partial+ finished spans.
74
+ #
75
+ # Spans consumed are removed from +trace_op+ as a side effect.
76
+ class Partial < Base
33
77
  # Start flushing partial trace after this many active spans in one trace
34
78
  DEFAULT_MIN_SPANS_FOR_PARTIAL_FLUSH = 500
35
79
 
36
80
  attr_reader :min_spans_for_partial
37
81
 
38
82
  def initialize(options = {})
83
+ super()
39
84
  @min_spans_for_partial = options.fetch(:min_spans_before_partial_flush, DEFAULT_MIN_SPANS_FOR_PARTIAL_FLUSH)
40
85
  end
41
86
 
42
- # Consumes and returns completed or partially completed
43
- # traces from the provided +trace_op+, if any.
44
- #
45
- # Partially completed traces, where not all spans have finished,
46
- # will only be returned if there are at least
47
- # +@min_spans_for_partial+ finished spans.
48
- #
49
- # Any spans consumed are removed from +trace_op+ as a side effect.
50
- #
51
- # @return [TraceSegment] partial or complete trace to be flushed, or +nil+ if no spans are finished
52
- def consume!(trace_op)
53
- return unless partial_flush?(trace_op)
54
-
55
- get_trace(trace_op)
56
- end
57
-
58
- def partial_flush?(trace_op)
59
- return false unless trace_op.sampled?
87
+ def flush?(trace_op)
60
88
  return true if trace_op.finished?
61
89
  return false if trace_op.finished_span_count < @min_spans_for_partial
62
90
 
63
91
  true
64
92
  end
65
-
66
- protected
67
-
68
- def get_trace(trace_op)
69
- trace_op.flush!
70
- end
71
93
  end
72
94
  end
73
95
  end
@@ -63,11 +63,14 @@ module Datadog
63
63
  TAG_BASE_URL = 'http.base_url'
64
64
  TAG_METHOD = 'http.method'
65
65
  TAG_STATUS_CODE = 'http.status_code'
66
+ TAG_USER_AGENT = 'http.useragent'
66
67
  TAG_URL = 'http.url'
67
68
  TYPE_INBOUND = AppTypes::TYPE_WEB.freeze
68
69
  TYPE_OUTBOUND = 'http'
69
70
  TYPE_PROXY = 'proxy'
70
71
  TYPE_TEMPLATE = 'template'
72
+ TAG_CLIENT_IP = 'http.client_ip'
73
+ HEADER_USER_AGENT = 'User-Agent'
71
74
 
72
75
  # General header functionality
73
76
  module Headers
@@ -153,15 +156,6 @@ module Datadog
153
156
  TAG_QUERY = 'sql.query'
154
157
  end
155
158
 
156
- # @public_api
157
- module DB
158
- TAG_INSTANCE = 'db.instance'
159
- TAG_USER = 'db.user'
160
- TAG_SYSTEM = 'db.system'
161
- TAG_STATEMENT = 'db.statement'
162
- TAG_ROW_COUNT = 'db.row_count'
163
- end
164
-
165
159
  # @public_api
166
160
  module SpanKind
167
161
  TAG_SERVER = 'server'
@@ -65,6 +65,15 @@ module Datadog
65
65
  tags.each { |k, v| set_tag(k, v) }
66
66
  end
67
67
 
68
+ # Returns true if the provided `tag` was set to a non-nil value.
69
+ # False otherwise.
70
+ #
71
+ # @param [String] tag the tag or metric to check for presence
72
+ # @return [Boolean] if the tag is present and not nil
73
+ def has_tag?(tag) # rubocop:disable Naming/PredicateName
74
+ !get_tag(tag).nil? # nil is considered not present, thus we can't use `Hash#has_key?`
75
+ end
76
+
68
77
  # This method removes a tag for the given key.
69
78
  def clear_tag(key)
70
79
  meta.delete(key)
@@ -39,6 +39,9 @@ module Datadog
39
39
  def initialize(rate, max_tokens = rate)
40
40
  super()
41
41
 
42
+ raise ArgumentError, "rate must be a number: #{rate}" unless rate.is_a?(Numeric)
43
+ raise ArgumentError, "max_tokens must be a number: #{max_tokens}" unless max_tokens.is_a?(Numeric)
44
+
42
45
  @rate = rate
43
46
  @max_tokens = max_tokens
44
47
 
@@ -20,6 +20,16 @@ module Datadog
20
20
  # * +sample_rate+: the sample rate as a {Float} between 0.0 and 1.0. 0.0
21
21
  # means that no trace will be sampled; 1.0 means that all traces will be
22
22
  # sampled.
23
+ #
24
+ # DEV-2.0: Allow `sample_rate` zero (drop all). This eases
25
+ # DEV-2.0: usage for all internal users of the {RateSampler} class: both
26
+ # DEV-2.0: RuleSampler and Single Span Sampling leverage the RateSampler, but want
27
+ # DEV-2.0: `sample_rate` zero to mean "drop all". They work around this by hard-
28
+ # DEV-2.0: setting the `sample_rate` to zero like so:
29
+ # DEV-2.0: ```
30
+ # DEV-2.0: sampler = RateSampler.new
31
+ # DEV-2.0: sampler.sample_rate = sample_rate
32
+ # DEV-2.0: ```
23
33
  def initialize(sample_rate = 1.0)
24
34
  super()
25
35
 
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Datadog
4
+ module Tracing
5
+ module Sampling
6
+ module Span
7
+ # Single Span Sampling constants.
8
+ module Ext
9
+ # Accept all spans (100% retention).
10
+ DEFAULT_SAMPLE_RATE = 1.0
11
+ # Unlimited.
12
+ # @see Datadog::Tracing::Sampling::TokenBucket
13
+ DEFAULT_MAX_PER_SECOND = -1
14
+
15
+ # Sampling decision method used to come to the sampling decision for this span
16
+ TAG_MECHANISM = '_dd.span_sampling.mechanism'
17
+ # Sampling rate applied to this span, if a rule applies
18
+ TAG_RULE_RATE = '_dd.span_sampling.rule_rate'
19
+ # Rate limit configured for this span, if a rule applies
20
+ TAG_MAX_PER_SECOND = '_dd.span_sampling.max_per_second'
21
+
22
+ # This span was sampled on account of a Span Sampling Rule
23
+ # @see Datadog::Tracing::Sampling::Span::Rule
24
+ MECHANISM_SPAN_SAMPLING_RATE = 8
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -6,6 +6,8 @@ module Datadog
6
6
  module Span
7
7
  # Checks if a span conforms to a matching criteria.
8
8
  class Matcher
9
+ attr_reader :name, :service
10
+
9
11
  # Pattern that matches any string
10
12
  MATCH_ALL_PATTERN = '*'
11
13
 
@@ -54,6 +56,13 @@ module Datadog
54
56
  end
55
57
  end
56
58
 
59
+ def ==(other)
60
+ return super unless other.is_a?(Matcher)
61
+
62
+ name == other.name &&
63
+ service == other.service
64
+ end
65
+
57
66
  private
58
67
 
59
68
  # @param pattern [String]
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'ext'
4
+
5
+ module Datadog
6
+ module Tracing
7
+ module Sampling
8
+ module Span
9
+ # Span sampling rule that applies a sampling rate if the span
10
+ # matches the provided {Matcher}.
11
+ # Additionally, a rate limiter is also applied.
12
+ #
13
+ # If a span does not conform to the matcher, no changes are made.
14
+ class Rule
15
+ attr_reader :matcher, :sample_rate, :rate_limit
16
+
17
+ # Creates a new span sampling rule.
18
+ #
19
+ # @param [Sampling::Span::Matcher] matcher whether this rule applies to a specific span
20
+ # @param [Float] sample_rate span sampling ratio, between 0.0 (0%) and 1.0 (100%).
21
+ # @param [Numeric] rate_limit maximum number of spans sampled per second. Negative numbers mean unlimited spans.
22
+ def initialize(
23
+ matcher,
24
+ sample_rate: Span::Ext::DEFAULT_SAMPLE_RATE,
25
+ rate_limit: Span::Ext::DEFAULT_MAX_PER_SECOND
26
+ )
27
+
28
+ @matcher = matcher
29
+ @sample_rate = sample_rate
30
+ @rate_limit = rate_limit
31
+
32
+ @sampler = Sampling::RateSampler.new
33
+ # Set the sample_rate outside of the initializer to allow for
34
+ # zero to be a "drop all".
35
+ # The RateSampler initializer enforces non-zero, falling back to 100% sampling
36
+ # if zero is provided.
37
+ @sampler.sample_rate = sample_rate
38
+ @rate_limiter = Sampling::TokenBucket.new(rate_limit)
39
+ end
40
+
41
+ # This method should only be invoked for spans that are part
42
+ # of a trace that has been dropped by trace-level sampling.
43
+ # Invoking it for other spans will cause incorrect sampling
44
+ # metrics to be reported by the Datadog App.
45
+ #
46
+ # Returns `:kept` if the provided span is sampled.
47
+ # If the span is dropped due to sampling rate or rate limiting,
48
+ # it returns `:rejected`.
49
+ #
50
+ # Returns `:not_matched` if the span did not meet the matching criteria by the
51
+ # provided matcher.
52
+ #
53
+ # This method modifies the `span` if it matches the provided matcher.
54
+ #
55
+ # @param [Datadog::Tracing::SpanOperation] span_op span to be sampled
56
+ # @return [:kept,:rejected] should this span be sampled?
57
+ # @return [:not_matched] span did not satisfy the matcher, no changes are made to the span
58
+ def sample!(span_op)
59
+ return :not_matched unless @matcher.match?(span_op)
60
+
61
+ if @sampler.sample?(span_op) && @rate_limiter.allow?(1)
62
+ span_op.set_metric(Span::Ext::TAG_MECHANISM, Span::Ext::MECHANISM_SPAN_SAMPLING_RATE)
63
+ span_op.set_metric(Span::Ext::TAG_RULE_RATE, @sample_rate)
64
+ span_op.set_metric(Span::Ext::TAG_MAX_PER_SECOND, @rate_limit)
65
+ :kept
66
+ else
67
+ :rejected
68
+ end
69
+ end
70
+
71
+ def ==(other)
72
+ return super unless other.is_a?(Rule)
73
+
74
+ matcher == other.matcher &&
75
+ sample_rate == other.sample_rate &&
76
+ rate_limit == other.rate_limit
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end