ddtrace 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.env +11 -0
- data/.gitignore +56 -0
- data/.rubocop.yml +43 -0
- data/Appraisals +65 -0
- data/Gemfile +3 -0
- data/LICENSE +24 -0
- data/README.md +119 -0
- data/Rakefile +103 -0
- data/circle.yml +68 -0
- data/ddtrace.gemspec +41 -0
- data/docker-compose.yml +33 -0
- data/docs/GettingStarted +352 -0
- data/gemfiles/contrib.gemfile +9 -0
- data/gemfiles/rails3_mysql2.gemfile +11 -0
- data/gemfiles/rails3_postgres.gemfile +10 -0
- data/gemfiles/rails3_postgres_redis.gemfile +11 -0
- data/gemfiles/rails4_mysql2.gemfile +9 -0
- data/gemfiles/rails4_postgres.gemfile +9 -0
- data/gemfiles/rails4_postgres_redis.gemfile +10 -0
- data/gemfiles/rails5_mysql2.gemfile +8 -0
- data/gemfiles/rails5_postgres.gemfile +8 -0
- data/gemfiles/rails5_postgres_redis.gemfile +9 -0
- data/lib/ddtrace.rb +63 -0
- data/lib/ddtrace/buffer.rb +77 -0
- data/lib/ddtrace/contrib/elasticsearch/core.rb +56 -0
- data/lib/ddtrace/contrib/elasticsearch/patcher.rb +35 -0
- data/lib/ddtrace/contrib/elasticsearch/quantize.rb +22 -0
- data/lib/ddtrace/contrib/rails/action_controller.rb +75 -0
- data/lib/ddtrace/contrib/rails/action_view.rb +121 -0
- data/lib/ddtrace/contrib/rails/active_record.rb +44 -0
- data/lib/ddtrace/contrib/rails/active_support.rb +115 -0
- data/lib/ddtrace/contrib/rails/core_extensions.rb +89 -0
- data/lib/ddtrace/contrib/rails/framework.rb +107 -0
- data/lib/ddtrace/contrib/rails/utils.rb +42 -0
- data/lib/ddtrace/contrib/redis/core.rb +72 -0
- data/lib/ddtrace/contrib/redis/patcher.rb +36 -0
- data/lib/ddtrace/contrib/redis/quantize.rb +30 -0
- data/lib/ddtrace/contrib/redis/tags.rb +19 -0
- data/lib/ddtrace/encoding.rb +65 -0
- data/lib/ddtrace/ext/app_types.rb +9 -0
- data/lib/ddtrace/ext/cache.rb +7 -0
- data/lib/ddtrace/ext/errors.rb +9 -0
- data/lib/ddtrace/ext/http.rb +11 -0
- data/lib/ddtrace/ext/net.rb +8 -0
- data/lib/ddtrace/ext/redis.rb +16 -0
- data/lib/ddtrace/ext/sql.rb +8 -0
- data/lib/ddtrace/monkey.rb +60 -0
- data/lib/ddtrace/pin.rb +62 -0
- data/lib/ddtrace/span.rb +163 -0
- data/lib/ddtrace/tracer.rb +180 -0
- data/lib/ddtrace/transport.rb +149 -0
- data/lib/ddtrace/utils.rb +9 -0
- data/lib/ddtrace/version.rb +9 -0
- data/lib/ddtrace/workers.rb +109 -0
- data/lib/ddtrace/writer.rb +119 -0
- metadata +187 -0
@@ -0,0 +1,180 @@
|
|
1
|
+
require 'pp'
|
2
|
+
require 'thread'
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
require 'ddtrace/span'
|
6
|
+
require 'ddtrace/buffer'
|
7
|
+
require 'ddtrace/writer'
|
8
|
+
|
9
|
+
# \Datadog global namespace that includes all tracing functionality for Tracer and Span classes.
|
10
|
+
module Datadog
  # A \Tracer keeps track of the time spent by an application processing a single operation. For
  # example, a trace can be used to track the entire time spent processing a complicated web request.
  # Even though the request may require multiple resources and machines to handle the request, all
  # of these function calls and sub-requests would be encapsulated within a single trace.
  class Tracer
    attr_reader :writer, :services
    attr_accessor :enabled

    # Global, memoized, lazily initialized logger that is used within the Datadog
    # namespace. It writes to +STDOUT+ at +INFO+ level by default. The lazy
    # initialization itself is not guarded by a lock.
    def self.log
      unless defined? @logger
        @logger = Logger.new(STDOUT)
        @logger.level = Logger::INFO
      end
      @logger
    end

    # Activate (truthy +value+) or deactivate (falsy +value+) the debug mode, which
    # switches the shared logger between the +DEBUG+ and +INFO+ levels.
    def self.debug_logging=(value)
      log.level = value ? Logger::DEBUG : Logger::INFO
    end

    # Return +true+ when the shared logger is at +DEBUG+ level, +false+ otherwise.
    def self.debug_logging
      log.level == Logger::DEBUG
    end

    # Initialize a new \Tracer used to create, sample and submit spans that measure the
    # time of sections of code. Available +options+ are:
    #
    # * +enabled+: set if the tracer submits or not spans to the local agent. It's enabled
    #   by default.
    # * +writer+: the object that receives finished traces through <tt>write(spans, services)</tt>.
    #   A <tt>Datadog::Writer</tt> is created only when this option is not given.
    def initialize(options = {})
      @enabled = options.fetch(:enabled, true)
      # The block form of fetch builds the default writer only when the caller did not
      # inject one; the two-argument form would always instantiate a Writer first.
      @writer = options.fetch(:writer) { Datadog::Writer.new }
      @buffer = Datadog::SpanBuffer.new()

      @mutex = Mutex.new
      @spans = []
      @services = {}
    end

    # Updates the current \Tracer instance, so that the tracer can be configured after the
    # initialization. Available +options+ are:
    #
    # * +enabled+: set if the tracer submits or not spans to the trace agent
    # * +hostname+: change the location of the trace agent
    # * +port+: change the port of the trace agent
    #
    # Options that are missing or +nil+ leave the current value untouched.
    # For instance, if the trace agent runs in a different location, just:
    #
    #   tracer.configure(hostname: 'agent.service.consul', port: '8777')
    #
    def configure(options = {})
      enabled = options.fetch(:enabled, nil)
      hostname = options.fetch(:hostname, nil)
      port = options.fetch(:port, nil)

      @enabled = enabled unless enabled.nil?
      @writer.transport.hostname = hostname unless hostname.nil?
      @writer.transport.port = port unless port.nil?
    end

    # Set the information about the given service. A valid example is:
    #
    #   tracer.set_service_info('web-application', 'rails', 'web')
    #
    # The entry is stored in the +services+ hash under the +service+ key and is
    # handed to the writer together with the next flushed trace.
    def set_service_info(service, app, app_type)
      @services[service] = {
        'app' => app,
        'app_type' => app_type
      }

      return unless Datadog::Tracer.debug_logging
      Datadog::Tracer.log.debug("set_service_info: service: #{service} app: #{app} type: #{app_type}")
    end

    # Return a +span+ that will trace an operation called +name+. You could trace your code
    # using a <tt>do-block</tt> like:
    #
    #   tracer.trace('web.request') do |span|
    #     span.service = 'my-web-site'
    #     span.resource = '/'
    #     span.set_tag('http.method', request.request_method)
    #     do_something()
    #   end
    #
    # The <tt>tracer.trace()</tt> method can also be used without a block in this way:
    #
    #   span = tracer.trace('web.request', service: 'my-web-site')
    #   do_something()
    #   span.finish()
    #
    # Remember that in this case, calling <tt>span.finish()</tt> is mandatory.
    #
    # When a Trace is started, <tt>trace()</tt> will store the created span; subsequent spans will
    # become its children and will inherit some properties:
    #
    #   parent = tracer.trace('parent')     # has no parent span
    #   child  = tracer.trace('child')      # is a child of 'parent'
    #   child.finish()
    #   parent.finish()
    #   parent2 = tracer.trace('parent2')   # has no parent span
    #   parent2.finish()
    #
    # With a block, the block's value is returned; a +StandardError+ raised by the block is
    # attached to the span with <tt>set_error</tt> and re-raised, and the span is always finished.
    def trace(name, options = {})
      span = Span.new(self, name, options)

      # set up inheritance: the span currently held by the buffer becomes the parent
      parent = @buffer.get()
      span.set_parent(parent)
      @buffer.set(span)

      # call the finish only if a block is given; this ensures
      # that a call to tracer.trace() without a block, returns
      # a span that should be manually finished.
      if block_given?
        begin
          yield(span)
        rescue StandardError => e
          span.set_error(e)
          raise
        ensure
          span.finish()
        end
      else
        span
      end
    end

    # Record the given finished span in the +spans+ list. When a +span+ is recorded, it will be sent
    # to the Datadog trace agent as soon as the trace is finished, that is when no unfinished
    # ancestor is left.
    def record(span)
      spans = []
      @mutex.synchronize do
        @spans << span
        parent = span.parent
        # Bubble up until we find a non-finished parent. This is necessary for
        # the case when the parent finished after its parent.
        parent = parent.parent while !parent.nil? && parent.finished?
        @buffer.set(parent)

        # an unfinished ancestor exists: keep accumulating spans for this trace
        return unless parent.nil?
        spans = @spans
        @spans = []
      end

      return if spans.empty?
      write(spans)
    end

    # Return the current active span or +nil+.
    def active_span
      @buffer.get()
    end

    # Hand the finished +spans+ and the known services to the writer. Nothing is
    # written when the tracer is disabled or has no writer. In debug mode the
    # spans are also pretty-printed through +PP+.
    def write(spans)
      return if @writer.nil? || !@enabled

      if Datadog::Tracer.debug_logging
        Datadog::Tracer.log.debug("Writing #{spans.length} spans (enabled: #{@enabled})")
        PP.pp(spans)
      end

      @writer.write(spans, @services)
    end

    private :write
  end
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'net/http'
|
3
|
+
|
4
|
+
require 'ddtrace/encoding'
|
5
|
+
|
6
|
+
module Datadog
  # Transport class that handles the spans delivery to the
  # local trace-agent. Every call to +post+ opens its own Net::HTTP
  # connection, and the delivery counters are updated under a mutex.
  class HTTPTransport
    attr_accessor :hostname, :port

    # seconds before the transport timeout (applied to both opening the
    # connection and reading the response)
    TIMEOUT = 1

    # Build a transport that targets the trace agent at +hostname+:+port+.
    # Available +options+ are:
    #
    # * +encoder+: object used to serialize payloads; it must respond to
    #   +content_type+, +encode_traces+ and +encode_services+. A
    #   <tt>Datadog::Encoding::MsgpackEncoder</tt> is created when it's not given.
    # * +headers+: extra HTTP headers sent with every request. The hash is
    #   copied, so the caller's object is never modified.
    def initialize(hostname, port, options = {})
      @hostname = hostname
      @port = port
      @traces_endpoint = '/v0.3/traces'.freeze
      @services_endpoint = '/v0.3/services'.freeze
      @compatibility_mode = false
      # block form: the default encoder is only built when none is injected
      @encoder = options.fetch(:encoder) { Datadog::Encoding::MsgpackEncoder.new() }

      # overwrite the Content-type with the one chosen in the Encoder; work on a
      # copy because this hash is written here and again in downgrade!
      @headers = options.fetch(:headers, {}).dup
      @headers['Content-Type'] = @encoder.content_type

      # stats
      @mutex = Mutex.new
      @count_success = 0
      @count_client_error = 0
      @count_server_error = 0
      @count_internal_error = 0
    end

    # route the send to the right endpoint (+:services+ or +:traces+) and return
    # the resulting HTTP status code, or +nil+ for an unsupported endpoint.
    # When the agent answers with a downgrade status (see +downgrade?+) and the
    # transport is not yet in compatibility mode, the transport is downgraded
    # and the same call is retried once.
    # NOTE: this method shadows <tt>Object#send</tt> for HTTPTransport instances.
    def send(endpoint, data)
      case endpoint
      when :services
        payload = @encoder.encode_services(data)
        status_code = post(@services_endpoint, payload)
      when :traces
        payload = @encoder.encode_traces(data)
        status_code = post(@traces_endpoint, payload)
      else
        Datadog::Tracer.log.error("Unsupported endpoint: #{endpoint}")
        return nil
      end

      return status_code unless downgrade?(status_code) && !@compatibility_mode

      # the API endpoint is not available so we should downgrade the connection and re-try the call
      downgrade!
      send(endpoint, data)
    end

    # send data to the trace-agent and return the HTTP status code. Any
    # +StandardError+ raised while sending is logged and reported as +500+.
    def post(url, data)
      Datadog::Tracer.log.debug("Sending data from process: #{Process.pid}")
      request = Net::HTTP::Post.new(url, @headers)
      request.body = data

      # bound the connection phase as well as the read phase, otherwise an
      # unreachable agent blocks the caller for the Net::HTTP default open timeout
      response = Net::HTTP.start(@hostname, @port, open_timeout: TIMEOUT, read_timeout: TIMEOUT) do |http|
        http.request(request)
      end
      handle_response(response)
    rescue StandardError => e
      Datadog::Tracer.log.error(e.message)
      500
    end

    # Downgrade the connection to a compatibility version of the HTTPTransport;
    # this method should target a stable API that works whatever is the agent
    # or the tracing client versions. It switches to the v0.2 endpoints and to
    # the JSON encoder, and updates the Content-Type header accordingly.
    def downgrade!
      @compatibility_mode = true
      @traces_endpoint = '/v0.2/traces'.freeze
      @services_endpoint = '/v0.2/services'.freeze
      @encoder = Datadog::Encoding::JSONEncoder.new()
      @headers['Content-Type'] = @encoder.content_type
    end

    # true for 1xx status codes
    def informational?(code)
      code.between?(100, 199)
    end

    # true for 2xx status codes
    def success?(code)
      code.between?(200, 299)
    end

    # true for 3xx status codes
    def redirect?(code)
      code.between?(300, 399)
    end

    # true for 4xx status codes
    def client_error?(code)
      code.between?(400, 499)
    end

    # true for 5xx status codes
    def server_error?(code)
      code.between?(500, 599)
    end

    # receiving a 404 means that we're targeting an endpoint that is not available
    # in the trace agent. Usually this means that we've an up-to-date tracing client,
    # while running an obsolete agent.
    # receiving a 415 means that we're using an unsupported content-type with an existing
    # endpoint. Usually this means that we're using a newer encoder with a previous
    # endpoint. In both cases, we're going to downgrade the transporter encoder so that
    # it will target a stable API.
    def downgrade?(code)
      code == 404 || code == 415
    end

    # handles the server response; here you can log the trace-agent response
    # or do something more complex to recover from a possible error. Returns
    # the integer status code. Only the counter updates run inside
    # <tt>@mutex.synchronize</tt>; a downgrade status (404/415) is logged but not
    # counted. If inspecting the response raises, the error is logged, counted
    # as an internal error and +500+ is returned.
    def handle_response(response)
      status_code = response.code.to_i

      if success?(status_code)
        Datadog::Tracer.log.debug('Payload correctly sent to the trace agent.')
        @mutex.synchronize { @count_success += 1 }
      elsif downgrade?(status_code)
        Datadog::Tracer.log.debug("calling the endpoint but received #{status_code}; downgrading the API")
      elsif client_error?(status_code)
        Datadog::Tracer.log.error("Client error: #{response.message}")
        @mutex.synchronize { @count_client_error += 1 }
      elsif server_error?(status_code)
        Datadog::Tracer.log.error("Server error: #{response.message}")
        @mutex.synchronize { @count_server_error += 1 }
      end

      status_code
    rescue StandardError => e
      Datadog::Tracer.log.error(e.message)
      @mutex.synchronize { @count_internal_error += 1 }
      500
    end

    # Return a snapshot hash of the delivery counters with the keys
    # +:success+, +:client_error+, +:server_error+ and +:internal_error+.
    def stats
      @mutex.synchronize do
        {
          success: @count_success,
          client_error: @count_client_error,
          server_error: @count_server_error,
          internal_error: @count_internal_error
        }
      end
    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
require 'ddtrace/buffer'
|
4
|
+
|
5
|
+
module Datadog
  module Workers
    # Asynchronous worker that executes a +Send()+ operation after given
    # seconds. Under the hood, it uses a plain +Thread+ that loops, flushes the
    # buffers and sleeps for the span interval. The thread can be stopped
    # with the +stop()+ method and can start with the +start()+ method.
    class AsyncTransport
      # * +span_interval+: seconds slept between two trace flushes
      # * +service_interval+: minimum seconds between two service flushes
      # * +transport+: object passed as second argument to both tasks
      # * +buff_size+: size given to each internal +TraceBuffer+
      # * +trace_task+: callable invoked as <tt>call(traces, transport)</tt>
      # * +service_task+: callable invoked as <tt>call(services, transport)</tt>; a
      #   truthy return value marks the services as flushed
      def initialize(span_interval, service_interval, transport, buff_size, trace_task, service_task)
        @trace_task = trace_task
        @service_task = service_task
        @span_interval = span_interval
        @service_interval = service_interval
        @trace_buffer = TraceBuffer.new(buff_size)
        @service_buffer = TraceBuffer.new(buff_size)
        @transport = transport

        @worker = nil
        @run = false
        @last_flushed_services = nil
      end

      # Callback function that process traces and executes the +send_traces()+ method.
      # Any +StandardError+ is logged and swallowed so that the worker thread survives.
      def callback_traces
        return if @trace_buffer.empty?

        # declared outside of the begin block so that the rescue clause can
        # report how many items were dropped
        traces = nil
        begin
          traces = @trace_buffer.pop()
          @trace_task.call(traces, @transport)
        rescue StandardError => e
          # ensures that the thread will not die because of an exception.
          # TODO[manu]: findout the reason and reschedule the send if it's not
          # a fatal exception
          dropped = traces.nil? ? 0 : traces.length
          Datadog::Tracer.log.error("Error during traces flush: dropped #{dropped} items. Cause: #{e}")
        end
      end

      # Callback function that process services and executes the +send_services()+ method.
      # Any +StandardError+ is logged and swallowed so that the worker thread survives.
      def callback_services
        return if @service_buffer.empty?

        # declared outside of the begin block so that the rescue clause can
        # report how many items were dropped
        services = nil
        begin
          services = @service_buffer.pop()
          # pick up the latest services hash (this is a FIFO list)
          # that is different from what we sent before.
          different = services.inject(false) { |acc, elem| elem != @last_flushed_services ? elem : acc }
          if different
            if @service_task.call(different, @transport)
              @last_flushed_services = different.clone
            end
          else
            Datadog::Tracer.log.debug('No new different services, skipping flush.')
          end
        rescue StandardError => e
          # ensures that the thread will not die because of an exception.
          # TODO[manu]: findout the reason and reschedule the send if it's not
          # a fatal exception
          dropped = services.nil? ? 0 : services.length
          Datadog::Tracer.log.error("Error during services flush: dropped #{dropped} items. Cause: #{e}")
        end
      end

      # Start the timer execution. Calling it while the worker is already
      # running is a no-op.
      def start
        return if @run
        @run = true
        @worker = Thread.new() do
          Datadog::Tracer.log.debug("Starting thread in the process: #{Process.pid}")
          @last_flushed_services = nil
          next_send_services = Time.now

          # this loop assumes spans are flushed more often than services
          while @run
            callback_traces
            if Time.now >= next_send_services
              next_send_services = Time.now + @service_interval
              callback_services
            end
            sleep(@span_interval)
          end
        end
      end

      # Stop the timer execution. The worker loop exits the next time it checks
      # the flag, that is after the current iteration and its sleep.
      def stop
        @run = false
      end

      # Block until the worker thread terminates or until 10 seconds have passed.
      # Returns +nil+ right away when the worker has never been started.
      def join
        return if @worker.nil?
        @worker.join(10)
      end

      # Enqueue an item in the trace internal buffer. This operation is thread-safe
      # because uses the +TraceBuffer+ data structure.
      def enqueue_trace(trace)
        @trace_buffer.push(trace)
      end

      # Enqueue an item in the service internal buffer. This operation is thread-safe
      # because uses the +TraceBuffer+ data structure.
      def enqueue_service(service)
        return if service == {} # no use to send this, not worth it
        @service_buffer.push(service)
      end
    end
  end
end
|