ddtrace 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.env +11 -0
- data/.gitignore +56 -0
- data/.rubocop.yml +43 -0
- data/Appraisals +65 -0
- data/Gemfile +3 -0
- data/LICENSE +24 -0
- data/README.md +119 -0
- data/Rakefile +103 -0
- data/circle.yml +68 -0
- data/ddtrace.gemspec +41 -0
- data/docker-compose.yml +33 -0
- data/docs/GettingStarted +352 -0
- data/gemfiles/contrib.gemfile +9 -0
- data/gemfiles/rails3_mysql2.gemfile +11 -0
- data/gemfiles/rails3_postgres.gemfile +10 -0
- data/gemfiles/rails3_postgres_redis.gemfile +11 -0
- data/gemfiles/rails4_mysql2.gemfile +9 -0
- data/gemfiles/rails4_postgres.gemfile +9 -0
- data/gemfiles/rails4_postgres_redis.gemfile +10 -0
- data/gemfiles/rails5_mysql2.gemfile +8 -0
- data/gemfiles/rails5_postgres.gemfile +8 -0
- data/gemfiles/rails5_postgres_redis.gemfile +9 -0
- data/lib/ddtrace.rb +63 -0
- data/lib/ddtrace/buffer.rb +77 -0
- data/lib/ddtrace/contrib/elasticsearch/core.rb +56 -0
- data/lib/ddtrace/contrib/elasticsearch/patcher.rb +35 -0
- data/lib/ddtrace/contrib/elasticsearch/quantize.rb +22 -0
- data/lib/ddtrace/contrib/rails/action_controller.rb +75 -0
- data/lib/ddtrace/contrib/rails/action_view.rb +121 -0
- data/lib/ddtrace/contrib/rails/active_record.rb +44 -0
- data/lib/ddtrace/contrib/rails/active_support.rb +115 -0
- data/lib/ddtrace/contrib/rails/core_extensions.rb +89 -0
- data/lib/ddtrace/contrib/rails/framework.rb +107 -0
- data/lib/ddtrace/contrib/rails/utils.rb +42 -0
- data/lib/ddtrace/contrib/redis/core.rb +72 -0
- data/lib/ddtrace/contrib/redis/patcher.rb +36 -0
- data/lib/ddtrace/contrib/redis/quantize.rb +30 -0
- data/lib/ddtrace/contrib/redis/tags.rb +19 -0
- data/lib/ddtrace/encoding.rb +65 -0
- data/lib/ddtrace/ext/app_types.rb +9 -0
- data/lib/ddtrace/ext/cache.rb +7 -0
- data/lib/ddtrace/ext/errors.rb +9 -0
- data/lib/ddtrace/ext/http.rb +11 -0
- data/lib/ddtrace/ext/net.rb +8 -0
- data/lib/ddtrace/ext/redis.rb +16 -0
- data/lib/ddtrace/ext/sql.rb +8 -0
- data/lib/ddtrace/monkey.rb +60 -0
- data/lib/ddtrace/pin.rb +62 -0
- data/lib/ddtrace/span.rb +163 -0
- data/lib/ddtrace/tracer.rb +180 -0
- data/lib/ddtrace/transport.rb +149 -0
- data/lib/ddtrace/utils.rb +9 -0
- data/lib/ddtrace/version.rb +9 -0
- data/lib/ddtrace/workers.rb +109 -0
- data/lib/ddtrace/writer.rb +119 -0
- metadata +187 -0
@@ -0,0 +1,180 @@
|
|
1
|
+
require 'pp'
|
2
|
+
require 'thread'
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
require 'ddtrace/span'
|
6
|
+
require 'ddtrace/buffer'
|
7
|
+
require 'ddtrace/writer'
|
8
|
+
|
9
|
+
# \Datadog global namespace that includes all tracing functionality for Tracer and Span classes.
|
10
|
+
module Datadog
  # A \Tracer keeps track of the time spent by an application processing a single operation. For
  # example, a trace can be used to track the entire time spent processing a complicated web request.
  # Even though the request may require multiple resources and machines to handle the request, all
  # of these function calls and sub-requests would be encapsulated within a single trace.
  class Tracer
    attr_reader :writer, :services
    attr_accessor :enabled

    # Global, memoized, lazily initialized logger used within the Datadog namespace.
    # It writes to +STDOUT+ and starts at the +INFO+ level.
    def self.log
      unless defined? @logger
        @logger = Logger.new(STDOUT)
        @logger.level = Logger::INFO
      end
      @logger
    end

    # Activate (truthy +value+) or deactivate the debug mode, which provides more
    # information related to tracer usage.
    def self.debug_logging=(value)
      log.level = value ? Logger::DEBUG : Logger::INFO
    end

    # Return +true+ if the debug mode is activated, +false+ otherwise.
    def self.debug_logging
      log.level == Logger::DEBUG
    end

    # Initialize a new \Tracer used to create, sample and submit spans that measure the
    # time of sections of code. Available +options+ are:
    #
    # * +enabled+: set if the tracer submits or not spans to the local agent. It's enabled
    #   by default.
    # * +writer+: the object that receives finished spans through <tt>write(spans, services)</tt>.
    #   A Datadog::Writer is created only when this option is not given.
    def initialize(options = {})
      @enabled = options.fetch(:enabled, true)
      # block form: the default writer is built only if the caller did not provide one
      @writer = options.fetch(:writer) { Datadog::Writer.new }
      @buffer = Datadog::SpanBuffer.new

      @mutex = Mutex.new
      @spans = []
      @services = {}
    end

    # Updates the current \Tracer instance, so that the tracer can be configured after the
    # initialization. Available +options+ are:
    #
    # * +enabled+: set if the tracer submits or not spans to the trace agent
    # * +hostname+: change the location of the trace agent
    # * +port+: change the port of the trace agent
    #
    # Options that are missing or +nil+ leave the current value untouched.
    # For instance, if the trace agent runs in a different location, just:
    #
    #   tracer.configure(hostname: 'agent.service.consul', port: '8777')
    #
    def configure(options = {})
      enabled = options.fetch(:enabled, nil)
      hostname = options.fetch(:hostname, nil)
      port = options.fetch(:port, nil)

      @enabled = enabled unless enabled.nil?
      @writer.transport.hostname = hostname unless hostname.nil?
      @writer.transport.port = port unless port.nil?
    end

    # Set the information about the given service. A valid example is:
    #
    #   tracer.set_service_info('web-application', 'rails', 'web')
    def set_service_info(service, app, app_type)
      @services[service] = {
        'app' => app,
        'app_type' => app_type
      }

      return unless Datadog::Tracer.debug_logging
      Datadog::Tracer.log.debug("set_service_info: service: #{service} app: #{app} type: #{app_type}")
    end

    # Return a +span+ that will trace an operation called +name+. You could trace your code
    # using a <tt>do-block</tt> like:
    #
    #   tracer.trace('web.request') do |span|
    #     span.service = 'my-web-site'
    #     span.resource = '/'
    #     span.set_tag('http.method', request.request_method)
    #     do_something()
    #   end
    #
    # With a block, the value of the block is returned; a StandardError raised by the block
    # is attached to the span and re-raised, and the span is finished in every case.
    #
    # The <tt>tracer.trace()</tt> method can also be used without a block in this way:
    #
    #   span = tracer.trace('web.request', service: 'my-web-site')
    #   do_something()
    #   span.finish()
    #
    # Remember that in this case, calling <tt>span.finish()</tt> is mandatory.
    #
    # When a Trace is started, <tt>trace()</tt> will store the created span; subsequent spans will
    # become its children and will inherit some properties:
    #
    #   parent = tracer.trace('parent')     # has no parent span
    #   child  = tracer.trace('child')      # is a child of 'parent'
    #   child.finish()
    #   parent.finish()
    #   parent2 = tracer.trace('parent2')   # has no parent span
    #   parent2.finish()
    #
    def trace(name, options = {})
      span = Span.new(self, name, options)

      # set up inheritance: the span currently in the buffer becomes the parent,
      # and the new span takes its place as the active one
      span.set_parent(@buffer.get)
      @buffer.set(span)

      # without a block the caller owns the span and must finish it manually
      return span unless block_given?

      begin
        yield(span)
      rescue StandardError => e
        span.set_error(e)
        raise
      ensure
        span.finish
      end
    end

    # Record the given finished span in the +spans+ list. When a +span+ is recorded, it will be sent
    # to the Datadog trace agent as soon as the trace is finished, that is when no unfinished
    # ancestor is left. Returns +nil+ while the trace is still open.
    def record(span)
      finished_spans = @mutex.synchronize do
        @spans << span

        # Bubble up until we find a non-finished parent. This is necessary for
        # the case when the parent finished after its parent.
        parent = span.parent
        parent = parent.parent while !parent.nil? && parent.finished?
        @buffer.set(parent)

        # an unfinished ancestor exists: keep collecting spans
        next [] unless parent.nil?

        collected = @spans
        @spans = []
        collected
      end

      return if finished_spans.empty?
      write(finished_spans)
    end

    # Return the current active span or +nil+.
    def active_span
      @buffer.get
    end

    # Hand the given spans, together with the known services, to the writer.
    # Nothing is written when there is no writer or the tracer is disabled.
    def write(spans)
      return if @writer.nil? || !@enabled

      if Datadog::Tracer.debug_logging
        Datadog::Tracer.log.debug("Writing #{spans.length} spans (enabled: #{@enabled})")
        PP.pp(spans)
      end

      @writer.write(spans, @services)
    end

    private :write
  end
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'net/http'
|
3
|
+
|
4
|
+
require 'ddtrace/encoding'
|
5
|
+
|
6
|
+
module Datadog
  # Transport class that handles the spans delivery to the
  # local trace-agent. The class wraps Net::HTTP calls and keeps
  # delivery counters that are guarded by a mutex.
  class HTTPTransport
    attr_accessor :hostname, :port

    # seconds before the transport timeout (applied to both connect and read)
    TIMEOUT = 1

    # Build a transport that targets the trace-agent at +hostname+:+port+.
    # Available +options+ are:
    #
    # * +encoder+: object that provides +encode_traces+, +encode_services+ and
    #   +content_type+; a Datadog::Encoding::MsgpackEncoder is created when missing.
    # * +headers+: extra HTTP headers sent with every request. The hash is copied,
    #   so the caller's object is never modified.
    def initialize(hostname, port, options = {})
      @hostname = hostname
      @port = port
      @traces_endpoint = '/v0.3/traces'.freeze
      @services_endpoint = '/v0.3/services'.freeze
      @compatibility_mode = false
      # block form: the default encoder is built only if the caller did not provide one
      @encoder = options.fetch(:encoder) { Datadog::Encoding::MsgpackEncoder.new }

      # overwrite the Content-type with the one chosen in the Encoder; work on a
      # copy because the Content-Type is rewritten here and again in downgrade!
      @headers = options.fetch(:headers, {}).dup
      @headers['Content-Type'] = @encoder.content_type

      # stats
      @mutex = Mutex.new
      @count_success = 0
      @count_client_error = 0
      @count_server_error = 0
      @count_internal_error = 0
    end

    # Route the send to the right endpoint (+:services+ or +:traces+) and return the
    # HTTP status code, or +nil+ for an unsupported endpoint. When the agent answers
    # with a status that requires a downgrade, the transport switches once to the
    # compatibility API and retries the same payload.
    #
    # NOTE: this method shadows Object#send for instances of this class; use
    # +__send__+ for dynamic dispatch on a transport.
    def send(endpoint, data)
      case endpoint
      when :services
        status_code = post(@services_endpoint, @encoder.encode_services(data))
      when :traces
        status_code = post(@traces_endpoint, @encoder.encode_traces(data))
      else
        Datadog::Tracer.log.error("Unsupported endpoint: #{endpoint}")
        return nil
      end

      return status_code unless downgrade?(status_code) && !@compatibility_mode

      # the API endpoint is not available so we should downgrade the connection and re-try the call
      downgrade!
      send(endpoint, data)
    end

    # Send +data+ to the trace-agent with a POST on +url+ and return the status code.
    # Any StandardError (connection refused, timeout, ...) is logged and reported as 500.
    def post(url, data)
      Datadog::Tracer.log.debug("Sending data from process: #{Process.pid}")
      request = Net::HTTP::Post.new(url, @headers)
      request.body = data

      # bound the connection phase too, otherwise an unreachable agent blocks the
      # caller for Net::HTTP's default open timeout
      connection_options = { open_timeout: TIMEOUT, read_timeout: TIMEOUT }
      response = Net::HTTP.start(@hostname, @port, connection_options) { |http| http.request(request) }
      handle_response(response)
    rescue StandardError => e
      Datadog::Tracer.log.error(e.message)
      500
    end

    # Downgrade the connection to a compatibility version of the HTTPTransport;
    # this method should target a stable API that works whatever is the agent
    # or the tracing client versions.
    def downgrade!
      @compatibility_mode = true
      @traces_endpoint = '/v0.2/traces'.freeze
      @services_endpoint = '/v0.2/services'.freeze
      @encoder = Datadog::Encoding::JSONEncoder.new
      @headers['Content-Type'] = @encoder.content_type
    end

    # +true+ for 1xx status codes
    def informational?(code)
      code.between?(100, 199)
    end

    # +true+ for 2xx status codes
    def success?(code)
      code.between?(200, 299)
    end

    # +true+ for 3xx status codes
    def redirect?(code)
      code.between?(300, 399)
    end

    # +true+ for 4xx status codes
    def client_error?(code)
      code.between?(400, 499)
    end

    # +true+ for 5xx status codes
    def server_error?(code)
      code.between?(500, 599)
    end

    # receiving a 404 means that we're targeting an endpoint that is not available
    # in the trace agent. Usually this means that we've an up-to-date tracing client,
    # while running an obsolete agent.
    # receiving a 415 means that we're using an unsupported content-type with an existing
    # endpoint. Usually this means that we're using a newer encoder with a previous
    # endpoint. In both cases, we're going to downgrade the transporter encoder so that
    # it will target a stable API.
    def downgrade?(code)
      code == 404 || code == 415
    end

    # Handles the server response; here you can log the trace-agent response
    # or do something more complex to recover from a possible error. Returns the
    # numeric status code, or 500 if the response cannot be processed. The stats
    # counters are updated while holding the internal mutex; a status that asks
    # for a downgrade is only logged and not counted.
    def handle_response(response)
      status_code = response.code.to_i

      if success?(status_code)
        Datadog::Tracer.log.debug('Payload correctly sent to the trace agent.')
        @mutex.synchronize { @count_success += 1 }
      elsif downgrade?(status_code)
        Datadog::Tracer.log.debug("calling the endpoint but received #{status_code}; downgrading the API")
      elsif client_error?(status_code)
        Datadog::Tracer.log.error("Client error: #{response.message}")
        @mutex.synchronize { @count_client_error += 1 }
      elsif server_error?(status_code)
        Datadog::Tracer.log.error("Server error: #{response.message}")
        @mutex.synchronize { @count_server_error += 1 }
      end

      status_code
    rescue StandardError => e
      Datadog::Tracer.log.error(e.message)
      @mutex.synchronize { @count_internal_error += 1 }
      500
    end

    # Return a snapshot of the delivery counters as a hash with the keys
    # +:success+, +:client_error+, +:server_error+ and +:internal_error+.
    def stats
      @mutex.synchronize do
        {
          success: @count_success,
          client_error: @count_client_error,
          server_error: @count_server_error,
          internal_error: @count_internal_error
        }
      end
    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
require 'ddtrace/buffer'
|
4
|
+
|
5
|
+
module Datadog
  module Workers
    # Asynchronous worker that flushes buffered traces and services at regular
    # intervals. Under the hood, it uses a plain +Thread+ that loops while the
    # worker is running and sleeps +span_interval+ seconds between iterations.
    # The thread can be stopped with the +stop()+ method and can start with the
    # +start()+ method.
    class AsyncTransport
      # * +span_interval+: seconds slept between two trace flushes
      # * +service_interval+: minimum seconds between two service flushes
      # * +transport+: object handed as second argument to both tasks
      # * +buff_size+: size given to the internal trace and service buffers
      # * +trace_task+ / +service_task+: callables invoked as <tt>call(items, transport)</tt>
      def initialize(span_interval, service_interval, transport, buff_size, trace_task, service_task)
        @trace_task = trace_task
        @service_task = service_task
        @span_interval = span_interval
        @service_interval = service_interval
        @trace_buffer = TraceBuffer.new(buff_size)
        @service_buffer = TraceBuffer.new(buff_size)
        @transport = transport

        @last_flushed_services = nil
        @worker = nil
        @run = false
      end

      # Callback function that processes the buffered traces by handing them to the trace task.
      # Any StandardError is logged and swallowed so that the worker thread keeps running.
      def callback_traces
        return if @trace_buffer.empty?

        traces = nil
        begin
          traces = @trace_buffer.pop
          @trace_task.call(traces, @transport)
        rescue StandardError => e
          # ensures that the thread will not die because of an exception.
          # TODO[manu]: find out the reason and reschedule the send if it's not
          # a fatal exception
          dropped = traces.nil? ? 0 : traces.length
          Datadog::Tracer.log.error("Error during traces flush: dropped #{dropped} items. Cause: #{e}")
        end
      end

      # Callback function that processes the buffered services by handing the most recent
      # changed services hash to the service task. Any StandardError is logged and swallowed
      # so that the worker thread keeps running.
      def callback_services
        return if @service_buffer.empty?

        services = nil
        begin
          services = @service_buffer.pop
          # pick up the latest services hash (this is a FIFO list)
          # that is different from what we sent before.
          different = services.reverse_each.find { |candidate| candidate != @last_flushed_services }
          if different
            # remember the hash only when the task reports a successful flush
            @last_flushed_services = different.clone if @service_task.call(different, @transport)
          else
            Datadog::Tracer.log.debug('No new different services, skipping flush.')
          end
        rescue StandardError => e
          # ensures that the thread will not die because of an exception.
          # TODO[manu]: find out the reason and reschedule the send if it's not
          # a fatal exception
          dropped = services.nil? ? 0 : services.length
          Datadog::Tracer.log.error("Error during services flush: dropped #{dropped} items. Cause: #{e}")
        end
      end

      # Start the worker thread; does nothing if the worker is already running.
      def start
        return if @run
        @run = true
        @worker = Thread.new do
          Datadog::Tracer.log.debug("Starting thread in the process: #{Process.pid}")
          @last_flushed_services = nil
          next_send_services = Time.now

          # this loop assumes spans are flushed more often than services
          while @run
            callback_traces
            if Time.now >= next_send_services
              next_send_services = Time.now + @service_interval
              callback_services
            end
            sleep(@span_interval)
          end
        end
      end

      # Ask the worker loop to stop; the thread exits after its current iteration.
      def stop
        @run = false
      end

      # Block until the worker thread terminates or until 10 seconds have passed.
      # Returns +nil+ when the worker was never started.
      def join
        @worker.join(10) unless @worker.nil?
      end

      # Enqueue an item in the trace internal buffer by pushing it
      # to the +TraceBuffer+ data structure.
      def enqueue_trace(trace)
        @trace_buffer.push(trace)
      end

      # Enqueue an item in the service internal buffer by pushing it
      # to the +TraceBuffer+ data structure. Empty hashes are ignored.
      def enqueue_service(service)
        return if service == {} # no use to send this, not worth it
        @service_buffer.push(service)
      end
    end
  end
end
|