apollo-studio-tracing 1.0.0.beta.1
- checksums.yaml +7 -0
- data/CHANGELOG.md +27 -0
- data/LICENSE +23 -0
- data/README.md +71 -0
- data/bin/generate-protos.sh +15 -0
- data/bin/rspec +29 -0
- data/lib/apollo-studio-tracing.rb +44 -0
- data/lib/apollo-studio-tracing/api.rb +63 -0
- data/lib/apollo-studio-tracing/node_map.rb +79 -0
- data/lib/apollo-studio-tracing/proto.rb +13 -0
- data/lib/apollo-studio-tracing/proto/apollo.proto +381 -0
- data/lib/apollo-studio-tracing/proto/apollo_pb.rb +235 -0
- data/lib/apollo-studio-tracing/shutdown_barrier.rb +23 -0
- data/lib/apollo-studio-tracing/trace_channel.rb +182 -0
- data/lib/apollo-studio-tracing/tracer.rb +292 -0
- data/lib/apollo-studio-tracing/version.rb +5 -0
- metadata +258 -0
@@ -0,0 +1,235 @@
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: apollo.proto
+
+require 'google/protobuf'
+
+require 'google/protobuf/timestamp_pb'
+Google::Protobuf::DescriptorPool.generated_pool.build do
+  add_file("apollo.proto", :syntax => :proto3) do
+    add_message "mdg.engine.proto.Trace" do
+      optional :start_time, :message, 4, "google.protobuf.Timestamp"
+      optional :end_time, :message, 3, "google.protobuf.Timestamp"
+      optional :duration_ns, :uint64, 11
+      optional :root, :message, 14, "mdg.engine.proto.Trace.Node"
+      optional :signature, :string, 19
+      optional :details, :message, 6, "mdg.engine.proto.Trace.Details"
+      optional :client_name, :string, 7
+      optional :client_version, :string, 8
+      optional :client_address, :string, 9
+      optional :client_reference_id, :string, 23
+      optional :http, :message, 10, "mdg.engine.proto.Trace.HTTP"
+      optional :cache_policy, :message, 18, "mdg.engine.proto.Trace.CachePolicy"
+      optional :query_plan, :message, 26, "mdg.engine.proto.Trace.QueryPlanNode"
+      optional :full_query_cache_hit, :bool, 20
+      optional :persisted_query_hit, :bool, 21
+      optional :persisted_query_register, :bool, 22
+      optional :registered_operation, :bool, 24
+      optional :forbidden_operation, :bool, 25
+      optional :legacy_signature_needs_resigning, :string, 5
+    end
+    add_message "mdg.engine.proto.Trace.CachePolicy" do
+      optional :scope, :enum, 1, "mdg.engine.proto.Trace.CachePolicy.Scope"
+      optional :max_age_ns, :int64, 2
+    end
+    add_enum "mdg.engine.proto.Trace.CachePolicy.Scope" do
+      value :UNKNOWN, 0
+      value :PUBLIC, 1
+      value :PRIVATE, 2
+    end
+    add_message "mdg.engine.proto.Trace.Details" do
+      map :variables_json, :string, :string, 4
+      map :deprecated_variables, :string, :bytes, 1
+      optional :operation_name, :string, 3
+    end
+    add_message "mdg.engine.proto.Trace.Error" do
+      optional :message, :string, 1
+      repeated :location, :message, 2, "mdg.engine.proto.Trace.Location"
+      optional :time_ns, :uint64, 3
+      optional :json, :string, 4
+    end
+    add_message "mdg.engine.proto.Trace.HTTP" do
+      optional :method, :enum, 1, "mdg.engine.proto.Trace.HTTP.Method"
+      optional :host, :string, 2
+      optional :path, :string, 3
+      map :request_headers, :string, :message, 4, "mdg.engine.proto.Trace.HTTP.Values"
+      map :response_headers, :string, :message, 5, "mdg.engine.proto.Trace.HTTP.Values"
+      optional :status_code, :uint32, 6
+      optional :secure, :bool, 8
+      optional :protocol, :string, 9
+    end
+    add_message "mdg.engine.proto.Trace.HTTP.Values" do
+      repeated :value, :string, 1
+    end
+    add_enum "mdg.engine.proto.Trace.HTTP.Method" do
+      value :UNKNOWN, 0
+      value :OPTIONS, 1
+      value :GET, 2
+      value :HEAD, 3
+      value :POST, 4
+      value :PUT, 5
+      value :DELETE, 6
+      value :TRACE, 7
+      value :CONNECT, 8
+      value :PATCH, 9
+    end
+    add_message "mdg.engine.proto.Trace.Location" do
+      optional :line, :uint32, 1
+      optional :column, :uint32, 2
+    end
+    add_message "mdg.engine.proto.Trace.Node" do
+      optional :original_field_name, :string, 14
+      optional :type, :string, 3
+      optional :parent_type, :string, 13
+      optional :cache_policy, :message, 5, "mdg.engine.proto.Trace.CachePolicy"
+      optional :start_time, :uint64, 8
+      optional :end_time, :uint64, 9
+      repeated :error, :message, 11, "mdg.engine.proto.Trace.Error"
+      repeated :child, :message, 12, "mdg.engine.proto.Trace.Node"
+      oneof :id do
+        optional :response_name, :string, 1
+        optional :index, :uint32, 2
+      end
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode" do
+      oneof :node do
+        optional :sequence, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode.SequenceNode"
+        optional :parallel, :message, 2, "mdg.engine.proto.Trace.QueryPlanNode.ParallelNode"
+        optional :fetch, :message, 3, "mdg.engine.proto.Trace.QueryPlanNode.FetchNode"
+        optional :flatten, :message, 4, "mdg.engine.proto.Trace.QueryPlanNode.FlattenNode"
+      end
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode.SequenceNode" do
+      repeated :nodes, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode"
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode.ParallelNode" do
+      repeated :nodes, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode"
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode.FetchNode" do
+      optional :service_name, :string, 1
+      optional :trace_parsing_failed, :bool, 2
+      optional :trace, :message, 3, "mdg.engine.proto.Trace"
+      optional :sent_time_offset, :uint64, 4
+      optional :sent_time, :message, 5, "google.protobuf.Timestamp"
+      optional :received_time, :message, 6, "google.protobuf.Timestamp"
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode.FlattenNode" do
+      repeated :response_path, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement"
+      optional :node, :message, 2, "mdg.engine.proto.Trace.QueryPlanNode"
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement" do
+      oneof :id do
+        optional :field_name, :string, 1
+        optional :index, :uint32, 2
+      end
+    end
+    add_message "mdg.engine.proto.ReportHeader" do
+      optional :hostname, :string, 5
+      optional :agent_version, :string, 6
+      optional :service_version, :string, 7
+      optional :runtime_version, :string, 8
+      optional :uname, :string, 9
+      optional :schema_tag, :string, 10
+      optional :executable_schema_id, :string, 11
+    end
+    add_message "mdg.engine.proto.PathErrorStats" do
+      map :children, :string, :message, 1, "mdg.engine.proto.PathErrorStats"
+      optional :errors_count, :uint64, 4
+      optional :requests_with_errors_count, :uint64, 5
+    end
+    add_message "mdg.engine.proto.QueryLatencyStats" do
+      repeated :latency_count, :int64, 1
+      optional :request_count, :uint64, 2
+      optional :cache_hits, :uint64, 3
+      optional :persisted_query_hits, :uint64, 4
+      optional :persisted_query_misses, :uint64, 5
+      repeated :cache_latency_count, :int64, 6
+      optional :root_error_stats, :message, 7, "mdg.engine.proto.PathErrorStats"
+      optional :requests_with_errors_count, :uint64, 8
+      repeated :public_cache_ttl_count, :int64, 9
+      repeated :private_cache_ttl_count, :int64, 10
+      optional :registered_operation_count, :uint64, 11
+      optional :forbidden_operation_count, :uint64, 12
+    end
+    add_message "mdg.engine.proto.StatsContext" do
+      optional :client_reference_id, :string, 1
+      optional :client_name, :string, 2
+      optional :client_version, :string, 3
+    end
+    add_message "mdg.engine.proto.ContextualizedQueryLatencyStats" do
+      optional :query_latency_stats, :message, 1, "mdg.engine.proto.QueryLatencyStats"
+      optional :context, :message, 2, "mdg.engine.proto.StatsContext"
+    end
+    add_message "mdg.engine.proto.ContextualizedTypeStats" do
+      optional :context, :message, 1, "mdg.engine.proto.StatsContext"
+      map :per_type_stat, :string, :message, 2, "mdg.engine.proto.TypeStat"
+    end
+    add_message "mdg.engine.proto.FieldStat" do
+      optional :return_type, :string, 3
+      optional :errors_count, :uint64, 4
+      optional :count, :uint64, 5
+      optional :requests_with_errors_count, :uint64, 6
+      repeated :latency_count, :int64, 8
+    end
+    add_message "mdg.engine.proto.TypeStat" do
+      map :per_field_stat, :string, :message, 3, "mdg.engine.proto.FieldStat"
+    end
+    add_message "mdg.engine.proto.Field" do
+      optional :name, :string, 2
+      optional :return_type, :string, 3
+    end
+    add_message "mdg.engine.proto.Type" do
+      optional :name, :string, 1
+      repeated :field, :message, 2, "mdg.engine.proto.Field"
+    end
+    add_message "mdg.engine.proto.Report" do
+      optional :header, :message, 1, "mdg.engine.proto.ReportHeader"
+      map :traces_per_query, :string, :message, 5, "mdg.engine.proto.TracesAndStats"
+      optional :end_time, :message, 2, "google.protobuf.Timestamp"
+    end
+    add_message "mdg.engine.proto.ContextualizedStats" do
+      optional :context, :message, 1, "mdg.engine.proto.StatsContext"
+      optional :query_latency_stats, :message, 2, "mdg.engine.proto.QueryLatencyStats"
+      map :per_type_stat, :string, :message, 3, "mdg.engine.proto.TypeStat"
+    end
+    add_message "mdg.engine.proto.TracesAndStats" do
+      repeated :trace, :message, 1, "mdg.engine.proto.Trace"
+      repeated :stats_with_context, :message, 2, "mdg.engine.proto.ContextualizedStats"
+    end
+  end
+end
+
+module Mdg
+  module Engine
+    module Proto
+      Trace = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace").msgclass
+      Trace::CachePolicy = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.CachePolicy").msgclass
+      Trace::CachePolicy::Scope = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.CachePolicy.Scope").enummodule
+      Trace::Details = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.Details").msgclass
+      Trace::Error = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.Error").msgclass
+      Trace::HTTP = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.HTTP").msgclass
+      Trace::HTTP::Values = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.HTTP.Values").msgclass
+      Trace::HTTP::Method = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.HTTP.Method").enummodule
+      Trace::Location = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.Location").msgclass
+      Trace::Node = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.Node").msgclass
+      Trace::QueryPlanNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode").msgclass
+      Trace::QueryPlanNode::SequenceNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.SequenceNode").msgclass
+      Trace::QueryPlanNode::ParallelNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.ParallelNode").msgclass
+      Trace::QueryPlanNode::FetchNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.FetchNode").msgclass
+      Trace::QueryPlanNode::FlattenNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.FlattenNode").msgclass
+      Trace::QueryPlanNode::ResponsePathElement = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement").msgclass
+      ReportHeader = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ReportHeader").msgclass
+      PathErrorStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.PathErrorStats").msgclass
+      QueryLatencyStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.QueryLatencyStats").msgclass
+      StatsContext = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.StatsContext").msgclass
+      ContextualizedQueryLatencyStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ContextualizedQueryLatencyStats").msgclass
+      ContextualizedTypeStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ContextualizedTypeStats").msgclass
+      FieldStat = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.FieldStat").msgclass
+      TypeStat = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.TypeStat").msgclass
+      Field = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Field").msgclass
+      Type = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Type").msgclass
+      Report = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Report").msgclass
+      ContextualizedStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ContextualizedStats").msgclass
+      TracesAndStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.TracesAndStats").msgclass
+    end
+  end
+end
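
The 235-line file above is the generated protobuf descriptor (data/lib/apollo-studio-tracing/proto/apollo_pb.rb in the listing): it registers the mdg.engine.proto messages with google-protobuf's descriptor pool and exposes them as message classes under Mdg::Engine::Proto. As a rough sketch of standard google-protobuf usage with these generated classes (not code from this gem; the require path and all field values are illustrative assumptions), a Trace can be built, serialized, and round-tripped like this:

require 'apollo-studio-tracing' # assumption: the gem's top-level require loads apollo_pb.rb

Proto = Mdg::Engine::Proto

# One root node with a single resolved field (values are made up for illustration).
root = Proto::Trace::Node.new(
  child: [
    Proto::Trace::Node.new(
      response_name: 'foo',   # sets the :id oneof
      type: 'String',
      parent_type: 'Query',
      start_time: 1_000,      # nanoseconds relative to the trace start
      end_time: 2_500,
    ),
  ],
)

trace = Proto::Trace.new(
  start_time: Google::Protobuf::Timestamp.new(seconds: Time.now.to_i, nanos: 0),
  end_time: Google::Protobuf::Timestamp.new(seconds: Time.now.to_i, nanos: 0),
  duration_ns: 2_500,
  root: root,
  client_name: 'example-client',
  client_version: '1.0',
)

bytes = Proto::Trace.encode(trace) # binary protobuf, the wire format sent to Apollo Studio
Proto::Trace.decode(bytes)         # round-trips back into a Trace message
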
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+require 'concurrent-ruby'
+
+module ApolloStudioTracing
+  class ShutdownBarrier
+    def initialize
+      @latch = Concurrent::CountDownLatch.new(1)
+    end
+
+    def await_shutdown(timeout_secs)
+      @latch.wait(timeout_secs)
+    end
+
+    def shutdown
+      @latch.count_down
+    end
+
+    def shutting_down?
+      @latch.count.zero?
+    end
+  end
+end
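
ShutdownBarrier is a thin wrapper around Concurrent::CountDownLatch with a count of one: await_shutdown blocks for at most timeout_secs and returns false on timeout or true once shutdown has counted the latch down. This is what lets the trace channel's uploader loop wake up once per reporting interval yet still exit promptly when asked to stop. A minimal sketch of that pattern, assuming the file above is loadable via its standard gem path (illustrative only, not taken from the gem):

require 'apollo-studio-tracing/shutdown_barrier' # assumed load path for the file above

barrier = ApolloStudioTracing::ShutdownBarrier.new

worker = Thread.new do
  # await_shutdown(5) returns false when the 5-second timeout elapses (keep working)
  # and true once another thread has called barrier.shutdown (time to stop).
  puts 'tick' until barrier.await_shutdown(5)
  puts 'final drain, then exit'
end

sleep 12                 # let the worker tick a couple of times
barrier.shutdown         # counts the latch down to zero
worker.join
barrier.shutting_down?   # => true
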
@@ -0,0 +1,182 @@
+# frozen_string_literal: true
+
+require_relative 'api'
+require_relative 'proto'
+require_relative 'shutdown_barrier'
+
+module ApolloStudioTracing
+  # rubocop:disable Metrics/ClassLength
+  class TraceChannel
+    attr_reader :compress,
+                :api_key,
+                :reporting_interval,
+                :max_uncompressed_report_size,
+                :debug_reports,
+                :max_upload_attempts,
+                :min_upload_retry_delay_secs,
+                :max_queue_bytes
+
+    alias debug_reports? debug_reports
+
+    # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
+    def initialize(report_header:, compress: nil, api_key: nil, reporting_interval: nil,
+                   max_uncompressed_report_size: nil, max_queue_bytes: nil, debug_reports: nil,
+                   max_upload_attempts: nil, min_upload_retry_delay_secs: nil)
+      @report_header = report_header
+      @compress = compress.nil? ? true : compress
+      @api_key = api_key || ENV.fetch('ENGINE_API_KEY', ENV.fetch('APOLLO_KEY', 'NO_API_KEY'))
+      @reporting_interval = reporting_interval || 5
+      @max_uncompressed_report_size = max_uncompressed_report_size || 4 * 1024 * 1024
+      @max_queue_bytes = max_queue_bytes || @max_uncompressed_report_size * 10
+      @max_upload_attempts = max_upload_attempts || 5
+      @min_upload_retry_delay_secs = min_upload_retry_delay_secs || 0.1
+      @debug_reports = debug_reports.nil? ? false : debug_reports
+      @queue = Queue.new
+      @queue_bytes = Concurrent::AtomicFixnum.new(0)
+      @queue_full = false
+      @enqueue_mutex = Mutex.new
+      @shutdown_barrier = ApolloStudioTracing::ShutdownBarrier.new
+    end
+    # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
+
+    def queue(query_key, trace, context)
+      @enqueue_mutex.synchronize do
+        if @queue_bytes.value >= max_queue_bytes
+          unless @queue_full
+            ApolloStudioTracing.logger.warn(
+              "Apollo tracing queue is above the threshold of #{max_queue_bytes} bytes and " \
+              'trace collection will be paused.',
+            )
+            @queue_full = true
+          end
+        else
+          if @queue_full
+            ApolloStudioTracing.logger.info(
+              "Apollo tracing queue is below the threshold of #{max_queue_bytes} bytes and " \
+              'trace collection will resume.',
+            )
+            @queue_full = false
+          end
+
+          if debug_reports?
+            ApolloStudioTracing.logger.info("Queueing a trace for #{query_key}")
+          end
+
+          proto = ApolloStudioTracing::Trace.new(
+            start_time: to_proto_timestamp(trace[:start_time]),
+            end_time: to_proto_timestamp(trace[:end_time]),
+            duration_ns: trace[:end_time_nanos] - trace[:start_time_nanos],
+            root: trace[:node_map].root,
+            client_name: context[:apollo_client_name],
+            client_version: context[:apollo_client_version],
+          )
+
+          encoded_trace = ApolloStudioTracing::Trace.encode(proto)
+          @queue << [query_key, encoded_trace]
+          @queue_bytes.increment(encoded_trace.bytesize + query_key.bytesize)
+
+          ensure_thread_started
+        end
+      end
+    end
+
+    def start
+      @uploader_thread = Thread.new do
+        run_uploader
+      end
+    end
+
+    def ensure_thread_started
+      return if @uploader_thread&.alive?
+
+      start
+    end
+
+    def flush
+      until @queue.empty?
+        # If the uploader thread isn't running then the queue will never drain
+        break unless @uploader_thread&.alive?
+
+        sleep(0.1)
+      end
+    end
+
+    def shutdown
+      return unless @uploader_thread
+
+      @shutdown_barrier.shutdown
+      @uploader_thread.join
+    end
+
+    private
+
+    def queue_full?
+      @queue_bytes.value >= max_queue_bytes
+    end
+
+    def run_uploader
+      ApolloStudioTracing.logger.info('Apollo trace uploader starting')
+      drain_queue until @shutdown_barrier.await_shutdown(reporting_interval)
+      puts 'Stopping uploader run loop'
+      drain_queue
+    rescue StandardError => e
+      ApolloStudioTracing.logger.warn("Exception thrown in uploader process. #{e}")
+      raise e
+    ensure
+      ApolloStudioTracing.logger.info('Apollo trace uploader exiting')
+    end
+
+    def drain_queue
+      traces_per_query = {}
+      report_size = 0
+      until @queue.empty?
+        query_key, encoded_trace = @queue.pop(false)
+        @queue_bytes.decrement(encoded_trace.bytesize + query_key.bytesize)
+
+        traces_per_query[query_key] ||= []
+        traces_per_query[query_key] << encoded_trace
+        report_size += encoded_trace.bytesize + query_key.bytesize
+
+        next unless report_size >= max_uncompressed_report_size
+
+        send_report(traces_per_query)
+        traces_per_query = {}
+        report_size = 0
+      end
+
+      send_report(traces_per_query) unless traces_per_query.empty?
+    end
+
+    def send_report(traces_per_query)
+      trace_report = ApolloStudioTracing::Report.new(header: @report_header)
+      traces_per_query.each do |query_key, encoded_traces|
+        trace_report.traces_per_query[query_key] = ApolloStudioTracing::TracesAndStats.new(
+          # TODO: Figure out how to use the already encoded traces like Apollo
+          # https://github.com/apollographql/apollo-server/blob/master/packages/apollo-engine-reporting-protobuf/src/index.js
+          trace: encoded_traces.map do |encoded_trace|
+            ApolloStudioTracing::Trace.decode(encoded_trace)
+          end,
+        )
+      end
+
+      if debug_reports?
+        ApolloStudioTracing.logger.info(
+          "Sending trace report:\n#{JSON.pretty_generate(JSON.parse(trace_report.to_json))}",
+        )
+      end
+
+      ApolloStudioTracing::API.upload(
+        ApolloStudioTracing::Report.encode(trace_report),
+        api_key: api_key,
+        compress: compress,
+        max_attempts: max_upload_attempts,
+        min_retry_delay_secs: min_upload_retry_delay_secs,
+      )
+    end
+
+    def to_proto_timestamp(time)
+      Google::Protobuf::Timestamp.new(seconds: time.to_i, nanos: time.nsec)
+    end
+  end
+  # rubocop:enable Metrics/ClassLength
+end
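
TraceChannel owns the in-memory queue of encoded traces and a background uploader thread: queue encodes each trace and tracks the queue size in bytes (pausing collection above max_queue_bytes), while the uploader drains the queue every reporting_interval seconds and ships batches of roughly max_uncompressed_report_size bytes via ApolloStudioTracing::API.upload. A hedged lifecycle sketch follows, assuming the top-level require 'apollo-studio-tracing' loads the proto constants and logger used above; the option values are illustrative, not recommendations:

require 'socket'
require 'apollo-studio-tracing' # assumption: loads ReportHeader, TraceChannel, and the logger

header = ApolloStudioTracing::ReportHeader.new(
  hostname: Socket.gethostname,
  schema_tag: 'current',
)

channel = ApolloStudioTracing::TraceChannel.new(
  report_header: header,
  api_key: ENV['APOLLO_KEY'],                    # otherwise ENGINE_API_KEY / 'NO_API_KEY' per the defaults above
  reporting_interval: 10,                        # defaults to 5 seconds
  max_uncompressed_report_size: 1 * 1024 * 1024, # defaults to 4 MB
  debug_reports: true,                           # log each queued trace and outgoing report
)

channel.start     # spawn the uploader thread
# ... channel.queue(query_key, trace, context) is called per query by the Tracer ...
channel.flush     # block until the in-memory queue drains
channel.shutdown  # signal the shutdown barrier and join the uploader thread
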
@@ -0,0 +1,292 @@
+# frozen_string_literal: true
+
+require 'apollo-studio-tracing/version'
+require 'apollo-studio-tracing/trace_channel'
+
+# Trace events are nested and fire in this order
+# for a simple single-field query like `{ foo }`:
+#
+# <execute_multiplex>
+#   <lex></lex>
+#   <parse></parse>
+#   <validate></validate>
+#   <analyze_multiplex>
+#     <analyze_query></analyze_query>
+#   </analyze_multiplex>
+#
+#   <execute_query>
+#     <execute_field></execute_field>
+#   </execute_query>
+#
+#   <execute_query_lazy>
+#
+#     # `execute_field_lazy` fires *only* when the field is lazy
+#     # (https://graphql-ruby.org/schema/lazy_execution.html)
+#     # so if it fires we should overwrite the ending times recorded
+#     # in `execute_field` to capture the total execution time.
+#
+#     <execute_field_lazy></execute_field_lazy>
+#
+#   </execute_query_lazy>
+#
+#   # `execute_query_lazy` *always* fires, so it's a
+#   # safe place to capture ending times of the full query.
+#
+# </execute_multiplex>
+
+module ApolloStudioTracing
+  # rubocop:disable Metrics/ClassLength
+  class Tracer
+    # store string constants to avoid creating new strings for each call to .trace
+    EXECUTE_MULTIPLEX = 'execute_multiplex'
+    EXECUTE_QUERY = 'execute_query'
+    EXECUTE_QUERY_LAZY = 'execute_query_lazy'
+    EXECUTE_FIELD = 'execute_field'
+    EXECUTE_FIELD_LAZY = 'execute_field_lazy'
+
+    attr_reader :trace_prepare, :query_signature
+
+    def initialize(
+      schema_tag: nil,
+      executable_schema_id: nil,
+      service_version: nil,
+      trace_prepare: nil,
+      query_signature: nil,
+      api_key: nil,
+      **trace_channel_options
+    )
+      @trace_prepare = trace_prepare || proc {}
+      @query_signature = query_signature || proc do |query|
+        # TODO: This should be smarter
+        # TODO (lsanwick) Replace with reference implementation from
+        # https://github.com/apollographql/apollo-tooling/blob/master/packages/apollo-graphql/src/operationId.ts
+        query.query_string
+      end
+
+      report_header = ApolloStudioTracing::ReportHeader.new(
+        hostname: hostname,
+        agent_version: agent_version,
+        service_version: service_version,
+        runtime_version: RUBY_DESCRIPTION,
+        uname: uname,
+        schema_tag: schema_tag || ENV.fetch('ENGINE_SCHEMA_TAG', 'current'),
+        executable_schema_id: executable_schema_id,
+      )
+      @trace_channel = ApolloStudioTracing::TraceChannel.new(
+        report_header: report_header,
+        api_key: api_key,
+        **trace_channel_options,
+      )
+    end
+
+    def start_trace_channel
+      @trace_channel.start
+    end
+
+    def shutdown_trace_channel
+      @trace_channel.shutdown
+    end
+
+    def flush_trace_channel
+      @trace_channel.flush
+    end
+
+    def trace(key, data, &block)
+      case key
+      when EXECUTE_MULTIPLEX
+        execute_multiplex(data, &block)
+      when EXECUTE_QUERY_LAZY
+        execute_query_lazy(data, &block)
+      when EXECUTE_FIELD
+        execute_field(data, &block)
+      when EXECUTE_FIELD_LAZY
+        execute_field_lazy(data, &block)
+      else
+        yield
+      end
+    end
+
+    def tracing_enabled?(context)
+      context && context[:apollo_tracing_enabled]
+    end
+
+    def execute_multiplex(data, &block)
+      # Step 1:
+      # Create a trace hash on each query's context and record start times.
+      data.fetch(:multiplex).queries.each { |query| start_trace(query) }
+
+      results = block.call
+
+      # Step 5
+      # Enqueue the final trace onto the TraceChannel.
+      results.map { |result| attach_trace_to_result(result) }
+    end
+
+    def start_trace(query)
+      return unless tracing_enabled?(query&.context)
+
+      query.context.namespace(ApolloStudioTracing::KEY).merge!(
+        start_time: Time.now.utc,
+        start_time_nanos: Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond),
+        node_map: NodeMap.new,
+      )
+    end
+
+    # Step 2:
+    # * Record start and end times for the field resolver.
+    # * Rescue errors so the method doesn't exit early.
+    # * Create a trace "node" and attach field details.
+    # * Propagate the error (if necessary) so it ends up in the top-level errors array.
+    #
+    # The values in `data` are different depending on the executor runtime.
+    # https://graphql-ruby.org/api-doc/1.9.3/GraphQL/Tracing
+    #
+    # Nodes are added the NodeMap stored in the trace hash.
+    #
+    # Errors are added to nodes in `ApolloStudioTracing::Tracing.attach_trace_to_result`
+    # because we don't have the error `location` here.
+    # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    def execute_field(data, &block)
+      context = data.fetch(:context, nil) || data.fetch(:query).context
+      return block.call unless tracing_enabled?(context)
+
+      start_time_nanos = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
+
+      begin
+        result = block.call
+      rescue StandardError => e
+        error = e
+      end
+
+      end_time_nanos = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
+
+      # legacy runtime
+      if data.include?(:context)
+        path = context.path
+        field_name = context.field.graphql_name
+        field_type = context.field.type.to_s
+        parent_type = context.parent_type.graphql_name
+      else # interpreter runtime
+        path = data.fetch(:path)
+        field = data.fetch(:field)
+        field_name = field.graphql_name
+        field_type = field.type.to_type_signature
+        parent_type = data.fetch(:owner).graphql_name
+      end
+
+      trace = context.namespace(ApolloStudioTracing::KEY)
+      node = trace[:node_map].add(path)
+
+      # original_field_name is set only for aliased fields
+      node.original_field_name = field_name if field_name != path.last
+      node.type = field_type
+      node.parent_type = parent_type
+      node.start_time = start_time_nanos - trace[:start_time_nanos]
+      node.end_time = end_time_nanos - trace[:start_time_nanos]
+
+      raise error if error
+
+      result
+    end
+
+    # Optional Step 3:
+    # Overwrite the end times on the trace node if the resolver was lazy.
+    def execute_field_lazy(data, &block)
+      context = data.fetch(:context, nil) || data.fetch(:query).context
+      return block.call unless tracing_enabled?(context)
+
+      begin
+        result = block.call
+      rescue StandardError => e
+        error = e
+      end
+
+      end_time_nanos = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
+
+      # legacy runtime
+      if data.include?(:context)
+        path = context.path
+        field = context.field
+      else # interpreter runtime
+        path = data.fetch(:path)
+        field = data.fetch(:field)
+      end
+
+      trace = context.namespace(ApolloStudioTracing::KEY)
+
+      # When a field is resolved with an array of lazy values, the interpreter fires an
+      # `execute_field` for the resolution of the field and then a `execute_field_lazy` event for
+      # each lazy value in the array. Since the path here will contain an index (indicating which
+      # lazy value we're executing: e.g. ['arrayOfLazies', 0]), we won't have a node for the path.
+      # We only care about the end of the parent field (e.g. ['arrayOfLazies']), so we get the
+      # node for that path. What ends up happening is we update the end_time for the parent node
+      # for each of the lazy values. The last one that's executed becomes the final end time.
+      if field.type.list? && path.last.is_a?(Integer)
+        path = path[0...-1]
+      end
+      node = trace[:node_map].node_for_path(path)
+      node.end_time = end_time_nanos - trace[:start_time_nanos]
+
+      raise error if error
+
+      result
+    end
+    # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+
+    # Step 4:
+    # Record end times and merge them into the trace hash
+    def execute_query_lazy(data, &block)
+      result = block.call
+
+      # Normalize to an array of queries regardless of whether we are multiplexing or performing a
+      # single query.
+      queries = Array(data.fetch(:multiplex)&.queries || data.fetch(:query))
+
+      queries.map do |query|
+        next unless tracing_enabled?(query&.context)
+
+        trace = query.context.namespace(ApolloStudioTracing::KEY)
+
+        trace.merge!(
+          end_time: Time.now.utc,
+          end_time_nanos: Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond),
+        )
+      end
+
+      result
+    end
+
+    private
+
+    def attach_trace_to_result(result)
+      return result unless tracing_enabled?(result.context)
+
+      trace = result.context.namespace(ApolloStudioTracing::KEY)
+
+      result['errors']&.each do |error|
+        trace[:node_map].add_error(error)
+      end
+
+      @trace_channel.queue(
+        "# #{result.query.operation_name || '-'}\n#{query_signature.call(result.query)}",
+        trace,
+        result.context,
+      )
+
+      result
+    end
+
+    def hostname
+      @hostname ||= Socket.gethostname
+    end
+
+    def agent_version
+      @agent_version ||= "apollo-studio-tracing #{ApolloStudioTracing::VERSION}"
+    end
+
+    def uname
+      @uname ||= `uname -a`
+    end
+  end
+  # rubocop:enable Metrics/ClassLength
+end
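
Tracer implements graphql-ruby's tracer interface: trace(key, data) dispatches on the event names listed at the top of the class, records per-field timings into the NodeMap, and hands each finished trace to the TraceChannel keyed by operation name plus query signature. The sketch below shows one plausible way to wire it into a graphql-ruby 1.x schema; it is an assumption, not the gem's documented install path (the README and lib/apollo-studio-tracing.rb in this release, not shown in this diff, may provide a different integration helper). Note that traces are recorded only when the execution context sets :apollo_tracing_enabled.

require 'graphql'
require 'apollo-studio-tracing'

# Hypothetical schema, purely for illustration.
class QueryType < GraphQL::Schema::Object
  field :foo, String, null: true

  def foo
    'bar'
  end
end

class MySchema < GraphQL::Schema
  query QueryType
end

tracer = ApolloStudioTracing::Tracer.new(
  service_version: '1.2.3',
  schema_tag: 'production',   # otherwise ENGINE_SCHEMA_TAG or 'current'
  api_key: ENV['APOLLO_KEY'],
)

MySchema.tracer(tracer)       # graphql-ruby 1.x tracing hook
tracer.start_trace_channel    # start the background uploader

# Only queries that opt in via context are traced (see tracing_enabled? above).
MySchema.execute('{ foo }', context: { apollo_tracing_enabled: true })

at_exit { tracer.shutdown_trace_channel } # drains queued traces before exit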