apollo-studio-tracing 1.0.0.beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +27 -0
- data/LICENSE +23 -0
- data/README.md +71 -0
- data/bin/generate-protos.sh +15 -0
- data/bin/rspec +29 -0
- data/lib/apollo-studio-tracing.rb +44 -0
- data/lib/apollo-studio-tracing/api.rb +63 -0
- data/lib/apollo-studio-tracing/node_map.rb +79 -0
- data/lib/apollo-studio-tracing/proto.rb +13 -0
- data/lib/apollo-studio-tracing/proto/apollo.proto +381 -0
- data/lib/apollo-studio-tracing/proto/apollo_pb.rb +235 -0
- data/lib/apollo-studio-tracing/shutdown_barrier.rb +23 -0
- data/lib/apollo-studio-tracing/trace_channel.rb +182 -0
- data/lib/apollo-studio-tracing/tracer.rb +292 -0
- data/lib/apollo-studio-tracing/version.rb +5 -0
- metadata +258 -0

data/lib/apollo-studio-tracing/proto/apollo_pb.rb
@@ -0,0 +1,235 @@
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: apollo.proto
+
+require 'google/protobuf'
+
+require 'google/protobuf/timestamp_pb'
+Google::Protobuf::DescriptorPool.generated_pool.build do
+  add_file("apollo.proto", :syntax => :proto3) do
+    add_message "mdg.engine.proto.Trace" do
+      optional :start_time, :message, 4, "google.protobuf.Timestamp"
+      optional :end_time, :message, 3, "google.protobuf.Timestamp"
+      optional :duration_ns, :uint64, 11
+      optional :root, :message, 14, "mdg.engine.proto.Trace.Node"
+      optional :signature, :string, 19
+      optional :details, :message, 6, "mdg.engine.proto.Trace.Details"
+      optional :client_name, :string, 7
+      optional :client_version, :string, 8
+      optional :client_address, :string, 9
+      optional :client_reference_id, :string, 23
+      optional :http, :message, 10, "mdg.engine.proto.Trace.HTTP"
+      optional :cache_policy, :message, 18, "mdg.engine.proto.Trace.CachePolicy"
+      optional :query_plan, :message, 26, "mdg.engine.proto.Trace.QueryPlanNode"
+      optional :full_query_cache_hit, :bool, 20
+      optional :persisted_query_hit, :bool, 21
+      optional :persisted_query_register, :bool, 22
+      optional :registered_operation, :bool, 24
+      optional :forbidden_operation, :bool, 25
+      optional :legacy_signature_needs_resigning, :string, 5
+    end
+    add_message "mdg.engine.proto.Trace.CachePolicy" do
+      optional :scope, :enum, 1, "mdg.engine.proto.Trace.CachePolicy.Scope"
+      optional :max_age_ns, :int64, 2
+    end
+    add_enum "mdg.engine.proto.Trace.CachePolicy.Scope" do
+      value :UNKNOWN, 0
+      value :PUBLIC, 1
+      value :PRIVATE, 2
+    end
+    add_message "mdg.engine.proto.Trace.Details" do
+      map :variables_json, :string, :string, 4
+      map :deprecated_variables, :string, :bytes, 1
+      optional :operation_name, :string, 3
+    end
+    add_message "mdg.engine.proto.Trace.Error" do
+      optional :message, :string, 1
+      repeated :location, :message, 2, "mdg.engine.proto.Trace.Location"
+      optional :time_ns, :uint64, 3
+      optional :json, :string, 4
+    end
+    add_message "mdg.engine.proto.Trace.HTTP" do
+      optional :method, :enum, 1, "mdg.engine.proto.Trace.HTTP.Method"
+      optional :host, :string, 2
+      optional :path, :string, 3
+      map :request_headers, :string, :message, 4, "mdg.engine.proto.Trace.HTTP.Values"
+      map :response_headers, :string, :message, 5, "mdg.engine.proto.Trace.HTTP.Values"
+      optional :status_code, :uint32, 6
+      optional :secure, :bool, 8
+      optional :protocol, :string, 9
+    end
+    add_message "mdg.engine.proto.Trace.HTTP.Values" do
+      repeated :value, :string, 1
+    end
+    add_enum "mdg.engine.proto.Trace.HTTP.Method" do
+      value :UNKNOWN, 0
+      value :OPTIONS, 1
+      value :GET, 2
+      value :HEAD, 3
+      value :POST, 4
+      value :PUT, 5
+      value :DELETE, 6
+      value :TRACE, 7
+      value :CONNECT, 8
+      value :PATCH, 9
+    end
+    add_message "mdg.engine.proto.Trace.Location" do
+      optional :line, :uint32, 1
+      optional :column, :uint32, 2
+    end
+    add_message "mdg.engine.proto.Trace.Node" do
+      optional :original_field_name, :string, 14
+      optional :type, :string, 3
+      optional :parent_type, :string, 13
+      optional :cache_policy, :message, 5, "mdg.engine.proto.Trace.CachePolicy"
+      optional :start_time, :uint64, 8
+      optional :end_time, :uint64, 9
+      repeated :error, :message, 11, "mdg.engine.proto.Trace.Error"
+      repeated :child, :message, 12, "mdg.engine.proto.Trace.Node"
+      oneof :id do
+        optional :response_name, :string, 1
+        optional :index, :uint32, 2
+      end
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode" do
+      oneof :node do
+        optional :sequence, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode.SequenceNode"
+        optional :parallel, :message, 2, "mdg.engine.proto.Trace.QueryPlanNode.ParallelNode"
+        optional :fetch, :message, 3, "mdg.engine.proto.Trace.QueryPlanNode.FetchNode"
+        optional :flatten, :message, 4, "mdg.engine.proto.Trace.QueryPlanNode.FlattenNode"
+      end
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode.SequenceNode" do
+      repeated :nodes, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode"
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode.ParallelNode" do
+      repeated :nodes, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode"
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode.FetchNode" do
+      optional :service_name, :string, 1
+      optional :trace_parsing_failed, :bool, 2
+      optional :trace, :message, 3, "mdg.engine.proto.Trace"
+      optional :sent_time_offset, :uint64, 4
+      optional :sent_time, :message, 5, "google.protobuf.Timestamp"
+      optional :received_time, :message, 6, "google.protobuf.Timestamp"
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode.FlattenNode" do
+      repeated :response_path, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement"
+      optional :node, :message, 2, "mdg.engine.proto.Trace.QueryPlanNode"
+    end
+    add_message "mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement" do
+      oneof :id do
+        optional :field_name, :string, 1
+        optional :index, :uint32, 2
+      end
+    end
+    add_message "mdg.engine.proto.ReportHeader" do
+      optional :hostname, :string, 5
+      optional :agent_version, :string, 6
+      optional :service_version, :string, 7
+      optional :runtime_version, :string, 8
+      optional :uname, :string, 9
+      optional :schema_tag, :string, 10
+      optional :executable_schema_id, :string, 11
+    end
+    add_message "mdg.engine.proto.PathErrorStats" do
+      map :children, :string, :message, 1, "mdg.engine.proto.PathErrorStats"
+      optional :errors_count, :uint64, 4
+      optional :requests_with_errors_count, :uint64, 5
+    end
+    add_message "mdg.engine.proto.QueryLatencyStats" do
+      repeated :latency_count, :int64, 1
+      optional :request_count, :uint64, 2
+      optional :cache_hits, :uint64, 3
+      optional :persisted_query_hits, :uint64, 4
+      optional :persisted_query_misses, :uint64, 5
+      repeated :cache_latency_count, :int64, 6
+      optional :root_error_stats, :message, 7, "mdg.engine.proto.PathErrorStats"
+      optional :requests_with_errors_count, :uint64, 8
+      repeated :public_cache_ttl_count, :int64, 9
+      repeated :private_cache_ttl_count, :int64, 10
+      optional :registered_operation_count, :uint64, 11
+      optional :forbidden_operation_count, :uint64, 12
+    end
+    add_message "mdg.engine.proto.StatsContext" do
+      optional :client_reference_id, :string, 1
+      optional :client_name, :string, 2
+      optional :client_version, :string, 3
+    end
+    add_message "mdg.engine.proto.ContextualizedQueryLatencyStats" do
+      optional :query_latency_stats, :message, 1, "mdg.engine.proto.QueryLatencyStats"
+      optional :context, :message, 2, "mdg.engine.proto.StatsContext"
+    end
+    add_message "mdg.engine.proto.ContextualizedTypeStats" do
+      optional :context, :message, 1, "mdg.engine.proto.StatsContext"
+      map :per_type_stat, :string, :message, 2, "mdg.engine.proto.TypeStat"
+    end
+    add_message "mdg.engine.proto.FieldStat" do
+      optional :return_type, :string, 3
+      optional :errors_count, :uint64, 4
+      optional :count, :uint64, 5
+      optional :requests_with_errors_count, :uint64, 6
+      repeated :latency_count, :int64, 8
+    end
+    add_message "mdg.engine.proto.TypeStat" do
+      map :per_field_stat, :string, :message, 3, "mdg.engine.proto.FieldStat"
+    end
+    add_message "mdg.engine.proto.Field" do
+      optional :name, :string, 2
+      optional :return_type, :string, 3
+    end
+    add_message "mdg.engine.proto.Type" do
+      optional :name, :string, 1
+      repeated :field, :message, 2, "mdg.engine.proto.Field"
+    end
+    add_message "mdg.engine.proto.Report" do
+      optional :header, :message, 1, "mdg.engine.proto.ReportHeader"
+      map :traces_per_query, :string, :message, 5, "mdg.engine.proto.TracesAndStats"
+      optional :end_time, :message, 2, "google.protobuf.Timestamp"
+    end
+    add_message "mdg.engine.proto.ContextualizedStats" do
+      optional :context, :message, 1, "mdg.engine.proto.StatsContext"
+      optional :query_latency_stats, :message, 2, "mdg.engine.proto.QueryLatencyStats"
+      map :per_type_stat, :string, :message, 3, "mdg.engine.proto.TypeStat"
+    end
+    add_message "mdg.engine.proto.TracesAndStats" do
+      repeated :trace, :message, 1, "mdg.engine.proto.Trace"
+      repeated :stats_with_context, :message, 2, "mdg.engine.proto.ContextualizedStats"
+    end
+  end
+end
+
+module Mdg
+  module Engine
+    module Proto
+      Trace = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace").msgclass
+      Trace::CachePolicy = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.CachePolicy").msgclass
+      Trace::CachePolicy::Scope = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.CachePolicy.Scope").enummodule
+      Trace::Details = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.Details").msgclass
+      Trace::Error = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.Error").msgclass
+      Trace::HTTP = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.HTTP").msgclass
+      Trace::HTTP::Values = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.HTTP.Values").msgclass
+      Trace::HTTP::Method = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.HTTP.Method").enummodule
+      Trace::Location = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.Location").msgclass
+      Trace::Node = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.Node").msgclass
+      Trace::QueryPlanNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode").msgclass
+      Trace::QueryPlanNode::SequenceNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.SequenceNode").msgclass
+      Trace::QueryPlanNode::ParallelNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.ParallelNode").msgclass
+      Trace::QueryPlanNode::FetchNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.FetchNode").msgclass
+      Trace::QueryPlanNode::FlattenNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.FlattenNode").msgclass
+      Trace::QueryPlanNode::ResponsePathElement = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement").msgclass
+      ReportHeader = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ReportHeader").msgclass
+      PathErrorStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.PathErrorStats").msgclass
+      QueryLatencyStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.QueryLatencyStats").msgclass
+      StatsContext = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.StatsContext").msgclass
+      ContextualizedQueryLatencyStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ContextualizedQueryLatencyStats").msgclass
+      ContextualizedTypeStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ContextualizedTypeStats").msgclass
+      FieldStat = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.FieldStat").msgclass
+      TypeStat = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.TypeStat").msgclass
+      Field = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Field").msgclass
+      Type = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Type").msgclass
+      Report = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Report").msgclass
+      ContextualizedStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ContextualizedStats").msgclass
+      TracesAndStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.TracesAndStats").msgclass
+    end
+  end
+end
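
The descriptors above resolve to ordinary google-protobuf message classes under `Mdg::Engine::Proto`, so report payloads can be built, encoded, and decoded like any other protobuf message. A minimal sketch, assuming the generated file is available on the load path as `apollo-studio-tracing/proto/apollo_pb` (the field values are illustrative only):

    require 'apollo-studio-tracing/proto/apollo_pb'

    # Build a bare-bones trace message from the generated classes.
    trace = Mdg::Engine::Proto::Trace.new(
      duration_ns: 1_500_000,
      client_name: 'example-client',
      start_time: Google::Protobuf::Timestamp.new(seconds: Time.now.to_i),
    )

    bytes   = Mdg::Engine::Proto::Trace.encode(trace) # binary wire format
    decoded = Mdg::Engine::Proto::Trace.decode(bytes)
    decoded.client_name # => "example-client"

`TraceChannel` below relies on exactly this encode/decode round trip to keep queued traces compact until they are bundled into a `Report`.
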
data/lib/apollo-studio-tracing/shutdown_barrier.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+require 'concurrent-ruby'
+
+module ApolloStudioTracing
+  class ShutdownBarrier
+    def initialize
+      @latch = Concurrent::CountDownLatch.new(1)
+    end
+
+    def await_shutdown(timeout_secs)
+      @latch.wait(timeout_secs)
+    end
+
+    def shutdown
+      @latch.count_down
+    end
+
+    def shutting_down?
+      @latch.count.zero?
+    end
+  end
+end
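
`ShutdownBarrier` is a thin wrapper around a one-count `Concurrent::CountDownLatch`: `await_shutdown` doubles as the sleep between work cycles and as the shutdown signal, returning true once `shutdown` has been called. That is how `TraceChannel#run_uploader` drives its loop in the next file. A rough usage sketch (`do_work` and the 5-second interval are hypothetical):

    require 'apollo-studio-tracing/shutdown_barrier'

    barrier = ApolloStudioTracing::ShutdownBarrier.new

    worker = Thread.new do
      # Wake up every 5 seconds until shutdown is requested...
      do_work until barrier.await_shutdown(5)
      # ...then run one final pass before the thread exits.
      do_work
    end

    barrier.shutdown       # ask the worker to stop
    worker.join
    barrier.shutting_down? # => true
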
data/lib/apollo-studio-tracing/trace_channel.rb
@@ -0,0 +1,182 @@
+# frozen_string_literal: true
+
+require_relative 'api'
+require_relative 'proto'
+require_relative 'shutdown_barrier'
+
+module ApolloStudioTracing
+  # rubocop:disable Metrics/ClassLength
+  class TraceChannel
+    attr_reader :compress,
+                :api_key,
+                :reporting_interval,
+                :max_uncompressed_report_size,
+                :debug_reports,
+                :max_upload_attempts,
+                :min_upload_retry_delay_secs,
+                :max_queue_bytes
+
+    alias debug_reports? debug_reports
+
+    # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
+    def initialize(report_header:, compress: nil, api_key: nil, reporting_interval: nil,
+                   max_uncompressed_report_size: nil, max_queue_bytes: nil, debug_reports: nil,
+                   max_upload_attempts: nil, min_upload_retry_delay_secs: nil)
+      @report_header = report_header
+      @compress = compress.nil? ? true : compress
+      @api_key = api_key || ENV.fetch('ENGINE_API_KEY', ENV.fetch('APOLLO_KEY', 'NO_API_KEY'))
+      @reporting_interval = reporting_interval || 5
+      @max_uncompressed_report_size = max_uncompressed_report_size || 4 * 1024 * 1024
+      @max_queue_bytes = max_queue_bytes || @max_uncompressed_report_size * 10
+      @max_upload_attempts = max_upload_attempts || 5
+      @min_upload_retry_delay_secs = min_upload_retry_delay_secs || 0.1
+      @debug_reports = debug_reports.nil? ? false : debug_reports
+      @queue = Queue.new
+      @queue_bytes = Concurrent::AtomicFixnum.new(0)
+      @queue_full = false
+      @enqueue_mutex = Mutex.new
+      @shutdown_barrier = ApolloStudioTracing::ShutdownBarrier.new
+    end
+    # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
+
+    def queue(query_key, trace, context)
+      @enqueue_mutex.synchronize do
+        if @queue_bytes.value >= max_queue_bytes
+          unless @queue_full
+            ApolloStudioTracing.logger.warn(
+              "Apollo tracing queue is above the threshold of #{max_queue_bytes} bytes and " \
+              'trace collection will be paused.',
+            )
+            @queue_full = true
+          end
+        else
+          if @queue_full
+            ApolloStudioTracing.logger.info(
+              "Apollo tracing queue is below the threshold of #{max_queue_bytes} bytes and " \
+              'trace collection will resume.',
+            )
+            @queue_full = false
+          end
+
+          if debug_reports?
+            ApolloStudioTracing.logger.info("Queueing a trace for #{query_key}")
+          end
+
+          proto = ApolloStudioTracing::Trace.new(
+            start_time: to_proto_timestamp(trace[:start_time]),
+            end_time: to_proto_timestamp(trace[:end_time]),
+            duration_ns: trace[:end_time_nanos] - trace[:start_time_nanos],
+            root: trace[:node_map].root,
+            client_name: context[:apollo_client_name],
+            client_version: context[:apollo_client_version],
+          )
+
+          encoded_trace = ApolloStudioTracing::Trace.encode(proto)
+          @queue << [query_key, encoded_trace]
+          @queue_bytes.increment(encoded_trace.bytesize + query_key.bytesize)
+
+          ensure_thread_started
+        end
+      end
+    end
+
+    def start
+      @uploader_thread = Thread.new do
+        run_uploader
+      end
+    end
+
+    def ensure_thread_started
+      return if @uploader_thread&.alive?
+
+      start
+    end
+
+    def flush
+      until @queue.empty?
+        # If the uploader thread isn't running then the queue will never drain
+        break unless @uploader_thread&.alive?
+
+        sleep(0.1)
+      end
+    end
+
+    def shutdown
+      return unless @uploader_thread
+
+      @shutdown_barrier.shutdown
+      @uploader_thread.join
+    end
+
+    private
+
+    def queue_full?
+      @queue_bytes.value >= max_queue_bytes
+    end
+
+    def run_uploader
+      ApolloStudioTracing.logger.info('Apollo trace uploader starting')
+      drain_queue until @shutdown_barrier.await_shutdown(reporting_interval)
+      puts 'Stopping uploader run loop'
+      drain_queue
+    rescue StandardError => e
+      ApolloStudioTracing.logger.warn("Exception thrown in uploader process. #{e}")
+      raise e
+    ensure
+      ApolloStudioTracing.logger.info('Apollo trace uploader exiting')
+    end
+
+    def drain_queue
+      traces_per_query = {}
+      report_size = 0
+      until @queue.empty?
+        query_key, encoded_trace = @queue.pop(false)
+        @queue_bytes.decrement(encoded_trace.bytesize + query_key.bytesize)
+
+        traces_per_query[query_key] ||= []
+        traces_per_query[query_key] << encoded_trace
+        report_size += encoded_trace.bytesize + query_key.bytesize
+
+        next unless report_size >= max_uncompressed_report_size
+
+        send_report(traces_per_query)
+        traces_per_query = {}
+        report_size = 0
+      end
+
+      send_report(traces_per_query) unless traces_per_query.empty?
+    end
+
+    def send_report(traces_per_query)
+      trace_report = ApolloStudioTracing::Report.new(header: @report_header)
+      traces_per_query.each do |query_key, encoded_traces|
+        trace_report.traces_per_query[query_key] = ApolloStudioTracing::TracesAndStats.new(
+          # TODO: Figure out how to use the already encoded traces like Apollo
+          # https://github.com/apollographql/apollo-server/blob/master/packages/apollo-engine-reporting-protobuf/src/index.js
+          trace: encoded_traces.map do |encoded_trace|
+            ApolloStudioTracing::Trace.decode(encoded_trace)
+          end,
+        )
+      end
+
+      if debug_reports?
+        ApolloStudioTracing.logger.info(
+          "Sending trace report:\n#{JSON.pretty_generate(JSON.parse(trace_report.to_json))}",
+        )
+      end
+
+      ApolloStudioTracing::API.upload(
+        ApolloStudioTracing::Report.encode(trace_report),
+        api_key: api_key,
+        compress: compress,
+        max_attempts: max_upload_attempts,
+        min_retry_delay_secs: min_upload_retry_delay_secs,
+      )
+    end
+
+    def to_proto_timestamp(time)
+      Google::Protobuf::Timestamp.new(seconds: time.to_i, nanos: time.nsec)
+    end
+  end
+  # rubocop:enable Metrics/ClassLength
+end
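
A `TraceChannel` can be exercised on its own, although in practice the `Tracer` in the next file constructs it and queues traces for you. The sketch below is illustrative rather than a documented workflow: the header fields, API key, and the hand-built trace hash simply mirror what `Tracer` assembles in the query context (wall-clock start/end times, their monotonic nanosecond counterparts, and a `NodeMap`), and it assumes `require 'apollo-studio-tracing'` loads the supporting classes.

    require 'apollo-studio-tracing'

    header  = ApolloStudioTracing::ReportHeader.new(agent_version: 'example-agent 0.0.0')
    channel = ApolloStudioTracing::TraceChannel.new(
      report_header: header,
      api_key: ENV['APOLLO_KEY'],
    )
    channel.start # spawns the uploader thread

    # Shape of the trace hash expected by #queue, as built by
    # Tracer#start_trace and Tracer#execute_query_lazy.
    now_nanos = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
    trace = {
      start_time: Time.now.utc,
      end_time: Time.now.utc,
      start_time_nanos: now_nanos,
      end_time_nanos: now_nanos + 1_000_000,
      node_map: ApolloStudioTracing::NodeMap.new,
    }
    channel.queue("# ExampleQuery\n{ example }", trace, { apollo_client_name: 'example' })

    channel.flush    # wait for the uploader thread to drain the queue
    channel.shutdown # signal shutdown and join the uploader thread
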
data/lib/apollo-studio-tracing/tracer.rb
@@ -0,0 +1,292 @@
+# frozen_string_literal: true
+
+require 'apollo-studio-tracing/version'
+require 'apollo-studio-tracing/trace_channel'
+
+# Trace events are nested and fire in this order
+# for a simple single-field query like `{ foo }`:
+#
+# <execute_multiplex>
+#   <lex></lex>
+#   <parse></parse>
+#   <validate></validate>
+#   <analyze_multiplex>
+#     <analyze_query></analyze_query>
+#   </analyze_multiplex>
+#
+#   <execute_query>
+#     <execute_field></execute_field>
+#   </execute_query>
+#
+#   <execute_query_lazy>
+#
+#     # `execute_field_lazy` fires *only* when the field is lazy
+#     # (https://graphql-ruby.org/schema/lazy_execution.html)
+#     # so if it fires we should overwrite the ending times recorded
+#     # in `execute_field` to capture the total execution time.
+#
+#     <execute_field_lazy></execute_field_lazy>
+#
+#   </execute_query_lazy>
+#
+#   # `execute_query_lazy` *always* fires, so it's a
+#   # safe place to capture ending times of the full query.
+#
+# </execute_multiplex>
+
+module ApolloStudioTracing
+  # rubocop:disable Metrics/ClassLength
+  class Tracer
+    # store string constants to avoid creating new strings for each call to .trace
+    EXECUTE_MULTIPLEX = 'execute_multiplex'
+    EXECUTE_QUERY = 'execute_query'
+    EXECUTE_QUERY_LAZY = 'execute_query_lazy'
+    EXECUTE_FIELD = 'execute_field'
+    EXECUTE_FIELD_LAZY = 'execute_field_lazy'
+
+    attr_reader :trace_prepare, :query_signature
+
+    def initialize(
+      schema_tag: nil,
+      executable_schema_id: nil,
+      service_version: nil,
+      trace_prepare: nil,
+      query_signature: nil,
+      api_key: nil,
+      **trace_channel_options
+    )
+      @trace_prepare = trace_prepare || proc {}
+      @query_signature = query_signature || proc do |query|
+        # TODO: This should be smarter
+        # TODO (lsanwick) Replace with reference implementation from
+        # https://github.com/apollographql/apollo-tooling/blob/master/packages/apollo-graphql/src/operationId.ts
+        query.query_string
+      end
+
+      report_header = ApolloStudioTracing::ReportHeader.new(
+        hostname: hostname,
+        agent_version: agent_version,
+        service_version: service_version,
+        runtime_version: RUBY_DESCRIPTION,
+        uname: uname,
+        schema_tag: schema_tag || ENV.fetch('ENGINE_SCHEMA_TAG', 'current'),
+        executable_schema_id: executable_schema_id,
+      )
+      @trace_channel = ApolloStudioTracing::TraceChannel.new(
+        report_header: report_header,
+        api_key: api_key,
+        **trace_channel_options,
+      )
+    end
+
+    def start_trace_channel
+      @trace_channel.start
+    end
+
+    def shutdown_trace_channel
+      @trace_channel.shutdown
+    end
+
+    def flush_trace_channel
+      @trace_channel.flush
+    end
+
+    def trace(key, data, &block)
+      case key
+      when EXECUTE_MULTIPLEX
+        execute_multiplex(data, &block)
+      when EXECUTE_QUERY_LAZY
+        execute_query_lazy(data, &block)
+      when EXECUTE_FIELD
+        execute_field(data, &block)
+      when EXECUTE_FIELD_LAZY
+        execute_field_lazy(data, &block)
+      else
+        yield
+      end
+    end
+
+    def tracing_enabled?(context)
+      context && context[:apollo_tracing_enabled]
+    end
+
+    def execute_multiplex(data, &block)
+      # Step 1:
+      # Create a trace hash on each query's context and record start times.
+      data.fetch(:multiplex).queries.each { |query| start_trace(query) }
+
+      results = block.call
+
+      # Step 5:
+      # Enqueue the final trace onto the TraceChannel.
+      results.map { |result| attach_trace_to_result(result) }
+    end
+
+    def start_trace(query)
+      return unless tracing_enabled?(query&.context)
+
+      query.context.namespace(ApolloStudioTracing::KEY).merge!(
+        start_time: Time.now.utc,
+        start_time_nanos: Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond),
+        node_map: NodeMap.new,
+      )
+    end
+
+    # Step 2:
+    # * Record start and end times for the field resolver.
+    # * Rescue errors so the method doesn't exit early.
+    # * Create a trace "node" and attach field details.
+    # * Propagate the error (if necessary) so it ends up in the top-level errors array.
+    #
+    # The values in `data` are different depending on the executor runtime.
+    # https://graphql-ruby.org/api-doc/1.9.3/GraphQL/Tracing
+    #
+    # Nodes are added to the NodeMap stored in the trace hash.
+    #
+    # Errors are added to nodes in `ApolloStudioTracing::Tracing.attach_trace_to_result`
+    # because we don't have the error `location` here.
+    # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    def execute_field(data, &block)
+      context = data.fetch(:context, nil) || data.fetch(:query).context
+      return block.call unless tracing_enabled?(context)
+
+      start_time_nanos = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
+
+      begin
+        result = block.call
+      rescue StandardError => e
+        error = e
+      end
+
+      end_time_nanos = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
+
+      # legacy runtime
+      if data.include?(:context)
+        path = context.path
+        field_name = context.field.graphql_name
+        field_type = context.field.type.to_s
+        parent_type = context.parent_type.graphql_name
+      else # interpreter runtime
+        path = data.fetch(:path)
+        field = data.fetch(:field)
+        field_name = field.graphql_name
+        field_type = field.type.to_type_signature
+        parent_type = data.fetch(:owner).graphql_name
+      end
+
+      trace = context.namespace(ApolloStudioTracing::KEY)
+      node = trace[:node_map].add(path)
+
+      # original_field_name is set only for aliased fields
+      node.original_field_name = field_name if field_name != path.last
+      node.type = field_type
+      node.parent_type = parent_type
+      node.start_time = start_time_nanos - trace[:start_time_nanos]
+      node.end_time = end_time_nanos - trace[:start_time_nanos]
+
+      raise error if error
+
+      result
+    end
+
+    # Optional Step 3:
+    # Overwrite the end times on the trace node if the resolver was lazy.
+    def execute_field_lazy(data, &block)
+      context = data.fetch(:context, nil) || data.fetch(:query).context
+      return block.call unless tracing_enabled?(context)
+
+      begin
+        result = block.call
+      rescue StandardError => e
+        error = e
+      end
+
+      end_time_nanos = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
+
+      # legacy runtime
+      if data.include?(:context)
+        path = context.path
+        field = context.field
+      else # interpreter runtime
+        path = data.fetch(:path)
+        field = data.fetch(:field)
+      end
+
+      trace = context.namespace(ApolloStudioTracing::KEY)
+
+      # When a field is resolved with an array of lazy values, the interpreter fires an
+      # `execute_field` for the resolution of the field and then an `execute_field_lazy` event for
+      # each lazy value in the array. Since the path here will contain an index (indicating which
+      # lazy value we're executing: e.g. ['arrayOfLazies', 0]), we won't have a node for the path.
+      # We only care about the end of the parent field (e.g. ['arrayOfLazies']), so we get the
+      # node for that path. What ends up happening is we update the end_time for the parent node
+      # for each of the lazy values. The last one that's executed becomes the final end time.
+      if field.type.list? && path.last.is_a?(Integer)
+        path = path[0...-1]
+      end
+      node = trace[:node_map].node_for_path(path)
+      node.end_time = end_time_nanos - trace[:start_time_nanos]
+
+      raise error if error
+
+      result
+    end
+    # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+
+    # Step 4:
+    # Record end times and merge them into the trace hash.
+    def execute_query_lazy(data, &block)
+      result = block.call
+
+      # Normalize to an array of queries regardless of whether we are multiplexing or performing a
+      # single query.
+      queries = Array(data.fetch(:multiplex)&.queries || data.fetch(:query))
+
+      queries.map do |query|
+        next unless tracing_enabled?(query&.context)
+
+        trace = query.context.namespace(ApolloStudioTracing::KEY)
+
+        trace.merge!(
+          end_time: Time.now.utc,
+          end_time_nanos: Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond),
+        )
+      end
+
+      result
+    end
+
+    private
+
+    def attach_trace_to_result(result)
+      return result unless tracing_enabled?(result.context)
+
+      trace = result.context.namespace(ApolloStudioTracing::KEY)
+
+      result['errors']&.each do |error|
+        trace[:node_map].add_error(error)
+      end
+
+      @trace_channel.queue(
+        "# #{result.query.operation_name || '-'}\n#{query_signature.call(result.query)}",
+        trace,
+        result.context,
+      )
+
+      result
+    end
+
+    def hostname
+      @hostname ||= Socket.gethostname
+    end
+
+    def agent_version
+      @agent_version ||= "apollo-studio-tracing #{ApolloStudioTracing::VERSION}"
+    end
+
+    def uname
+      @uname ||= `uname -a`
+    end
+  end
+  # rubocop:enable Metrics/ClassLength
+end
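
`Tracer#trace(key, data)` matches graphql-ruby's tracer interface, so one way to wire it up is to register an instance directly on the schema and opt queries in via context. This is a sketch under that assumption (the gem's top-level `apollo-studio-tracing.rb`, not shown in this diff, may expose a more convenient entry point), and `QueryType` is a hypothetical root type:

    require 'graphql'
    require 'apollo-studio-tracing'

    APOLLO_TRACER = ApolloStudioTracing::Tracer.new(
      api_key: ENV['APOLLO_KEY'],
      schema_tag: 'production',
    )
    APOLLO_TRACER.start_trace_channel

    class MySchema < GraphQL::Schema
      query QueryType        # hypothetical root query type
      tracer APOLLO_TRACER   # graphql-ruby calls APOLLO_TRACER.trace(key, data) { ... }
    end

    # tracing_enabled? checks this flag, so traces are collected per request.
    MySchema.execute('{ example }', context: { apollo_tracing_enabled: true })

    at_exit { APOLLO_TRACER.shutdown_trace_channel } # drain the queue and stop the uploader
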