buildkite-test_collector 1.5.0 → 2.0.0.pre

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 90ace3070ba5267cd956e59f09db4c065556295c916a042abeb5e9f6d6d2ff06
4
- data.tar.gz: 8ea5a68cdeb8fe4cbab374e8f5769daf867e913433ab45cc8630cad0ce1033c4
3
+ metadata.gz: b7fc00254b8a2002e8680ca265b00f5b2700dace5d6948aa56f779102f8ce0c6
4
+ data.tar.gz: 6c05b872c740262ca0c66f4d6c09b5b253a366466241513b74706c8d0a56328a
5
5
  SHA512:
6
- metadata.gz: c8d067d2b24e5baab884443ec5911a9f9642cd94c61329f214ed2fbea642f6324e094e7d8adb909057986b85339f2757dc54b6904aaa0272ef6e0c93bf63f097
7
- data.tar.gz: d16f6a95ae882f1fc2c24ed4838b6a9e4af7a5648c94f15788463cb3a1080357f2ae4b797ae80b66a2cdb110235d4d1900bd59133b350f6430705fdeaae23438
6
+ metadata.gz: 2f646c1a4564960ba78494efef9ec64fa43eb9a4107d7c004884d977649d5cdf07aa19abec49b887d046d1c7a15ec7b61afbde60f4b12769c77f7578ba55df8e
7
+ data.tar.gz: b80f9fd21f045f396bceea9af860e471191669dc80b1d3e57ae38c5d1b7dcebe3fdfc1646774af043d6b8f243cd7335c9b8f044aff9232146e3c0a4dad8a58d8
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## v2.0.0.pre
4
+
5
+ - Major change: the RSpec plugin now uses the HTTP Upload API instead of a websocket connection to send test data #174 #175 - @niceking
6
+ - `identifier` field removed from trace #176 - @amybiyuliu
7
+
3
8
  ## v1.5.0
4
9
 
5
10
  - Send `failure_expanded` from minitest #171 - @nprizal
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- buildkite-test_collector (1.5.0)
4
+ buildkite-test_collector (2.0.0.pre)
5
5
  activesupport (>= 4.2)
6
6
  websocket (~> 1.2)
7
7
 
@@ -13,11 +13,11 @@ GEM
13
13
  i18n (>= 1.6, < 2)
14
14
  minitest (>= 5.1)
15
15
  tzinfo (~> 2.0)
16
- concurrent-ruby (1.2.0)
16
+ concurrent-ruby (1.2.2)
17
17
  diff-lcs (1.4.4)
18
18
  i18n (1.12.0)
19
19
  concurrent-ruby (~> 1.0)
20
- minitest (5.17.0)
20
+ minitest (5.18.0)
21
21
  rake (13.0.6)
22
22
  rspec (3.10.0)
23
23
  rspec-core (~> 3.10.0)
@@ -47,4 +47,4 @@ DEPENDENCIES
47
47
  rspec-expectations (~> 3.10)
48
48
 
49
49
  BUNDLED WITH
50
- 2.2.22
50
+ 2.3.25
@@ -28,6 +28,28 @@ module Buildkite::TestCollector
28
28
  http.request(contact)
29
29
  end
30
30
 
31
+ def post_json(data)
32
+ contact_uri = URI.parse(url)
33
+
34
+ http = Net::HTTP.new(contact_uri.host, contact_uri.port)
35
+ http.use_ssl = contact_uri.scheme == "https"
36
+
37
+ contact = Net::HTTP::Post.new(contact_uri.path, {
38
+ "Authorization" => authorization_header,
39
+ "Content-Type" => "application/json",
40
+ })
41
+
42
+ data_set = data.map(&:as_hash)
43
+
44
+ contact.body = {
45
+ run_env: Buildkite::TestCollector::CI.env,
46
+ format: "json",
47
+ data: data_set
48
+ }.to_json
49
+
50
+ http.request(contact)
51
+ end
52
+
31
53
  private
32
54
 
33
55
  attr :url
@@ -11,8 +11,6 @@ Buildkite::TestCollector.uploader = Buildkite::TestCollector::Uploader
11
11
  RSpec.configure do |config|
12
12
  config.before(:suite) do
13
13
  config.add_formatter Buildkite::TestCollector::RSpecPlugin::Reporter
14
-
15
- Buildkite::TestCollector.safe { Buildkite::TestCollector::Uploader.configure }
16
14
  end
17
15
 
18
16
  config.around(:each) do |example|
@@ -34,7 +34,6 @@ module Buildkite::TestCollector::MinitestPlugin
34
34
  id: id,
35
35
  scope: example.class.name,
36
36
  name: example.name,
37
- identifier: identifier,
38
37
  location: location,
39
38
  file_name: file_name,
40
39
  result: result,
@@ -51,7 +50,6 @@ module Buildkite::TestCollector::MinitestPlugin
51
50
  "#{file_name}:#{line_number}"
52
51
  end
53
52
  end
54
- alias_method :identifier, :location
55
53
 
56
54
  def file_name
57
55
  @file_name ||= File.join('./', source_location[0].delete_prefix(project_dir))
@@ -7,6 +7,7 @@ module Buildkite::TestCollector::RSpecPlugin
7
7
  attr_reader :output
8
8
 
9
9
  def initialize(output)
10
+ Buildkite::TestCollector.session = Buildkite::TestCollector::Session.new
10
11
  @output = output
11
12
  end
12
13
 
@@ -19,21 +20,13 @@ module Buildkite::TestCollector::RSpecPlugin
19
20
  if example.execution_result.status == :failed
20
21
  trace.failure_reason, trace.failure_expanded = failure_info(notification)
21
22
  end
22
- Buildkite::TestCollector.session&.write_result(trace)
23
+ Buildkite::TestCollector.session.add_example_to_send_queue(example.id)
23
24
  end
24
25
  end
25
26
 
26
- def dump_summary(notification)
27
- if Buildkite::TestCollector.session.present?
28
- examples_count = {
29
- examples: notification.examples.count,
30
- failed: notification.failed_examples.count,
31
- pending: notification.pending_examples.count,
32
- errors_outside_examples: notification.errors_outside_of_examples_count
33
- }
34
-
35
- Buildkite::TestCollector.session.close(examples_count)
36
- end
27
+ def dump_summary(_notification)
28
+ Buildkite::TestCollector.session.send_remaining_data
29
+ Buildkite::TestCollector.session.close
37
30
  end
38
31
 
39
32
  alias_method :example_passed, :handle_example
@@ -28,7 +28,6 @@ module Buildkite::TestCollector::RSpecPlugin
28
28
  id: id,
29
29
  scope: example.example_group.metadata[:full_description],
30
30
  name: example.description,
31
- identifier: example.id,
32
31
  location: example.location,
33
32
  file_name: file_name,
34
33
  result: result,
@@ -2,332 +2,56 @@
2
2
 
3
3
  module Buildkite::TestCollector
4
4
  class Session
5
- # Picked 75 as the magic timeout number as it's longer than the TCP timeout of 60s 🤷‍♀️
6
- CONFIRMATION_TIMEOUT = ENV.fetch("BUILDKITE_ANALYTICS_CONFIRMATION_TIMEOUT") { 75 }.to_i
7
- MAX_RECONNECTION_ATTEMPTS = ENV.fetch("BUILDKITE_ANALYTICS_RECONNECTION_ATTEMPTS") { 3 }.to_i
8
- WAIT_BETWEEN_RECONNECTIONS = ENV.fetch("BUILDKITE_ANALYTICS_RECONNECTION_WAIT") { 5 }.to_i
5
+ UPLOAD_THREAD_TIMEOUT = 60
6
+ UPLOAD_SESSION_TIMEOUT = 60
7
+ UPLOAD_API_MAX_RESULTS = 5000
9
8
 
10
- # We keep a private reference so that mocking libraries won't break JSON
11
- JSON_PARSE = JSON.method(:parse)
12
- private_constant :JSON_PARSE
13
-
14
- class RejectedSubscription < StandardError; end
15
- class InitialConnectionFailure < StandardError; end
16
-
17
- DISCONNECTED_EXCEPTIONS = [
18
- Buildkite::TestCollector::SocketConnection::HandshakeError,
19
- Buildkite::TestCollector::TimeoutError,
20
- Buildkite::TestCollector::SocketConnection::SocketError,
21
- RejectedSubscription,
22
- InitialConnectionFailure,
23
- ]
24
-
25
- def initialize(url, authorization_header, channel)
26
- @establish_subscription_queue = Queue.new
27
- @channel = channel
28
-
29
- @unconfirmed_idents = {}
30
- @idents_mutex = Mutex.new
31
- @send_queue = Queue.new
32
- @empty = ConditionVariable.new
33
- @closing = false
34
- @eot_queued = false
35
- @eot_queued_mutex = Mutex.new
36
- @reconnection_mutex = Mutex.new
37
-
38
- @url = url
39
- @authorization_header = authorization_header
40
-
41
- reconnection_count = 0
42
-
43
- begin
44
- reconnection_count += 1
45
- connect
46
- rescue Buildkite::TestCollector::TimeoutError, InitialConnectionFailure => e
47
- Buildkite::TestCollector.logger.warn("buildkite-test_collector could not establish an initial connection with Buildkite due to #{e}. Attempting retry #{reconnection_count} of #{MAX_RECONNECTION_ATTEMPTS}...")
48
- if reconnection_count > MAX_RECONNECTION_ATTEMPTS
49
- Buildkite::TestCollector.logger.error "buildkite-test_collector could not establish an initial connection with Buildkite due to #{e.message} after #{MAX_RECONNECTION_ATTEMPTS} attempts. You may be missing some data for this test suite, please contact support if this issue persists."
50
- else
51
- sleep(WAIT_BETWEEN_RECONNECTIONS)
52
- Buildkite::TestCollector.logger.warn("retrying reconnection")
53
- retry
54
- end
55
- end
56
- init_write_thread
9
+ def initialize
10
+ @send_queue_ids = []
11
+ @upload_threads = []
57
12
  end
58
13
 
59
- def disconnected(connection)
60
- @reconnection_mutex.synchronize do
61
- # When the first thread detects a disconnection, it calls the disconnect method
62
- # with the current connection. This thread grabs the reconnection mutex and does the
63
- # reconnection, which then updates the value of @connection.
64
- #
65
- # At some point in that process, the second thread would have detected the
66
- # disconnection too, and it also calls it with the current connection. However, the
67
- # second thread can't run the reconnection code because of the mutex. By the
68
- # time the mutex is released, the value of @connection has been refreshed, and so
69
- # the second thread returns early and does not reattempt the reconnection.
70
- return unless connection == @connection
71
- Buildkite::TestCollector.logger.debug("starting reconnection")
72
-
73
- reconnection_count = 0
14
+ def add_example_to_send_queue(id)
15
+ @send_queue_ids << id
74
16
 
75
- begin
76
- reconnection_count += 1
77
- connect
78
- init_write_thread
79
- rescue *DISCONNECTED_EXCEPTIONS => e
80
- Buildkite::TestCollector.logger.warn("failed reconnection attempt #{reconnection_count} due to #{e}")
81
- if reconnection_count > MAX_RECONNECTION_ATTEMPTS
82
- Buildkite::TestCollector.logger.error "buildkite-test_collector experienced a disconnection and could not reconnect to Buildkite due to #{e.message}. Please contact support."
83
- raise e
84
- else
85
- sleep(WAIT_BETWEEN_RECONNECTIONS)
86
- Buildkite::TestCollector.logger.warn("retrying reconnection")
87
- retry
88
- end
89
- end
17
+ if @send_queue_ids.size >= Buildkite::TestCollector.batch_size
18
+ send_ids = @send_queue_ids.shift(Buildkite::TestCollector.batch_size)
19
+ upload_data(send_ids)
90
20
  end
91
- retransmit
92
21
  end
93
22
 
94
- def close(examples_count)
95
- @closing = true
96
- @examples_count = examples_count
97
- Buildkite::TestCollector.logger.debug("closing socket connection")
23
+ def send_remaining_data
24
+ return if @send_queue_ids.empty?
98
25
 
99
- # Because the server only sends us confirmations after every 10mb of
100
- # data it uploads to S3, we'll never get confirmation of the
101
- # identifiers of the last upload part unless we send an explicit finish,
102
- # to which the server will respond with the last bits of data
103
- send_eot
104
-
105
- # After EOT, we wait for 75 seconds for the send queue to be drained and for the
106
- # server to confirm the last idents. If everything has already been confirmed we can
107
- # proceed without waiting.
108
- @idents_mutex.synchronize do
109
- if @unconfirmed_idents.any?
110
- Buildkite::TestCollector.logger.debug "Waiting for Buildkite Test Analytics to send results..."
111
- Buildkite::TestCollector.logger.debug("waiting for last confirm")
112
-
113
- @empty.wait(@idents_mutex, CONFIRMATION_TIMEOUT)
114
- end
115
- end
116
-
117
- # Then we always disconnect cos we can't wait forever? 🤷‍♀️
118
- @connection.close
119
- # We kill the write thread cos it's got a while loop in it, so it won't finish otherwise
120
- @write_thread&.kill
121
-
122
- Buildkite::TestCollector.logger.info "Buildkite Test Analytics completed"
123
- Buildkite::TestCollector.logger.debug("socket connection closed")
26
+ upload_data(@send_queue_ids)
124
27
  end
125
28
 
126
- def handle(_connection, data)
127
- data = JSON_PARSE.call(data)
128
- case data["type"]
129
- when "ping"
130
- # In absence of other message, the server sends us a ping every 3 seconds
131
- # We are currently not doing anything with these
132
- Buildkite::TestCollector.logger.debug("received ping")
133
- when "welcome", "confirm_subscription"
134
- # Push these two messages onto the queue, so that we block on waiting for the
135
- # initializing phase to complete
136
- @establish_subscription_queue.push(data)
137
- Buildkite::TestCollector.logger.debug("received #{data['type']}")
138
- when "reject_subscription"
139
- Buildkite::TestCollector.logger.debug("received rejected_subscription")
140
- raise RejectedSubscription
141
- else
142
- process_message(data)
143
- end
144
- end
29
+ def close
30
+ # There are two thread joins here, because the inner join will wait up to
31
+ # UPLOAD_THREAD_TIMEOUT seconds PER thread that is uploading data, i.e.
32
+ # n_threads x UPLOAD_THREAD_TIMEOUT latency if Buildkite happens to be
33
+ # down. By wrapping that in an outer thread join with the
34
+ # UPLOAD_SESSION_TIMEOUT, we ensure that we only wait a max of
35
+ # UPLOAD_SESSION_TIMEOUT seconds before the session exits.
36
+ Thread.new do
37
+ @upload_threads.each { |t| t.join(UPLOAD_THREAD_TIMEOUT) }
38
+ end.join(UPLOAD_SESSION_TIMEOUT)
145
39
 
146
- def write_result(result)
147
- queue_and_track_result(result.id, result.as_hash)
148
-
149
- Buildkite::TestCollector.logger.debug("added #{result.id} to send queue")
150
- end
151
-
152
- def unconfirmed_idents_count
153
- @idents_mutex.synchronize do
154
- @unconfirmed_idents.count
155
- end
40
+ @upload_threads.each { |t| t&.kill }
156
41
  end
157
42
 
158
43
  private
159
44
 
160
- def connect
161
- Buildkite::TestCollector.logger.debug("starting socket connection process")
162
-
163
- @connection = SocketConnection.new(self, @url, {
164
- "Authorization" => @authorization_header,
165
- })
166
-
167
- wait_for_welcome
168
-
169
- @connection.transmit({
170
- "command" => "subscribe",
171
- "identifier" => @channel
172
- })
173
-
174
- wait_for_confirm
175
-
176
- Buildkite::TestCollector.logger.info "Connected to Buildkite Test Analytics!"
177
- Buildkite::TestCollector.logger.debug("connected")
178
- end
179
-
180
- def init_write_thread
181
- # As this method can be called multiple times in the
182
- # reconnection process, kill prev write threads (if any) before
183
- # setting up the new one
184
- @write_thread&.kill
45
+ def upload_data(ids)
46
+ data = Buildkite::TestCollector.uploader.traces.values_at(*ids).compact
185
47
 
186
- @write_thread = Thread.new do
187
- Buildkite::TestCollector.logger.debug("hello from write thread")
188
- # Pretty sure this eternal loop is fine cos the call to queue.pop is blocking
189
- loop do
190
- data = @send_queue.pop
191
- message_type = data["action"]
192
-
193
- if message_type == "end_of_transmission"
194
- # Because of the unpredictable sequencing between the test suite finishing
195
- # (EOT gets queued) and disconnections happening (retransmit results gets
196
- # queued), we don't want to send an EOT before any retransmits are sent.
197
- if @send_queue.length > 0
198
- @send_queue << data
199
- Buildkite::TestCollector.logger.debug("putting eot at back of queue")
200
- next
201
- end
202
- @eot_queued_mutex.synchronize do
203
- @eot_queued = false
204
- end
205
- end
206
-
207
- @connection.transmit({
208
- "identifier" => @channel,
209
- "command" => "message",
210
- "data" => data.to_json
211
- })
212
-
213
- if Buildkite::TestCollector.debug_enabled
214
- ids = if message_type == "record_results"
215
- data["results"].map { |result| result["id"] }
216
- end
217
- Buildkite::TestCollector.logger.debug("transmitted #{message_type} #{ids}")
218
- end
219
- end
220
- end
221
- end
222
-
223
- def pop_with_timeout(message_type)
224
- Timeout.timeout(30, Buildkite::TestCollector::TimeoutError, "Timeout: Waited 30 seconds for #{message_type}") do
225
- @establish_subscription_queue.pop
48
+ # we do this in batches of UPLOAD_API_MAX_RESULTS in case the number of
49
+ # results exceeds this due to a bug, or user error in configuring the
50
+ # batch size
51
+ data.each_slice(UPLOAD_API_MAX_RESULTS) do |batch|
52
+ new_thread = Buildkite::TestCollector::Uploader.upload(batch)
53
+ @upload_threads << new_thread if new_thread
226
54
  end
227
55
  end
228
-
229
- def wait_for_welcome
230
- welcome = pop_with_timeout("welcome")
231
-
232
- if welcome && welcome != { "type" => "welcome" }
233
- raise InitialConnectionFailure.new("Wrong message received, expected a welcome, but received: #{welcome.inspect}")
234
- end
235
- end
236
-
237
- def wait_for_confirm
238
- confirm = pop_with_timeout("confirm")
239
-
240
- if confirm && confirm != { "type" => "confirm_subscription", "identifier" => @channel }
241
- raise InitialConnectionFailure.new("Wrong message received, expected a confirm, but received: #{confirm.inspect}")
242
- end
243
- end
244
-
245
- def queue_and_track_result(ident, result_as_hash)
246
- @idents_mutex.synchronize do
247
- @unconfirmed_idents[ident] = result_as_hash
248
-
249
- @send_queue << {
250
- "action" => "record_results",
251
- "results" => [result_as_hash]
252
- }
253
- end
254
- end
255
-
256
- def confirm_idents(idents)
257
- retransmit_required = @closing
258
-
259
- @idents_mutex.synchronize do
260
- # Remove received idents from unconfirmed_idents
261
- idents.each { |key| @unconfirmed_idents.delete(key) }
262
-
263
- Buildkite::TestCollector.logger.debug("received confirm for indentifiers: #{idents}")
264
-
265
- # This @empty ConditionVariable broadcasts every time that @unconfirmed_idents is
266
- # empty, which will happen about every 10mb of data as that's when the server
267
- # sends back confirmations.
268
- #
269
- # However, there aren't any threads waiting on this signal until after we
270
- # send the EOT message, so the prior broadcasts shouldn't do anything.
271
- if @unconfirmed_idents.empty?
272
- @empty.broadcast
273
-
274
- retransmit_required = false
275
-
276
- Buildkite::TestCollector.logger.debug("all identifiers have been confirmed")
277
- else
278
- Buildkite::TestCollector.logger.debug("still waiting on confirm for identifiers: #{@unconfirmed_idents.keys}")
279
- end
280
- end
281
-
282
- # If we're closing, any unconfirmed results need to be retransmitted.
283
- retransmit if retransmit_required
284
- end
285
-
286
- def send_eot
287
- @eot_queued_mutex.synchronize do
288
- return if @eot_queued
289
-
290
- @send_queue << {
291
- "action" => "end_of_transmission",
292
- "examples_count" => @examples_count.to_json
293
- }
294
- @eot_queued = true
295
-
296
- Buildkite::TestCollector.logger.debug("added EOT to send queue")
297
- end
298
- end
299
-
300
- def process_message(data)
301
- # Check we're getting the data we expect
302
- return unless data["identifier"] == @channel
303
-
304
- case
305
- when data["message"].key?("confirm")
306
- confirm_idents(data["message"]["confirm"])
307
- else
308
- # unhandled message
309
- Buildkite::TestCollector.logger.debug("received unhandled message #{data["message"]}")
310
- end
311
- end
312
-
313
- def retransmit
314
- @idents_mutex.synchronize do
315
- results = @unconfirmed_idents.values
316
-
317
- # queue the contents of the buffer, unless it's empty
318
- if results.any?
319
- @send_queue << {
320
- "action" => "record_results",
321
- "results" => results
322
- }
323
-
324
- Buildkite::TestCollector.logger.debug("queueing up retransmitted results #{@unconfirmed_idents.keys}")
325
- end
326
- end
327
-
328
- # if we were disconnected in the closing phase, then resend the EOT
329
- # message so the server can persist the last upload part
330
- send_eot if @closing
331
- end
332
56
  end
333
57
  end
@@ -73,12 +73,15 @@ module Buildkite::TestCollector
73
73
  end
74
74
  end
75
75
  # These get re-raised from the session; we should fail gracefully
76
- rescue *Buildkite::TestCollector::Session::DISCONNECTED_EXCEPTIONS => e
76
+ rescue *Buildkite::TestCollector::SocketSession::DISCONNECTED_EXCEPTIONS => e
77
77
  Buildkite::TestCollector.logger.error("We could not establish a connection with Buildkite Test Analytics. The error was: #{e.message}. If this is a problem, please contact support.")
78
- rescue EOFError => e
78
+ rescue EOFError, OpenSSL::SSL::SSLError => e
79
+ # https://github.com/buildkite/test-collector-ruby/pull/147#issuecomment-1250485611
80
+ raise if e.class == OpenSSL::SSL::SSLError && e.message != "SSL_read: unexpected eof while reading"
81
+
79
82
  Buildkite::TestCollector.logger.warn("#{e}")
80
83
  if @socket
81
- Buildkite::TestCollector.logger.error("attempting disconnected flow")
84
+ Buildkite::TestCollector.logger.warn("attempting disconnected flow")
82
85
  @session.disconnected(self)
83
86
  disconnect
84
87
  end
@@ -0,0 +1,333 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Buildkite::TestCollector
4
+ class SocketSession
5
+ # Picked 75 as the magic timeout number as it's longer than the TCP timeout of 60s 🤷‍♀️
6
+ CONFIRMATION_TIMEOUT = ENV.fetch("BUILDKITE_ANALYTICS_CONFIRMATION_TIMEOUT") { 75 }.to_i
7
+ MAX_RECONNECTION_ATTEMPTS = ENV.fetch("BUILDKITE_ANALYTICS_RECONNECTION_ATTEMPTS") { 3 }.to_i
8
+ WAIT_BETWEEN_RECONNECTIONS = ENV.fetch("BUILDKITE_ANALYTICS_RECONNECTION_WAIT") { 5 }.to_i
9
+
10
+ # We keep a private reference so that mocking libraries won't break JSON
11
+ JSON_PARSE = JSON.method(:parse)
12
+ private_constant :JSON_PARSE
13
+
14
+ class RejectedSubscription < StandardError; end
15
+ class InitialConnectionFailure < StandardError; end
16
+
17
+ DISCONNECTED_EXCEPTIONS = [
18
+ Buildkite::TestCollector::SocketConnection::HandshakeError,
19
+ Buildkite::TestCollector::TimeoutError,
20
+ Buildkite::TestCollector::SocketConnection::SocketError,
21
+ RejectedSubscription,
22
+ InitialConnectionFailure,
23
+ ]
24
+
25
+ def initialize(url, authorization_header, channel)
26
+ @establish_subscription_queue = Queue.new
27
+ @channel = channel
28
+
29
+ @unconfirmed_idents = {}
30
+ @idents_mutex = Mutex.new
31
+ @send_queue = Queue.new
32
+ @empty = ConditionVariable.new
33
+ @closing = false
34
+ @eot_queued = false
35
+ @eot_queued_mutex = Mutex.new
36
+ @reconnection_mutex = Mutex.new
37
+
38
+ @url = url
39
+ @authorization_header = authorization_header
40
+
41
+ reconnection_count = 0
42
+
43
+ begin
44
+ reconnection_count += 1
45
+ connect
46
+ rescue Buildkite::TestCollector::TimeoutError, InitialConnectionFailure => e
47
+ Buildkite::TestCollector.logger.warn("buildkite-test_collector could not establish an initial connection with Buildkite due to #{e}. Attempting retry #{reconnection_count} of #{MAX_RECONNECTION_ATTEMPTS}...")
48
+ if reconnection_count > MAX_RECONNECTION_ATTEMPTS
49
+ Buildkite::TestCollector.logger.error "buildkite-test_collector could not establish an initial connection with Buildkite due to #{e.message} after #{MAX_RECONNECTION_ATTEMPTS} attempts. You may be missing some data for this test suite, please contact support if this issue persists."
50
+ else
51
+ sleep(WAIT_BETWEEN_RECONNECTIONS)
52
+ Buildkite::TestCollector.logger.warn("retrying reconnection")
53
+ retry
54
+ end
55
+ end
56
+ init_write_thread
57
+ end
58
+
59
+ def disconnected(connection)
60
+ @reconnection_mutex.synchronize do
61
+ # When the first thread detects a disconnection, it calls the `disconnected` method
62
+ # with the current connection. This thread grabs the reconnection mutex and does the
63
+ # reconnection, which then updates the value of @connection.
64
+ #
65
+ # At some point in that process, the second thread would have detected the
66
+ # disconnection too, and it also calls it with the current connection. However, the
67
+ # second thread can't run the reconnection code because of the mutex. By the
68
+ # time the mutex is released, the value of @connection has been refreshed, and so
69
+ # the second thread returns early and does not reattempt the reconnection.
70
+ return unless connection == @connection
71
+ Buildkite::TestCollector.logger.debug("starting reconnection")
72
+
73
+ reconnection_count = 0
74
+
75
+ begin
76
+ reconnection_count += 1
77
+ connect
78
+ init_write_thread
79
+ rescue *DISCONNECTED_EXCEPTIONS => e
80
+ Buildkite::TestCollector.logger.warn("failed reconnection attempt #{reconnection_count} due to #{e}")
81
+ if reconnection_count > MAX_RECONNECTION_ATTEMPTS
82
+ Buildkite::TestCollector.logger.error "buildkite-test_collector experienced a disconnection and could not reconnect to Buildkite due to #{e.message}. Please contact support."
83
+ raise e
84
+ else
85
+ sleep(WAIT_BETWEEN_RECONNECTIONS)
86
+ Buildkite::TestCollector.logger.warn("retrying reconnection")
87
+ retry
88
+ end
89
+ end
90
+ end
91
+ retransmit
92
+ end
93
+
94
+ def close(examples_count)
95
+ @closing = true
96
+ @examples_count = examples_count
97
+ Buildkite::TestCollector.logger.debug("closing socket connection")
98
+
99
+ # Because the server only sends us confirmations after every 10mb of
100
+ # data it uploads to S3, we'll never get confirmation of the
101
+ # identifiers of the last upload part unless we send an explicit finish,
102
+ # to which the server will respond with the last bits of data
103
+ send_eot
104
+
105
+ # After EOT, we wait up to CONFIRMATION_TIMEOUT seconds (75 by default) for the send queue to be drained and for the
106
+ # server to confirm the last idents. If everything has already been confirmed we can
107
+ # proceed without waiting.
108
+ @idents_mutex.synchronize do
109
+ if @unconfirmed_idents.any?
110
+ Buildkite::TestCollector.logger.debug "Waiting for Buildkite Test Analytics to send results..."
111
+ Buildkite::TestCollector.logger.debug("waiting for last confirm")
112
+
113
+ @empty.wait(@idents_mutex, CONFIRMATION_TIMEOUT)
114
+ end
115
+ end
116
+
117
+ # Then we always disconnect cos we can't wait forever? 🤷‍♀️
118
+ @connection.close
119
+ # We kill the write thread cos it's got a while loop in it, so it won't finish otherwise
120
+ @write_thread&.kill
121
+
122
+ Buildkite::TestCollector.logger.info "Buildkite Test Analytics completed"
123
+ Buildkite::TestCollector.logger.debug("socket connection closed")
124
+ end
125
+
126
+ def handle(_connection, data)
127
+ data = JSON_PARSE.call(data)
128
+ case data["type"]
129
+ when "ping"
130
+ # In absence of other message, the server sends us a ping every 3 seconds
131
+ # We are currently not doing anything with these
132
+ Buildkite::TestCollector.logger.debug("received ping")
133
+ when "welcome", "confirm_subscription"
134
+ # Push these two messages onto the queue, so that we block on waiting for the
135
+ # initializing phase to complete
136
+ @establish_subscription_queue.push(data)
137
+ Buildkite::TestCollector.logger.debug("received #{data['type']}")
138
+ when "reject_subscription"
139
+ Buildkite::TestCollector.logger.debug("received rejected_subscription")
140
+ raise RejectedSubscription
141
+ else
142
+ process_message(data)
143
+ end
144
+ end
145
+
146
+ def write_result(result)
147
+ queue_and_track_result(result.id, result.as_hash)
148
+
149
+ Buildkite::TestCollector.logger.debug("added #{result.id} to send queue")
150
+ end
151
+
152
+ def unconfirmed_idents_count
153
+ @idents_mutex.synchronize do
154
+ @unconfirmed_idents.count
155
+ end
156
+ end
157
+
158
+ private
159
+
160
+ def connect
161
+ Buildkite::TestCollector.logger.debug("starting socket connection process")
162
+
163
+ @connection = SocketConnection.new(self, @url, {
164
+ "Authorization" => @authorization_header,
165
+ })
166
+
167
+ wait_for_welcome
168
+
169
+ @connection.transmit({
170
+ "command" => "subscribe",
171
+ "identifier" => @channel
172
+ })
173
+
174
+ wait_for_confirm
175
+
176
+ Buildkite::TestCollector.logger.info "Connected to Buildkite Test Analytics!"
177
+ Buildkite::TestCollector.logger.debug("connected")
178
+ end
179
+
180
+ def init_write_thread
181
+ # As this method can be called multiple times in the
182
+ # reconnection process, kill prev write threads (if any) before
183
+ # setting up the new one
184
+ @write_thread&.kill
185
+
186
+ @write_thread = Thread.new do
187
+ Buildkite::TestCollector.logger.debug("hello from write thread")
188
+ # Pretty sure this eternal loop is fine cos the call to queue.pop is blocking
189
+ loop do
190
+ data = @send_queue.pop
191
+ message_type = data["action"]
192
+
193
+ if message_type == "end_of_transmission"
194
+ # Because of the unpredictable sequencing between the test suite finishing
195
+ # (EOT gets queued) and disconnections happening (retransmit results gets
196
+ # queued), we don't want to send an EOT before any retransmits are sent.
197
+ if @send_queue.length > 0
198
+ @send_queue << data
199
+ Buildkite::TestCollector.logger.debug("putting eot at back of queue")
200
+ next
201
+ end
202
+ @eot_queued_mutex.synchronize do
203
+ @eot_queued = false
204
+ end
205
+ end
206
+
207
+ @connection.transmit({
208
+ "identifier" => @channel,
209
+ "command" => "message",
210
+ "data" => data.to_json
211
+ })
212
+
213
+ if Buildkite::TestCollector.debug_enabled
214
+ ids = if message_type == "record_results"
215
+ data["results"].map { |result| result["id"] }
216
+ end
217
+ Buildkite::TestCollector.logger.debug("transmitted #{message_type} #{ids}")
218
+ end
219
+ end
220
+ end
221
+ end
222
+
223
+ def pop_with_timeout(message_type)
224
+ Timeout.timeout(30, Buildkite::TestCollector::TimeoutError, "Timeout: Waited 30 seconds for #{message_type}") do
225
+ @establish_subscription_queue.pop
226
+ end
227
+ end
228
+
229
+ def wait_for_welcome
230
+ welcome = pop_with_timeout("welcome")
231
+
232
+ if welcome && welcome != { "type" => "welcome" }
233
+ raise InitialConnectionFailure.new("Wrong message received, expected a welcome, but received: #{welcome.inspect}")
234
+ end
235
+ end
236
+
237
+ def wait_for_confirm
238
+ confirm = pop_with_timeout("confirm")
239
+
240
+ if confirm && confirm != { "type" => "confirm_subscription", "identifier" => @channel }
241
+ raise InitialConnectionFailure.new("Wrong message received, expected a confirm, but received: #{confirm.inspect}")
242
+ end
243
+ end
244
+
245
+ def queue_and_track_result(ident, result_as_hash)
246
+ @idents_mutex.synchronize do
247
+ @unconfirmed_idents[ident] = result_as_hash
248
+
249
+ @send_queue << {
250
+ "action" => "record_results",
251
+ "results" => [result_as_hash]
252
+ }
253
+ end
254
+ end
255
+
256
+ def confirm_idents(idents)
257
+ retransmit_required = @closing
258
+
259
+ @idents_mutex.synchronize do
260
+ # Remove received idents from unconfirmed_idents
261
+ idents.each { |key| @unconfirmed_idents.delete(key) }
262
+
263
+ Buildkite::TestCollector.logger.debug("received confirm for indentifiers: #{idents}")
264
+
265
+ # This @empty ConditionVariable broadcasts every time that @unconfirmed_idents is
266
+ # empty, which will happen about every 10mb of data as that's when the server
267
+ # sends back confirmations.
268
+ #
269
+ # However, there aren't any threads waiting on this signal until after we
270
+ # send the EOT message, so the prior broadcasts shouldn't do anything.
271
+ if @unconfirmed_idents.empty?
272
+ @empty.broadcast
273
+
274
+ retransmit_required = false
275
+
276
+ Buildkite::TestCollector.logger.debug("all identifiers have been confirmed")
277
+ else
278
+ Buildkite::TestCollector.logger.debug("still waiting on confirm for identifiers: #{@unconfirmed_idents.keys}")
279
+ end
280
+ end
281
+
282
+ # If we're closing, any unconfirmed results need to be retransmitted.
283
+ retransmit if retransmit_required
284
+ end
285
+
286
+ def send_eot
287
+ @eot_queued_mutex.synchronize do
288
+ return if @eot_queued
289
+
290
+ @send_queue << {
291
+ "action" => "end_of_transmission",
292
+ "examples_count" => @examples_count.to_json
293
+ }
294
+ @eot_queued = true
295
+
296
+ Buildkite::TestCollector.logger.debug("added EOT to send queue")
297
+ end
298
+ end
299
+
300
+ def process_message(data)
301
+ # Check we're getting the data we expect
302
+ return unless data["identifier"] == @channel
303
+
304
+ case
305
+ when data["message"].key?("confirm")
306
+ confirm_idents(data["message"]["confirm"])
307
+ else
308
+ # unhandled message
309
+ Buildkite::TestCollector.logger.debug("received unhandled message #{data["message"]}")
310
+ end
311
+ end
312
+
313
+ def retransmit
314
+ @idents_mutex.synchronize do
315
+ results = @unconfirmed_idents.values
316
+
317
+ # queue the contents of the buffer, unless it's empty
318
+ if results.any?
319
+ @send_queue << {
320
+ "action" => "record_results",
321
+ "results" => results
322
+ }
323
+
324
+ Buildkite::TestCollector.logger.debug("queueing up retransmitted results #{@unconfirmed_idents.keys}")
325
+ end
326
+ end
327
+
328
+ # if we were disconnected in the closing phase, then resend the EOT
329
+ # message so the server can persist the last upload part
330
+ send_eot if @closing
331
+ end
332
+ end
333
+ end
@@ -2,6 +2,8 @@
2
2
 
3
3
  module Buildkite::TestCollector
4
4
  class Uploader
5
+ MAX_UPLOAD_ATTEMPTS = 3
6
+
5
7
  def self.traces
6
8
  @traces ||= {}
7
9
  end
@@ -17,6 +19,14 @@ module Buildkite::TestCollector
17
19
  EOFError
18
20
  ]
19
21
 
22
# Exception classes for which an upload is attempted again (see the
# `rescue *RETRYABLE_UPLOAD_ERRORS` in Uploader.upload). Frozen so the
# list cannot be mutated at runtime.
RETRYABLE_UPLOAD_ERRORS = [
  Net::ReadTimeout,
  Net::OpenTimeout,
  OpenSSL::SSL::SSLError,
  OpenSSL::SSL::SSLErrorWaitReadable,
  EOFError
].freeze
29
+
20
30
  def self.configure
21
31
  Buildkite::TestCollector.logger.debug("hello from main thread")
22
32
 
@@ -38,7 +48,7 @@ module Buildkite::TestCollector
38
48
  json = JSON.parse(response.body)
39
49
 
40
50
  if (socket_url = json["cable"]) && (channel = json["channel"])
41
- Buildkite::TestCollector.session = Buildkite::TestCollector::Session.new(socket_url, http.authorization_header, channel)
51
+ Buildkite::TestCollector.session = Buildkite::TestCollector::SocketSession.new(socket_url, http.authorization_header, channel)
42
52
  end
43
53
  else
44
54
  request_id = response.to_hash["x-request-id"]
@@ -54,5 +64,22 @@ module Buildkite::TestCollector
54
64
  def self.tracer
55
65
  Thread.current[:_buildkite_tracer]
56
66
  end
67
+
68
# Posts +data+ as JSON to Buildkite::TestCollector.url on a new thread.
#
# data - the payload handed unchanged to HTTPClient#post_json.
#
# Errors listed in RETRYABLE_UPLOAD_ERRORS are retried until
# MAX_UPLOAD_ATTEMPTS attempts have been made; after that the upload is
# abandoned and a debug message is logged. Other exceptions are not
# rescued here.
#
# Returns false when no API token is configured, otherwise the Thread
# performing the upload. The thread's value is the result of post_json,
# or nil when the upload was abandoned.
def self.upload(data)
  return false unless Buildkite::TestCollector.api_token

  http = Buildkite::TestCollector::HTTPClient.new(Buildkite::TestCollector.url)

  Thread.new do
    # Kept outside the begin block so `retry` does not touch the counter.
    upload_attempts = 0

    begin
      http.post_json(data)
    rescue *Buildkite::TestCollector::Uploader::RETRYABLE_UPLOAD_ERRORS => e
      upload_attempts += 1
      retry if upload_attempts < MAX_UPLOAD_ATTEMPTS

      Buildkite::TestCollector.logger.debug("giving up on upload after #{upload_attempts} attempts: #{e.class}")
      nil
    end
  end
end
57
84
  end
58
85
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Buildkite
4
4
  module TestCollector
5
- VERSION = "1.5.0"
5
+ VERSION = "2.0.0.pre"
6
6
  NAME = "buildkite-test_collector"
7
7
  end
8
8
  end
@@ -30,11 +30,13 @@ require_relative "test_collector/network"
30
30
  require_relative "test_collector/object"
31
31
  require_relative "test_collector/tracer"
32
32
  require_relative "test_collector/socket_connection"
33
+ require_relative "test_collector/socket_session"
33
34
  require_relative "test_collector/session"
34
35
 
35
36
  module Buildkite
36
37
  module TestCollector
37
38
  DEFAULT_URL = "https://analytics-api.buildkite.com/v1/uploads"
39
+ DEFAULT_UPLOAD_BATCH_SIZE = 500
38
40
 
39
41
  class << self
40
42
  attr_accessor :api_token
@@ -45,6 +47,7 @@ module Buildkite
45
47
  attr_accessor :tracing_enabled
46
48
  attr_accessor :artifact_path
47
49
  attr_accessor :env
50
+ attr_accessor :batch_size
48
51
  end
49
52
 
50
53
  def self.configure(hook:, token: nil, url: nil, debug_enabled: false, tracing_enabled: true, artifact_path: nil, env: {})
@@ -54,7 +57,7 @@ module Buildkite
54
57
  self.tracing_enabled = tracing_enabled
55
58
  self.artifact_path = artifact_path
56
59
  self.env = env
57
-
60
+ self.batch_size = ENV.fetch("BUILDKITE_ANALYTICS_UPLOAD_BATCH_SIZE") { DEFAULT_UPLOAD_BATCH_SIZE }.to_i
58
61
  self.hook_into(hook)
59
62
  end
60
63
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buildkite-test_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 2.0.0.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Buildkite
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-29 00:00:00.000000000 Z
11
+ date: 2023-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -66,7 +66,7 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '3.10'
69
- description:
69
+ description:
70
70
  email:
71
71
  - support+analytics@buildkite.com
72
72
  executables: []
@@ -104,6 +104,7 @@ files:
104
104
  - lib/buildkite/test_collector/rspec_plugin/trace.rb
105
105
  - lib/buildkite/test_collector/session.rb
106
106
  - lib/buildkite/test_collector/socket_connection.rb
107
+ - lib/buildkite/test_collector/socket_session.rb
107
108
  - lib/buildkite/test_collector/tracer.rb
108
109
  - lib/buildkite/test_collector/uploader.rb
109
110
  - lib/buildkite/test_collector/version.rb
@@ -114,7 +115,7 @@ licenses:
114
115
  metadata:
115
116
  homepage_uri: https://github.com/buildkite/test-collector-ruby
116
117
  source_code_uri: https://github.com/buildkite/test-collector-ruby
117
- post_install_message:
118
+ post_install_message:
118
119
  rdoc_options: []
119
120
  require_paths:
120
121
  - lib
@@ -125,12 +126,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
125
126
  version: 2.3.0
126
127
  required_rubygems_version: !ruby/object:Gem::Requirement
127
128
  requirements:
128
- - - ">="
129
+ - - ">"
129
130
  - !ruby/object:Gem::Version
130
- version: '0'
131
+ version: 1.3.1
131
132
  requirements: []
132
- rubygems_version: 3.4.1
133
- signing_key:
133
+ rubygems_version: 3.1.6
134
+ signing_key:
134
135
  specification_version: 4
135
136
  summary: Track test executions and report to Buildkite Test Analytics
136
137
  test_files: []