buildkite-test_collector 1.4.2 → 2.0.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c89588cb63374f9a2bd84b29cd575133aa84c2a684e10e0e7d78dd63a54ad34e
4
- data.tar.gz: a05196237e4d03ae61f9953c94de3d54253a40c89a6f0557c3fa8e42b800e163
3
+ metadata.gz: b7fc00254b8a2002e8680ca265b00f5b2700dace5d6948aa56f779102f8ce0c6
4
+ data.tar.gz: 6c05b872c740262ca0c66f4d6c09b5b253a366466241513b74706c8d0a56328a
5
5
  SHA512:
6
- metadata.gz: ab0e5a4fc6a21adc7f8f240331f5a80b27fb7442ca1ae5f81b5d0721e4143007ee0b4c4d1b1122c83c0e1aeaa5bea48167a6b2c3c92b3ad4d1100f7e768e607e
7
- data.tar.gz: d7e0ea4e7ad204b9e5e6d11136008580a435bfe43e7ea9784926e6af60c0347dd556dd4d4410e542e29eb3c7a75be0fae3f6164a2bfbd7c451d37f121f62b932
6
+ metadata.gz: 2f646c1a4564960ba78494efef9ec64fa43eb9a4107d7c004884d977649d5cdf07aa19abec49b887d046d1c7a15ec7b61afbde60f4b12769c77f7578ba55df8e
7
+ data.tar.gz: b80f9fd21f045f396bceea9af860e471191669dc80b1d3e57ae38c5d1b7dcebe3fdfc1646774af043d6b8f243cd7335c9b8f044aff9232146e3c0a4dad8a58d8
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## v2.0.0.pre
4
+
5
+ - Major change: RSpec plugin to use HTTP Upload API instead of websocket connection to send test data #174 #175 - @niceking
6
+ - `identifier` field removed from trace #176 - @amybiyuliu
7
+
8
+ ## v1.5.0
9
+
10
+ - Send `failure_expanded` from minitest #171 - @nprizal
11
+
3
12
  ## v1.4.2
4
13
 
5
14
  - Update collector argument in the Analytics::API payload #170 - @KatieWright26
data/Gemfile.lock CHANGED
@@ -1,23 +1,23 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- buildkite-test_collector (1.4.2)
4
+ buildkite-test_collector (2.0.0.pre)
5
5
  activesupport (>= 4.2)
6
6
  websocket (~> 1.2)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
- activesupport (7.0.4.1)
11
+ activesupport (7.0.4.2)
12
12
  concurrent-ruby (~> 1.0, >= 1.0.2)
13
13
  i18n (>= 1.6, < 2)
14
14
  minitest (>= 5.1)
15
15
  tzinfo (~> 2.0)
16
- concurrent-ruby (1.1.10)
16
+ concurrent-ruby (1.2.2)
17
17
  diff-lcs (1.4.4)
18
18
  i18n (1.12.0)
19
19
  concurrent-ruby (~> 1.0)
20
- minitest (5.17.0)
20
+ minitest (5.18.0)
21
21
  rake (13.0.6)
22
22
  rspec (3.10.0)
23
23
  rspec-core (~> 3.10.0)
@@ -32,7 +32,7 @@ GEM
32
32
  diff-lcs (>= 1.2.0, < 2.0)
33
33
  rspec-support (~> 3.10.0)
34
34
  rspec-support (3.10.3)
35
- tzinfo (2.0.5)
35
+ tzinfo (2.0.6)
36
36
  concurrent-ruby (~> 1.0)
37
37
  websocket (1.2.9)
38
38
 
@@ -47,4 +47,4 @@ DEPENDENCIES
47
47
  rspec-expectations (~> 3.10)
48
48
 
49
49
  BUNDLED WITH
50
- 2.2.22
50
+ 2.3.25
@@ -28,6 +28,28 @@ module Buildkite::TestCollector
28
28
  http.request(contact)
29
29
  end
30
30
 
31
+ def post_json(data)
32
+ contact_uri = URI.parse(url)
33
+
34
+ http = Net::HTTP.new(contact_uri.host, contact_uri.port)
35
+ http.use_ssl = contact_uri.scheme == "https"
36
+
37
+ contact = Net::HTTP::Post.new(contact_uri.path, {
38
+ "Authorization" => authorization_header,
39
+ "Content-Type" => "application/json",
40
+ })
41
+
42
+ data_set = data.map(&:as_hash)
43
+
44
+ contact.body = {
45
+ run_env: Buildkite::TestCollector::CI.env,
46
+ format: "json",
47
+ data: data_set
48
+ }.to_json
49
+
50
+ http.request(contact)
51
+ end
52
+
31
53
  private
32
54
 
33
55
  attr :url
@@ -11,8 +11,6 @@ Buildkite::TestCollector.uploader = Buildkite::TestCollector::Uploader
11
11
  RSpec.configure do |config|
12
12
  config.before(:suite) do
13
13
  config.add_formatter Buildkite::TestCollector::RSpecPlugin::Reporter
14
-
15
- Buildkite::TestCollector.safe { Buildkite::TestCollector::Uploader.configure }
16
14
  end
17
15
 
18
16
  config.around(:each) do |example|
@@ -15,12 +15,10 @@ module Buildkite::TestCollector::MinitestPlugin
15
15
 
16
16
  FILE_PATH_REGEX = /^(.*?\.(rb|feature))/
17
17
 
18
- def initialize(example, history:, failure_reason: nil, failure_expanded: [])
18
+ def initialize(example, history:)
19
19
  @id = SecureRandom.uuid
20
20
  @example = example
21
21
  @history = history
22
- @failure_reason = failure_reason
23
- @failure_expanded = failure_expanded
24
22
  end
25
23
 
26
24
  def result
@@ -36,7 +34,6 @@ module Buildkite::TestCollector::MinitestPlugin
36
34
  id: id,
37
35
  scope: example.class.name,
38
36
  name: example.name,
39
- identifier: identifier,
40
37
  location: location,
41
38
  file_name: file_name,
42
39
  result: result,
@@ -53,7 +50,6 @@ module Buildkite::TestCollector::MinitestPlugin
53
50
  "#{file_name}:#{line_number}"
54
51
  end
55
52
  end
56
- alias_method :identifier, :location
57
53
 
58
54
  def file_name
59
55
  @file_name ||= File.join('./', source_location[0].delete_prefix(project_dir))
@@ -72,17 +68,20 @@ module Buildkite::TestCollector::MinitestPlugin
72
68
  end
73
69
 
74
70
  def failure_reason
75
- @failure_reason ||= example.failure&.message
71
+ @failure_reason ||= strip_invalid_utf8_chars(example.failure&.message)&.split("\n")&.first
76
72
  end
77
73
 
78
74
  def failure_expanded
79
- @failure_expanded ||= begin
80
- example.failures.map do |failure|
81
- {
82
- expanded: failure.message,
83
- backtrace: failure.backtrace,
84
- }
85
- end
75
+ @failure_expanded ||= example.failures.map.with_index do |failure, index|
76
+ # remove the first line of message from the first failure
77
+ # to avoid duplicate line in Test Analytics UI
78
+ messages = strip_invalid_utf8_chars(failure.message).split("\n")
79
+ messages = messages[1..] if index.zero?
80
+
81
+ {
82
+ expanded: messages,
83
+ backtrace: failure.backtrace
84
+ }
86
85
  end
87
86
  end
88
87
 
@@ -7,6 +7,7 @@ module Buildkite::TestCollector::RSpecPlugin
7
7
  attr_reader :output
8
8
 
9
9
  def initialize(output)
10
+ Buildkite::TestCollector.session = Buildkite::TestCollector::Session.new
10
11
  @output = output
11
12
  end
12
13
 
@@ -19,21 +20,13 @@ module Buildkite::TestCollector::RSpecPlugin
19
20
  if example.execution_result.status == :failed
20
21
  trace.failure_reason, trace.failure_expanded = failure_info(notification)
21
22
  end
22
- Buildkite::TestCollector.session&.write_result(trace)
23
+ Buildkite::TestCollector.session.add_example_to_send_queue(example.id)
23
24
  end
24
25
  end
25
26
 
26
- def dump_summary(notification)
27
- if Buildkite::TestCollector.session.present?
28
- examples_count = {
29
- examples: notification.examples.count,
30
- failed: notification.failed_examples.count,
31
- pending: notification.pending_examples.count,
32
- errors_outside_examples: notification.errors_outside_of_examples_count
33
- }
34
-
35
- Buildkite::TestCollector.session.close(examples_count)
36
- end
27
+ def dump_summary(_notification)
28
+ Buildkite::TestCollector.session.send_remaining_data
29
+ Buildkite::TestCollector.session.close
37
30
  end
38
31
 
39
32
  alias_method :example_passed, :handle_example
@@ -28,7 +28,6 @@ module Buildkite::TestCollector::RSpecPlugin
28
28
  id: id,
29
29
  scope: example.example_group.metadata[:full_description],
30
30
  name: example.description,
31
- identifier: example.id,
32
31
  location: example.location,
33
32
  file_name: file_name,
34
33
  result: result,
@@ -2,332 +2,56 @@
2
2
 
3
3
  module Buildkite::TestCollector
4
4
  class Session
5
- # Picked 75 as the magic timeout number as it's longer than the TCP timeout of 60s 🤷‍♀️
6
- CONFIRMATION_TIMEOUT = ENV.fetch("BUILDKITE_ANALYTICS_CONFIRMATION_TIMEOUT") { 75 }.to_i
7
- MAX_RECONNECTION_ATTEMPTS = ENV.fetch("BUILDKITE_ANALYTICS_RECONNECTION_ATTEMPTS") { 3 }.to_i
8
- WAIT_BETWEEN_RECONNECTIONS = ENV.fetch("BUILDKITE_ANALYTICS_RECONNECTION_WAIT") { 5 }.to_i
5
+ UPLOAD_THREAD_TIMEOUT = 60
6
+ UPLOAD_SESSION_TIMEOUT = 60
7
+ UPLOAD_API_MAX_RESULTS = 5000
9
8
 
10
- # We keep a private reference so that mocking libraries won't break JSON
11
- JSON_PARSE = JSON.method(:parse)
12
- private_constant :JSON_PARSE
13
-
14
- class RejectedSubscription < StandardError; end
15
- class InitialConnectionFailure < StandardError; end
16
-
17
- DISCONNECTED_EXCEPTIONS = [
18
- Buildkite::TestCollector::SocketConnection::HandshakeError,
19
- Buildkite::TestCollector::TimeoutError,
20
- Buildkite::TestCollector::SocketConnection::SocketError,
21
- RejectedSubscription,
22
- InitialConnectionFailure,
23
- ]
24
-
25
- def initialize(url, authorization_header, channel)
26
- @establish_subscription_queue = Queue.new
27
- @channel = channel
28
-
29
- @unconfirmed_idents = {}
30
- @idents_mutex = Mutex.new
31
- @send_queue = Queue.new
32
- @empty = ConditionVariable.new
33
- @closing = false
34
- @eot_queued = false
35
- @eot_queued_mutex = Mutex.new
36
- @reconnection_mutex = Mutex.new
37
-
38
- @url = url
39
- @authorization_header = authorization_header
40
-
41
- reconnection_count = 0
42
-
43
- begin
44
- reconnection_count += 1
45
- connect
46
- rescue Buildkite::TestCollector::TimeoutError, InitialConnectionFailure => e
47
- Buildkite::TestCollector.logger.warn("buildkite-test_collector could not establish an initial connection with Buildkite due to #{e}. Attempting retry #{reconnection_count} of #{MAX_RECONNECTION_ATTEMPTS}...")
48
- if reconnection_count > MAX_RECONNECTION_ATTEMPTS
49
- Buildkite::TestCollector.logger.error "buildkite-test_collector could not establish an initial connection with Buildkite due to #{e.message} after #{MAX_RECONNECTION_ATTEMPTS} attempts. You may be missing some data for this test suite, please contact support if this issue persists."
50
- else
51
- sleep(WAIT_BETWEEN_RECONNECTIONS)
52
- Buildkite::TestCollector.logger.warn("retrying reconnection")
53
- retry
54
- end
55
- end
56
- init_write_thread
9
+ def initialize
10
+ @send_queue_ids = []
11
+ @upload_threads = []
57
12
  end
58
13
 
59
- def disconnected(connection)
60
- @reconnection_mutex.synchronize do
61
- # When the first thread detects a disconnection, it calls the disconnect method
62
- # with the current connection. This thread grabs the reconnection mutex and does the
63
- # reconnection, which then updates the value of @connection.
64
- #
65
- # At some point in that process, the second thread would have detected the
66
- # disconnection too, and it also calls it with the current connection. However, the
67
- # second thread can't run the reconnection code because of the mutex. By the
68
- # time the mutex is released, the value of @connection has been refreshed, and so
69
- # the second thread returns early and does not reattempt the reconnection.
70
- return unless connection == @connection
71
- Buildkite::TestCollector.logger.debug("starting reconnection")
72
-
73
- reconnection_count = 0
14
+ def add_example_to_send_queue(id)
15
+ @send_queue_ids << id
74
16
 
75
- begin
76
- reconnection_count += 1
77
- connect
78
- init_write_thread
79
- rescue *DISCONNECTED_EXCEPTIONS => e
80
- Buildkite::TestCollector.logger.warn("failed reconnection attempt #{reconnection_count} due to #{e}")
81
- if reconnection_count > MAX_RECONNECTION_ATTEMPTS
82
- Buildkite::TestCollector.logger.error "buildkite-test_collector experienced a disconnection and could not reconnect to Buildkite due to #{e.message}. Please contact support."
83
- raise e
84
- else
85
- sleep(WAIT_BETWEEN_RECONNECTIONS)
86
- Buildkite::TestCollector.logger.warn("retrying reconnection")
87
- retry
88
- end
89
- end
17
+ if @send_queue_ids.size >= Buildkite::TestCollector.batch_size
18
+ send_ids = @send_queue_ids.shift(Buildkite::TestCollector.batch_size)
19
+ upload_data(send_ids)
90
20
  end
91
- retransmit
92
21
  end
93
22
 
94
- def close(examples_count)
95
- @closing = true
96
- @examples_count = examples_count
97
- Buildkite::TestCollector.logger.debug("closing socket connection")
23
+ def send_remaining_data
24
+ return if @send_queue_ids.empty?
98
25
 
99
- # Because the server only sends us confirmations after every 10mb of
100
- # data it uploads to S3, we'll never get confirmation of the
101
- # identifiers of the last upload part unless we send an explicit finish,
102
- # to which the server will respond with the last bits of data
103
- send_eot
104
-
105
- # After EOT, we wait for 75 seconds for the send queue to be drained and for the
106
- # server to confirm the last idents. If everything has already been confirmed we can
107
- # proceed without waiting.
108
- @idents_mutex.synchronize do
109
- if @unconfirmed_idents.any?
110
- Buildkite::TestCollector.logger.debug "Waiting for Buildkite Test Analytics to send results..."
111
- Buildkite::TestCollector.logger.debug("waiting for last confirm")
112
-
113
- @empty.wait(@idents_mutex, CONFIRMATION_TIMEOUT)
114
- end
115
- end
116
-
117
- # Then we always disconnect cos we can't wait forever? 🤷‍♀️
118
- @connection.close
119
- # We kill the write thread cos it's got a while loop in it, so it won't finish otherwise
120
- @write_thread&.kill
121
-
122
- Buildkite::TestCollector.logger.info "Buildkite Test Analytics completed"
123
- Buildkite::TestCollector.logger.debug("socket connection closed")
26
+ upload_data(@send_queue_ids)
124
27
  end
125
28
 
126
- def handle(_connection, data)
127
- data = JSON_PARSE.call(data)
128
- case data["type"]
129
- when "ping"
130
- # In absence of other message, the server sends us a ping every 3 seconds
131
- # We are currently not doing anything with these
132
- Buildkite::TestCollector.logger.debug("received ping")
133
- when "welcome", "confirm_subscription"
134
- # Push these two messages onto the queue, so that we block on waiting for the
135
- # initializing phase to complete
136
- @establish_subscription_queue.push(data)
137
- Buildkite::TestCollector.logger.debug("received #{data['type']}")
138
- when "reject_subscription"
139
- Buildkite::TestCollector.logger.debug("received rejected_subscription")
140
- raise RejectedSubscription
141
- else
142
- process_message(data)
143
- end
144
- end
29
+ def close
30
+ # There are two thread joins here, because the inner join will wait up to
31
+ # UPLOAD_THREAD_TIMEOUT seconds PER thread that is uploading data, i.e.
32
+ # n_threads x UPLOAD_THREAD_TIMEOUT latency if Buildkite happens to be
33
+ # down. By wrapping that in an outer thread join with the
34
+ # UPLOAD_SESSION_TIMEOUT, we ensure that we only wait a max of
35
+ # UPLOAD_SESSION_TIMEOUT seconds before the session exits.
36
+ Thread.new do
37
+ @upload_threads.each { |t| t.join(UPLOAD_THREAD_TIMEOUT) }
38
+ end.join(UPLOAD_SESSION_TIMEOUT)
145
39
 
146
- def write_result(result)
147
- queue_and_track_result(result.id, result.as_hash)
148
-
149
- Buildkite::TestCollector.logger.debug("added #{result.id} to send queue")
150
- end
151
-
152
- def unconfirmed_idents_count
153
- @idents_mutex.synchronize do
154
- @unconfirmed_idents.count
155
- end
40
+ @upload_threads.each { |t| t&.kill }
156
41
  end
157
42
 
158
43
  private
159
44
 
160
- def connect
161
- Buildkite::TestCollector.logger.debug("starting socket connection process")
162
-
163
- @connection = SocketConnection.new(self, @url, {
164
- "Authorization" => @authorization_header,
165
- })
166
-
167
- wait_for_welcome
168
-
169
- @connection.transmit({
170
- "command" => "subscribe",
171
- "identifier" => @channel
172
- })
173
-
174
- wait_for_confirm
175
-
176
- Buildkite::TestCollector.logger.info "Connected to Buildkite Test Analytics!"
177
- Buildkite::TestCollector.logger.debug("connected")
178
- end
179
-
180
- def init_write_thread
181
- # As this method can be called multiple times in the
182
- # reconnection process, kill prev write threads (if any) before
183
- # setting up the new one
184
- @write_thread&.kill
45
+ def upload_data(ids)
46
+ data = Buildkite::TestCollector.uploader.traces.values_at(*ids).compact
185
47
 
186
- @write_thread = Thread.new do
187
- Buildkite::TestCollector.logger.debug("hello from write thread")
188
- # Pretty sure this eternal loop is fine cos the call to queue.pop is blocking
189
- loop do
190
- data = @send_queue.pop
191
- message_type = data["action"]
192
-
193
- if message_type == "end_of_transmission"
194
- # Because of the unpredictable sequencing between the test suite finishing
195
- # (EOT gets queued) and disconnections happening (retransmit results gets
196
- # queued), we don't want to send an EOT before any retransmits are sent.
197
- if @send_queue.length > 0
198
- @send_queue << data
199
- Buildkite::TestCollector.logger.debug("putting eot at back of queue")
200
- next
201
- end
202
- @eot_queued_mutex.synchronize do
203
- @eot_queued = false
204
- end
205
- end
206
-
207
- @connection.transmit({
208
- "identifier" => @channel,
209
- "command" => "message",
210
- "data" => data.to_json
211
- })
212
-
213
- if Buildkite::TestCollector.debug_enabled
214
- ids = if message_type == "record_results"
215
- data["results"].map { |result| result["id"] }
216
- end
217
- Buildkite::TestCollector.logger.debug("transmitted #{message_type} #{ids}")
218
- end
219
- end
220
- end
221
- end
222
-
223
- def pop_with_timeout(message_type)
224
- Timeout.timeout(30, Buildkite::TestCollector::TimeoutError, "Timeout: Waited 30 seconds for #{message_type}") do
225
- @establish_subscription_queue.pop
48
+ # we do this in batches of UPLOAD_API_MAX_RESULTS in case the number of
49
+ # results exceeds this due to a bug, or user error in configuring the
50
+ # batch size
51
+ data.each_slice(UPLOAD_API_MAX_RESULTS) do |batch|
52
+ new_thread = Buildkite::TestCollector::Uploader.upload(batch)
53
+ @upload_threads << new_thread if new_thread
226
54
  end
227
55
  end
228
-
229
- def wait_for_welcome
230
- welcome = pop_with_timeout("welcome")
231
-
232
- if welcome && welcome != { "type" => "welcome" }
233
- raise InitialConnectionFailure.new("Wrong message received, expected a welcome, but received: #{welcome.inspect}")
234
- end
235
- end
236
-
237
- def wait_for_confirm
238
- confirm = pop_with_timeout("confirm")
239
-
240
- if confirm && confirm != { "type" => "confirm_subscription", "identifier" => @channel }
241
- raise InitialConnectionFailure.new("Wrong message received, expected a confirm, but received: #{confirm.inspect}")
242
- end
243
- end
244
-
245
- def queue_and_track_result(ident, result_as_hash)
246
- @idents_mutex.synchronize do
247
- @unconfirmed_idents[ident] = result_as_hash
248
-
249
- @send_queue << {
250
- "action" => "record_results",
251
- "results" => [result_as_hash]
252
- }
253
- end
254
- end
255
-
256
- def confirm_idents(idents)
257
- retransmit_required = @closing
258
-
259
- @idents_mutex.synchronize do
260
- # Remove received idents from unconfirmed_idents
261
- idents.each { |key| @unconfirmed_idents.delete(key) }
262
-
263
- Buildkite::TestCollector.logger.debug("received confirm for indentifiers: #{idents}")
264
-
265
- # This @empty ConditionVariable broadcasts every time that @unconfirmed_idents is
266
- # empty, which will happen about every 10mb of data as that's when the server
267
- # sends back confirmations.
268
- #
269
- # However, there aren't any threads waiting on this signal until after we
270
- # send the EOT message, so the prior broadcasts shouldn't do anything.
271
- if @unconfirmed_idents.empty?
272
- @empty.broadcast
273
-
274
- retransmit_required = false
275
-
276
- Buildkite::TestCollector.logger.debug("all identifiers have been confirmed")
277
- else
278
- Buildkite::TestCollector.logger.debug("still waiting on confirm for identifiers: #{@unconfirmed_idents.keys}")
279
- end
280
- end
281
-
282
- # If we're closing, any unconfirmed results need to be retransmitted.
283
- retransmit if retransmit_required
284
- end
285
-
286
- def send_eot
287
- @eot_queued_mutex.synchronize do
288
- return if @eot_queued
289
-
290
- @send_queue << {
291
- "action" => "end_of_transmission",
292
- "examples_count" => @examples_count.to_json
293
- }
294
- @eot_queued = true
295
-
296
- Buildkite::TestCollector.logger.debug("added EOT to send queue")
297
- end
298
- end
299
-
300
- def process_message(data)
301
- # Check we're getting the data we expect
302
- return unless data["identifier"] == @channel
303
-
304
- case
305
- when data["message"].key?("confirm")
306
- confirm_idents(data["message"]["confirm"])
307
- else
308
- # unhandled message
309
- Buildkite::TestCollector.logger.debug("received unhandled message #{data["message"]}")
310
- end
311
- end
312
-
313
- def retransmit
314
- @idents_mutex.synchronize do
315
- results = @unconfirmed_idents.values
316
-
317
- # queue the contents of the buffer, unless it's empty
318
- if results.any?
319
- @send_queue << {
320
- "action" => "record_results",
321
- "results" => results
322
- }
323
-
324
- Buildkite::TestCollector.logger.debug("queueing up retransmitted results #{@unconfirmed_idents.keys}")
325
- end
326
- end
327
-
328
- # if we were disconnected in the closing phase, then resend the EOT
329
- # message so the server can persist the last upload part
330
- send_eot if @closing
331
- end
332
56
  end
333
57
  end
@@ -73,12 +73,15 @@ module Buildkite::TestCollector
73
73
  end
74
74
  end
75
75
  # These get re-raise from session, we should fail gracefully
76
- rescue *Buildkite::TestCollector::Session::DISCONNECTED_EXCEPTIONS => e
76
+ rescue *Buildkite::TestCollector::SocketSession::DISCONNECTED_EXCEPTIONS => e
77
77
  Buildkite::TestCollector.logger.error("We could not establish a connection with Buildkite Test Analytics. The error was: #{e.message}. If this is a problem, please contact support.")
78
- rescue EOFError => e
78
+ rescue EOFError, OpenSSL::SSL::SSLError => e
79
+ # https://github.com/buildkite/test-collector-ruby/pull/147#issuecomment-1250485611
80
+ raise if e.class == OpenSSL::SSL::SSLError && e.message != "SSL_read: unexpected eof while reading"
81
+
79
82
  Buildkite::TestCollector.logger.warn("#{e}")
80
83
  if @socket
81
- Buildkite::TestCollector.logger.error("attempting disconnected flow")
84
+ Buildkite::TestCollector.logger.warn("attempting disconnected flow")
82
85
  @session.disconnected(self)
83
86
  disconnect
84
87
  end
@@ -0,0 +1,333 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Buildkite::TestCollector
4
+ class SocketSession
5
+ # Picked 75 as the magic timeout number as it's longer than the TCP timeout of 60s 🤷‍♀️
6
+ CONFIRMATION_TIMEOUT = ENV.fetch("BUILDKITE_ANALYTICS_CONFIRMATION_TIMEOUT") { 75 }.to_i
7
+ MAX_RECONNECTION_ATTEMPTS = ENV.fetch("BUILDKITE_ANALYTICS_RECONNECTION_ATTEMPTS") { 3 }.to_i
8
+ WAIT_BETWEEN_RECONNECTIONS = ENV.fetch("BUILDKITE_ANALYTICS_RECONNECTION_WAIT") { 5 }.to_i
9
+
10
+ # We keep a private reference so that mocking libraries won't break JSON
11
+ JSON_PARSE = JSON.method(:parse)
12
+ private_constant :JSON_PARSE
13
+
14
+ class RejectedSubscription < StandardError; end
15
+ class InitialConnectionFailure < StandardError; end
16
+
17
+ DISCONNECTED_EXCEPTIONS = [
18
+ Buildkite::TestCollector::SocketConnection::HandshakeError,
19
+ Buildkite::TestCollector::TimeoutError,
20
+ Buildkite::TestCollector::SocketConnection::SocketError,
21
+ RejectedSubscription,
22
+ InitialConnectionFailure,
23
+ ]
24
+
25
+ def initialize(url, authorization_header, channel)
26
+ @establish_subscription_queue = Queue.new
27
+ @channel = channel
28
+
29
+ @unconfirmed_idents = {}
30
+ @idents_mutex = Mutex.new
31
+ @send_queue = Queue.new
32
+ @empty = ConditionVariable.new
33
+ @closing = false
34
+ @eot_queued = false
35
+ @eot_queued_mutex = Mutex.new
36
+ @reconnection_mutex = Mutex.new
37
+
38
+ @url = url
39
+ @authorization_header = authorization_header
40
+
41
+ reconnection_count = 0
42
+
43
+ begin
44
+ reconnection_count += 1
45
+ connect
46
+ rescue Buildkite::TestCollector::TimeoutError, InitialConnectionFailure => e
47
+ Buildkite::TestCollector.logger.warn("buildkite-test_collector could not establish an initial connection with Buildkite due to #{e}. Attempting retry #{reconnection_count} of #{MAX_RECONNECTION_ATTEMPTS}...")
48
+ if reconnection_count > MAX_RECONNECTION_ATTEMPTS
49
+ Buildkite::TestCollector.logger.error "buildkite-test_collector could not establish an initial connection with Buildkite due to #{e.message} after #{MAX_RECONNECTION_ATTEMPTS} attempts. You may be missing some data for this test suite, please contact support if this issue persists."
50
+ else
51
+ sleep(WAIT_BETWEEN_RECONNECTIONS)
52
+ Buildkite::TestCollector.logger.warn("retrying reconnection")
53
+ retry
54
+ end
55
+ end
56
+ init_write_thread
57
+ end
58
+
59
+ def disconnected(connection)
60
+ @reconnection_mutex.synchronize do
61
+ # When the first thread detects a disconnection, it calls the disconnect method
62
+ # with the current connection. This thread grabs the reconnection mutex and does the
63
+ # reconnection, which then updates the value of @connection.
64
+ #
65
+ # At some point in that process, the second thread would have detected the
66
+ # disconnection too, and it also calls it with the current connection. However, the
67
+ # second thread can't run the reconnection code because of the mutex. By the
68
+ # time the mutex is released, the value of @connection has been refreshed, and so
69
+ # the second thread returns early and does not reattempt the reconnection.
70
+ return unless connection == @connection
71
+ Buildkite::TestCollector.logger.debug("starting reconnection")
72
+
73
+ reconnection_count = 0
74
+
75
+ begin
76
+ reconnection_count += 1
77
+ connect
78
+ init_write_thread
79
+ rescue *DISCONNECTED_EXCEPTIONS => e
80
+ Buildkite::TestCollector.logger.warn("failed reconnection attempt #{reconnection_count} due to #{e}")
81
+ if reconnection_count > MAX_RECONNECTION_ATTEMPTS
82
+ Buildkite::TestCollector.logger.error "buildkite-test_collector experienced a disconnection and could not reconnect to Buildkite due to #{e.message}. Please contact support."
83
+ raise e
84
+ else
85
+ sleep(WAIT_BETWEEN_RECONNECTIONS)
86
+ Buildkite::TestCollector.logger.warn("retrying reconnection")
87
+ retry
88
+ end
89
+ end
90
+ end
91
+ retransmit
92
+ end
93
+
94
+ def close(examples_count)
95
+ @closing = true
96
+ @examples_count = examples_count
97
+ Buildkite::TestCollector.logger.debug("closing socket connection")
98
+
99
+ # Because the server only sends us confirmations after every 10mb of
100
+ # data it uploads to S3, we'll never get confirmation of the
101
+ # identifiers of the last upload part unless we send an explicit finish,
102
+ # to which the server will respond with the last bits of data
103
+ send_eot
104
+
105
+ # After EOT, we wait for 75 seconds for the send queue to be drained and for the
106
+ # server to confirm the last idents. If everything has already been confirmed we can
107
+ # proceed without waiting.
108
+ @idents_mutex.synchronize do
109
+ if @unconfirmed_idents.any?
110
+ Buildkite::TestCollector.logger.debug "Waiting for Buildkite Test Analytics to send results..."
111
+ Buildkite::TestCollector.logger.debug("waiting for last confirm")
112
+
113
+ @empty.wait(@idents_mutex, CONFIRMATION_TIMEOUT)
114
+ end
115
+ end
116
+
117
+ # Then we always disconnect cos we can't wait forever? 🤷‍♀️
118
+ @connection.close
119
+ # We kill the write thread cos it's got a while loop in it, so it won't finish otherwise
120
+ @write_thread&.kill
121
+
122
+ Buildkite::TestCollector.logger.info "Buildkite Test Analytics completed"
123
+ Buildkite::TestCollector.logger.debug("socket connection closed")
124
+ end
125
+
126
+ def handle(_connection, data)
127
+ data = JSON_PARSE.call(data)
128
+ case data["type"]
129
+ when "ping"
130
+ # In absence of other message, the server sends us a ping every 3 seconds
131
+ # We are currently not doing anything with these
132
+ Buildkite::TestCollector.logger.debug("received ping")
133
+ when "welcome", "confirm_subscription"
134
+ # Push these two messages onto the queue, so that we block on waiting for the
135
+ # initializing phase to complete
136
+ @establish_subscription_queue.push(data)
137
+ Buildkite::TestCollector.logger.debug("received #{data['type']}")
138
+ when "reject_subscription"
139
+ Buildkite::TestCollector.logger.debug("received rejected_subscription")
140
+ raise RejectedSubscription
141
+ else
142
+ process_message(data)
143
+ end
144
+ end
145
+
146
+ def write_result(result)
147
+ queue_and_track_result(result.id, result.as_hash)
148
+
149
+ Buildkite::TestCollector.logger.debug("added #{result.id} to send queue")
150
+ end
151
+
152
+ def unconfirmed_idents_count
153
+ @idents_mutex.synchronize do
154
+ @unconfirmed_idents.count
155
+ end
156
+ end
157
+
158
+ private
159
+
160
+ def connect
161
+ Buildkite::TestCollector.logger.debug("starting socket connection process")
162
+
163
+ @connection = SocketConnection.new(self, @url, {
164
+ "Authorization" => @authorization_header,
165
+ })
166
+
167
+ wait_for_welcome
168
+
169
+ @connection.transmit({
170
+ "command" => "subscribe",
171
+ "identifier" => @channel
172
+ })
173
+
174
+ wait_for_confirm
175
+
176
+ Buildkite::TestCollector.logger.info "Connected to Buildkite Test Analytics!"
177
+ Buildkite::TestCollector.logger.debug("connected")
178
+ end
179
+
180
+ def init_write_thread
181
+ # As this method can be called multiple times in the
182
+ # reconnection process, kill prev write threads (if any) before
183
+ # setting up the new one
184
+ @write_thread&.kill
185
+
186
+ @write_thread = Thread.new do
187
+ Buildkite::TestCollector.logger.debug("hello from write thread")
188
+ # Pretty sure this eternal loop is fine cos the call to queue.pop is blocking
189
+ loop do
190
+ data = @send_queue.pop
191
+ message_type = data["action"]
192
+
193
+ if message_type == "end_of_transmission"
194
+ # Because of the unpredictable sequencing between the test suite finishing
195
+ # (EOT gets queued) and disconnections happening (retransmit results gets
196
+ # queued), we don't want to send an EOT before any retransmits are sent.
197
+ if @send_queue.length > 0
198
+ @send_queue << data
199
+ Buildkite::TestCollector.logger.debug("putting eot at back of queue")
200
+ next
201
+ end
202
+ @eot_queued_mutex.synchronize do
203
+ @eot_queued = false
204
+ end
205
+ end
206
+
207
+ @connection.transmit({
208
+ "identifier" => @channel,
209
+ "command" => "message",
210
+ "data" => data.to_json
211
+ })
212
+
213
+ if Buildkite::TestCollector.debug_enabled
214
+ ids = if message_type == "record_results"
215
+ data["results"].map { |result| result["id"] }
216
+ end
217
+ Buildkite::TestCollector.logger.debug("transmitted #{message_type} #{ids}")
218
+ end
219
+ end
220
+ end
221
+ end
222
+
223
+ def pop_with_timeout(message_type)
224
+ Timeout.timeout(30, Buildkite::TestCollector::TimeoutError, "Timeout: Waited 30 seconds for #{message_type}") do
225
+ @establish_subscription_queue.pop
226
+ end
227
+ end
228
+
229
+ def wait_for_welcome
230
+ welcome = pop_with_timeout("welcome")
231
+
232
+ if welcome && welcome != { "type" => "welcome" }
233
+ raise InitialConnectionFailure.new("Wrong message received, expected a welcome, but received: #{welcome.inspect}")
234
+ end
235
+ end
236
+
237
+ def wait_for_confirm
238
+ confirm = pop_with_timeout("confirm")
239
+
240
+ if confirm && confirm != { "type" => "confirm_subscription", "identifier" => @channel }
241
+ raise InitialConnectionFailure.new("Wrong message received, expected a confirm, but received: #{confirm.inspect}")
242
+ end
243
+ end
244
+
245
+ def queue_and_track_result(ident, result_as_hash)
246
+ @idents_mutex.synchronize do
247
+ @unconfirmed_idents[ident] = result_as_hash
248
+
249
+ @send_queue << {
250
+ "action" => "record_results",
251
+ "results" => [result_as_hash]
252
+ }
253
+ end
254
+ end
255
+
256
+ def confirm_idents(idents)
257
+ retransmit_required = @closing
258
+
259
+ @idents_mutex.synchronize do
260
+ # Remove received idents from unconfirmed_idents
261
+ idents.each { |key| @unconfirmed_idents.delete(key) }
262
+
263
+ Buildkite::TestCollector.logger.debug("received confirm for indentifiers: #{idents}")
264
+
265
+ # This @empty ConditionVariable broadcasts every time that @unconfirmed_idents is
266
+ # empty, which will happen about every 10mb of data as that's when the server
267
+ # sends back confirmations.
268
+ #
269
+ # However, there aren't any threads waiting on this signal until after we
270
+ # send the EOT message, so the prior broadcasts shouldn't do anything.
271
+ if @unconfirmed_idents.empty?
272
+ @empty.broadcast
273
+
274
+ retransmit_required = false
275
+
276
+ Buildkite::TestCollector.logger.debug("all identifiers have been confirmed")
277
+ else
278
+ Buildkite::TestCollector.logger.debug("still waiting on confirm for identifiers: #{@unconfirmed_idents.keys}")
279
+ end
280
+ end
281
+
282
+ # If we're closing, any unconfirmed results need to be retransmitted.
283
+ retransmit if retransmit_required
284
+ end
285
+
286
+ def send_eot
287
+ @eot_queued_mutex.synchronize do
288
+ return if @eot_queued
289
+
290
+ @send_queue << {
291
+ "action" => "end_of_transmission",
292
+ "examples_count" => @examples_count.to_json
293
+ }
294
+ @eot_queued = true
295
+
296
+ Buildkite::TestCollector.logger.debug("added EOT to send queue")
297
+ end
298
+ end
299
+
300
+ def process_message(data)
301
+ # Check we're getting the data we expect
302
+ return unless data["identifier"] == @channel
303
+
304
+ case
305
+ when data["message"].key?("confirm")
306
+ confirm_idents(data["message"]["confirm"])
307
+ else
308
+ # unhandled message
309
+ Buildkite::TestCollector.logger.debug("received unhandled message #{data["message"]}")
310
+ end
311
+ end
312
+
313
+ def retransmit
314
+ @idents_mutex.synchronize do
315
+ results = @unconfirmed_idents.values
316
+
317
+ # queue the contents of the buffer, unless it's empty
318
+ if results.any?
319
+ @send_queue << {
320
+ "action" => "record_results",
321
+ "results" => results
322
+ }
323
+
324
+ Buildkite::TestCollector.logger.debug("queueing up retransmitted results #{@unconfirmed_idents.keys}")
325
+ end
326
+ end
327
+
328
+ # if we were disconnected in the closing phase, then resend the EOT
329
+ # message so the server can persist the last upload part
330
+ send_eot if @closing
331
+ end
332
+ end
333
+ end
@@ -2,6 +2,8 @@
2
2
 
3
3
  module Buildkite::TestCollector
4
4
  class Uploader
5
+ MAX_UPLOAD_ATTEMPTS = 3
6
+
5
7
  def self.traces
6
8
  @traces ||= {}
7
9
  end
@@ -17,6 +19,14 @@ module Buildkite::TestCollector
17
19
  EOFError
18
20
  ]
19
21
 
22
# Exception classes that `upload` rescues and retries.
# Frozen so the shared list cannot be mutated at runtime.
RETRYABLE_UPLOAD_ERRORS = [
  Net::ReadTimeout,
  Net::OpenTimeout,
  OpenSSL::SSL::SSLError,
  OpenSSL::SSL::SSLErrorWaitReadable,
  EOFError
].freeze
29
+
20
30
  def self.configure
21
31
  Buildkite::TestCollector.logger.debug("hello from main thread")
22
32
 
@@ -38,7 +48,7 @@ module Buildkite::TestCollector
38
48
  json = JSON.parse(response.body)
39
49
 
40
50
  if (socket_url = json["cable"]) && (channel = json["channel"])
41
- Buildkite::TestCollector.session = Buildkite::TestCollector::Session.new(socket_url, http.authorization_header, channel)
51
+ Buildkite::TestCollector.session = Buildkite::TestCollector::SocketSession.new(socket_url, http.authorization_header, channel)
42
52
  end
43
53
  else
44
54
  request_id = response.to_hash["x-request-id"]
@@ -54,5 +64,22 @@ module Buildkite::TestCollector
54
64
  def self.tracer
55
65
  Thread.current[:_buildkite_tracer]
56
66
  end
67
+
68
# Posts `data` via HTTPClient#post_json on a new thread.
#
# Errors listed in RETRYABLE_UPLOAD_ERRORS are retried until
# MAX_UPLOAD_ATTEMPTS attempts have been made; after the last attempt the
# failure is written to the debug log and the thread evaluates to nil.
# Any other error is not rescued here.
#
# @param data the payload handed to HTTPClient#post_json
# @return [false, Thread] false when no API token is configured, otherwise
#   the thread performing the upload (its value is post_json's result, or
#   nil when every attempt failed with a retryable error)
def self.upload(data)
  return false unless Buildkite::TestCollector.api_token

  http = Buildkite::TestCollector::HTTPClient.new(Buildkite::TestCollector.url)

  Thread.new do
    # Initialised outside the begin block so `retry` does not reset it.
    upload_attempts = 0

    begin
      http.post_json(data)
    rescue *Buildkite::TestCollector::Uploader::RETRYABLE_UPLOAD_ERRORS => e
      upload_attempts += 1
      retry if upload_attempts < MAX_UPLOAD_ATTEMPTS

      # Out of attempts: record the failure instead of dropping it silently.
      Buildkite::TestCollector.logger.debug(
        "upload failed after #{upload_attempts} attempts (#{e.class}: #{e.message})"
      )
      nil
    end
  end
end
57
84
  end
58
85
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Buildkite
4
4
  module TestCollector
5
- VERSION = "1.4.2"
5
+ VERSION = "2.0.0.pre"
6
6
  NAME = "buildkite-test_collector"
7
7
  end
8
8
  end
@@ -30,11 +30,13 @@ require_relative "test_collector/network"
30
30
  require_relative "test_collector/object"
31
31
  require_relative "test_collector/tracer"
32
32
  require_relative "test_collector/socket_connection"
33
+ require_relative "test_collector/socket_session"
33
34
  require_relative "test_collector/session"
34
35
 
35
36
  module Buildkite
36
37
  module TestCollector
37
38
  DEFAULT_URL = "https://analytics-api.buildkite.com/v1/uploads"
39
+ DEFAULT_UPLOAD_BATCH_SIZE = 500
38
40
 
39
41
  class << self
40
42
  attr_accessor :api_token
@@ -45,6 +47,7 @@ module Buildkite
45
47
  attr_accessor :tracing_enabled
46
48
  attr_accessor :artifact_path
47
49
  attr_accessor :env
50
+ attr_accessor :batch_size
48
51
  end
49
52
 
50
53
  def self.configure(hook:, token: nil, url: nil, debug_enabled: false, tracing_enabled: true, artifact_path: nil, env: {})
@@ -54,7 +57,7 @@ module Buildkite
54
57
  self.tracing_enabled = tracing_enabled
55
58
  self.artifact_path = artifact_path
56
59
  self.env = env
57
-
60
+ self.batch_size = ENV.fetch("BUILDKITE_ANALYTICS_UPLOAD_BATCH_SIZE") { DEFAULT_UPLOAD_BATCH_SIZE }.to_i
58
61
  self.hook_into(hook)
59
62
  end
60
63
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buildkite-test_collector
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.2
4
+ version: 2.0.0.pre
5
5
  platform: ruby
6
6
  authors:
7
7
  - Buildkite
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-19 00:00:00.000000000 Z
11
+ date: 2023-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -104,6 +104,7 @@ files:
104
104
  - lib/buildkite/test_collector/rspec_plugin/trace.rb
105
105
  - lib/buildkite/test_collector/session.rb
106
106
  - lib/buildkite/test_collector/socket_connection.rb
107
+ - lib/buildkite/test_collector/socket_session.rb
107
108
  - lib/buildkite/test_collector/tracer.rb
108
109
  - lib/buildkite/test_collector/uploader.rb
109
110
  - lib/buildkite/test_collector/version.rb
@@ -125,11 +126,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
125
126
  version: 2.3.0
126
127
  required_rubygems_version: !ruby/object:Gem::Requirement
127
128
  requirements:
128
- - - ">="
129
+ - - ">"
129
130
  - !ruby/object:Gem::Version
130
- version: '0'
131
+ version: 1.3.1
131
132
  requirements: []
132
- rubygems_version: 3.3.26
133
+ rubygems_version: 3.1.6
133
134
  signing_key:
134
135
  specification_version: 4
135
136
  summary: Track test executions and report to Buildkite Test Analytics