omq 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +129 -0
  3. data/README.md +28 -3
  4. data/lib/omq/channel.rb +5 -5
  5. data/lib/omq/client_server.rb +10 -10
  6. data/lib/omq/engine.rb +702 -0
  7. data/lib/omq/options.rb +48 -0
  8. data/lib/omq/pair.rb +4 -4
  9. data/lib/omq/peer.rb +5 -5
  10. data/lib/omq/pub_sub.rb +18 -18
  11. data/lib/omq/push_pull.rb +6 -6
  12. data/lib/omq/queue_interface.rb +73 -0
  13. data/lib/omq/radio_dish.rb +6 -6
  14. data/lib/omq/reactor.rb +128 -0
  15. data/lib/omq/readable.rb +44 -0
  16. data/lib/omq/req_rep.rb +8 -8
  17. data/lib/omq/router_dealer.rb +8 -8
  18. data/lib/omq/routing/channel.rb +83 -0
  19. data/lib/omq/routing/client.rb +56 -0
  20. data/lib/omq/routing/dealer.rb +57 -0
  21. data/lib/omq/routing/dish.rb +78 -0
  22. data/lib/omq/routing/fan_out.rb +140 -0
  23. data/lib/omq/routing/gather.rb +46 -0
  24. data/lib/omq/routing/pair.rb +86 -0
  25. data/lib/omq/routing/peer.rb +101 -0
  26. data/lib/omq/routing/pub.rb +60 -0
  27. data/lib/omq/routing/pull.rb +46 -0
  28. data/lib/omq/routing/push.rb +81 -0
  29. data/lib/omq/routing/radio.rb +150 -0
  30. data/lib/omq/routing/rep.rb +101 -0
  31. data/lib/omq/routing/req.rb +65 -0
  32. data/lib/omq/routing/round_robin.rb +168 -0
  33. data/lib/omq/routing/router.rb +110 -0
  34. data/lib/omq/routing/scatter.rb +82 -0
  35. data/lib/omq/routing/server.rb +101 -0
  36. data/lib/omq/routing/sub.rb +78 -0
  37. data/lib/omq/routing/xpub.rb +72 -0
  38. data/lib/omq/routing/xsub.rb +83 -0
  39. data/lib/omq/routing.rb +66 -0
  40. data/lib/omq/scatter_gather.rb +8 -8
  41. data/lib/omq/single_frame.rb +18 -0
  42. data/lib/omq/socket.rb +32 -11
  43. data/lib/omq/transport/inproc.rb +355 -0
  44. data/lib/omq/transport/ipc.rb +117 -0
  45. data/lib/omq/transport/tcp.rb +111 -0
  46. data/lib/omq/transport/tls.rb +146 -0
  47. data/lib/omq/version.rb +1 -1
  48. data/lib/omq/writable.rb +66 -0
  49. data/lib/omq.rb +64 -4
  50. metadata +34 -33
  51. data/lib/omq/zmtp/engine.rb +0 -551
  52. data/lib/omq/zmtp/options.rb +0 -48
  53. data/lib/omq/zmtp/reactor.rb +0 -131
  54. data/lib/omq/zmtp/readable.rb +0 -29
  55. data/lib/omq/zmtp/routing/channel.rb +0 -81
  56. data/lib/omq/zmtp/routing/client.rb +0 -56
  57. data/lib/omq/zmtp/routing/dealer.rb +0 -57
  58. data/lib/omq/zmtp/routing/dish.rb +0 -80
  59. data/lib/omq/zmtp/routing/fan_out.rb +0 -131
  60. data/lib/omq/zmtp/routing/gather.rb +0 -48
  61. data/lib/omq/zmtp/routing/pair.rb +0 -84
  62. data/lib/omq/zmtp/routing/peer.rb +0 -100
  63. data/lib/omq/zmtp/routing/pub.rb +0 -62
  64. data/lib/omq/zmtp/routing/pull.rb +0 -48
  65. data/lib/omq/zmtp/routing/push.rb +0 -80
  66. data/lib/omq/zmtp/routing/radio.rb +0 -139
  67. data/lib/omq/zmtp/routing/rep.rb +0 -101
  68. data/lib/omq/zmtp/routing/req.rb +0 -65
  69. data/lib/omq/zmtp/routing/round_robin.rb +0 -143
  70. data/lib/omq/zmtp/routing/router.rb +0 -109
  71. data/lib/omq/zmtp/routing/scatter.rb +0 -81
  72. data/lib/omq/zmtp/routing/server.rb +0 -100
  73. data/lib/omq/zmtp/routing/sub.rb +0 -80
  74. data/lib/omq/zmtp/routing/xpub.rb +0 -74
  75. data/lib/omq/zmtp/routing/xsub.rb +0 -86
  76. data/lib/omq/zmtp/routing.rb +0 -65
  77. data/lib/omq/zmtp/single_frame.rb +0 -20
  78. data/lib/omq/zmtp/transport/inproc.rb +0 -359
  79. data/lib/omq/zmtp/transport/ipc.rb +0 -118
  80. data/lib/omq/zmtp/transport/tcp.rb +0 -117
  81. data/lib/omq/zmtp/writable.rb +0 -61
  82. data/lib/omq/zmtp.rb +0 -81
data/lib/omq/engine.rb ADDED
@@ -0,0 +1,702 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "async"
4
+
5
+ module OMQ
6
+ # Per-socket orchestrator.
7
+ #
8
+ # Manages connections, transports, and the routing strategy for one
9
+ # OMQ::Socket instance. Each socket type creates one Engine.
10
+ #
11
+ class Engine
12
# @return [Symbol] socket type (e.g. :REQ, :PAIR)
#
attr_reader :socket_type

# @return [Options] socket options
#
attr_reader :options

# @return [Routing] routing strategy chosen for this socket type
#
attr_reader :routing

# @return [String, nil] last bound endpoint (as resolved by the listener)
#
attr_reader :last_endpoint

# @return [Integer, nil] last auto-selected TCP port
#
attr_reader :last_tcp_port
37
# Builds the per-socket engine and its routing strategy.
#
# @param socket_type [Symbol] e.g. :REQ, :REP, :PAIR
# @param options [Options]
#
def initialize(socket_type, options)
  @socket_type = socket_type
  @options = options
  @routing = Routing.for(socket_type).new(self)

  # Connection bookkeeping.
  @connections = []
  @connection_endpoints = {} # connection => endpoint (for reconnection)
  @connected_endpoints = []  # endpoints we connected to (not bound)
  @connection_promises = {}  # connection => Async::Promise
  @listeners = []
  @tasks = []

  # Lifecycle flags and last-bind bookkeeping.
  @closed = false
  @closing = false
  @last_endpoint = nil
  @last_tcp_port = nil
  @fatal_error = nil

  # Coordination primitives and task ownership.
  @peer_connected = Async::Promise.new
  @all_peers_gone = Async::Promise.new
  @reconnect_enabled = true
  @parent_task = nil
  @on_io_thread = false
end
61
+
62
+
63
# Internal coordination state, readable by routing strategies and Socket:
# - peer_connected: promise resolved when the first peer is registered
# - all_peers_gone: promise resolved once peers existed and all are gone
# - connections:    currently registered connections
# - parent_task:    Async task that owns all background subtasks
attr_reader :peer_connected, :all_peers_gone, :connections, :parent_task

# When set to false, lost connections to dialed endpoints are not retried.
attr_writer :reconnect_enabled

# Optional proc that wraps new connections (e.g. for serialization).
# Called with the raw connection; must return the (possibly wrapped) connection.
#
attr_accessor :connection_wrapper
71
+
72
+
73
# Spawns an inproc reconnect retry task under @parent_task.
#
# @param endpoint [String]
# @yield [interval] the retry loop body, given the base retry interval
#
def spawn_inproc_retry(endpoint)
  configured = @options.reconnect_interval
  # A Range means backoff elsewhere; here we only need the base interval.
  interval =
    if configured.is_a?(Range)
      configured.begin
    else
      configured
    end

  @tasks << @parent_task.async(transient: true, annotation: "inproc reconnect #{endpoint}") do
    yield interval
  rescue Async::Stop
    # stopped during shutdown — nothing to clean up
  end
end
86
+
87
+
88
# Binds to an endpoint and starts accepting peers on it.
#
# @param endpoint [String] e.g. "tcp://127.0.0.1:5555", "inproc://foo"
# @return [void]
# @raise [ArgumentError] on unsupported transport
#
def bind(endpoint)
  listener = transport_for(endpoint).bind(endpoint, self)
  start_accept_loops(listener)
  @listeners << listener

  # Record the listener's resolved endpoint (ports may be auto-selected).
  bound = listener.endpoint
  @last_endpoint = bound
  @last_tcp_port = extract_tcp_port(bound)
end
102
+
103
+
104
# Connects to an endpoint.
#
# Inproc endpoints attach synchronously; all other transports are
# dialed from a background task so the caller never blocks.
#
# @param endpoint [String]
# @return [void]
#
def connect(endpoint)
  validate_endpoint!(endpoint)
  @connected_endpoints << endpoint

  unless endpoint.start_with?("inproc://")
    # TCP/IPC connect in background — never blocks the caller
    schedule_reconnect(endpoint, delay: 0)
    return
  end

  # Inproc connect is synchronous and instant
  transport_for(endpoint).connect(endpoint, self)
end
121
+
122
+
123
# Disconnects from an endpoint. Closes connections to that endpoint
# and stops auto-reconnection for it.
#
# @param endpoint [String]
# @return [void]
#
def disconnect(endpoint)
  @connected_endpoints.delete(endpoint)
  drop_connections_for(endpoint)
end


# Unbinds from an endpoint. Stops the listener and closes all
# connections that were accepted on it.
#
# @param endpoint [String]
# @return [void]
#
def unbind(endpoint)
  listener = @listeners.find { |l| l.endpoint == endpoint }
  return unless listener
  listener.stop
  @listeners.delete(listener)

  # Close connections accepted on this endpoint
  drop_connections_for(endpoint)
end


# Closes and deregisters every connection associated with +endpoint+.
# Shared teardown for #disconnect and #unbind (previously duplicated).
#
# @param endpoint [String]
# @return [void]
#
def drop_connections_for(endpoint)
  conns = @connection_endpoints.select { |_, ep| ep == endpoint }.keys
  conns.each do |conn|
    @connection_endpoints.delete(conn)
    @connections.delete(conn)
    @routing.connection_removed(conn)
    conn.close
  end
end
162
+
163
+
164
# Called by a transport when an incoming connection is accepted.
#
# @param io [#read, #write, #close]
# @param endpoint [String, nil] the endpoint this was accepted on
# @return [void]
#
def handle_accepted(io, endpoint: nil)
  # We accepted the connection, so we take the ZMTP server role.
  spawn_connection(io, as_server: true, endpoint: endpoint)
end


# Called by a transport when an outgoing connection is established.
#
# @param io [#read, #write, #close]
# @param endpoint [String, nil] endpoint for reconnection tracking
# @return [void]
#
def handle_connected(io, endpoint: nil)
  # We dialed out, so we take the ZMTP client role.
  spawn_connection(io, as_server: false, endpoint: endpoint)
end
183
+
184
+
185
# Called by inproc transport with a pre-validated DirectPipe.
# Skips ZMTP handshake — just registers with routing strategy.
#
# @param pipe [Transport::Inproc::DirectPipe]
# @param endpoint [String, nil] endpoint for reconnection tracking
# @return [void]
#
def connection_ready(pipe, endpoint: nil)
  pipe = @connection_wrapper.call(pipe) if @connection_wrapper
  @connections << pipe
  @connection_endpoints[pipe] = endpoint if endpoint
  @routing.connection_added(pipe)
  # Only the first peer resolves the promise — connection_lost already
  # checks resolved?, and re-resolving an Async promise for every later
  # peer is at best redundant and may raise.
  @peer_connected.resolve(pipe) unless @peer_connected.resolved?
end
198
+
199
+
200
# Dequeues the next received message. Blocks until available.
#
# @return [Array<String>, nil] message parts (nil after a sentinel)
# @raise [OMQ::SocketDeadError] if a background pump task crashed
#
def dequeue_recv
  raise @fatal_error if @fatal_error
  message = @routing.recv_queue.dequeue
  # A nil may be the fatal-error wakeup sentinel — re-check after waking.
  raise @fatal_error if message.nil? && @fatal_error
  message
end


# Dequeues up to +max+ messages. Blocks on the first, then drains
# whatever is immediately available without blocking.
#
# @param max [Integer]
# @return [Array<Array<String>>]
#
def dequeue_recv_batch(max)
  raise @fatal_error if @fatal_error
  queue = @routing.recv_queue

  first = queue.dequeue
  raise @fatal_error if first.nil? && @fatal_error

  batch = [first]
  until batch.size >= max
    extra = queue.dequeue(timeout: 0)
    break unless extra
    batch << extra
  end
  batch
end


# Pushes a nil sentinel into the recv queue, unblocking a
# pending {#dequeue_recv} with a nil return value.
#
def dequeue_recv_sentinel
  @routing.recv_queue.push(nil)
end


# Enqueues a message for sending. Blocks at HWM.
#
# @param parts [Array<String>]
# @return [void]
# @raise [OMQ::SocketDeadError] if a background pump task crashed
#
def enqueue_send(parts)
  raise @fatal_error if @fatal_error
  @routing.enqueue(parts)
end
252
+
253
+
254
# Fairness limits for the recv pump. Yield to the scheduler
# after reading this many messages or bytes from one connection,
# whichever comes first. Prevents a fast or large-message
# connection from starving slower peers.
RECV_FAIRNESS_MESSAGES = 64
RECV_FAIRNESS_BYTES = 1 << 20 # 1 MB

# Starts a recv pump that dequeues messages from a connection and
# enqueues them into the routing strategy's recv queue, or wires the
# inproc fast path when the connection is a DirectPipe (no task needed).
#
# When a block is given, each message is yielded for transformation
# before enqueueing. The block is compiled at the call site, giving
# YJIT a monomorphic call per routing strategy instead of a shared
# megamorphic `transform.call` dispatch. For the same reason the
# transform / no-transform loop bodies below are deliberately
# duplicated — do not refactor them into one loop with a conditional.
#
# @param conn [Connection, Transport::Inproc::DirectPipe]
# @param recv_queue [Async::LimitedQueue] routing strategy's recv queue
# @yield [msg] optional per-message transform
# @return [#stop, nil] pump task handle, or nil for DirectPipe bypass
#
def start_recv_pump(conn, recv_queue, &transform)
  if conn.is_a?(Transport::Inproc::DirectPipe) && conn.peer
    # Fast path: the peer pushes directly into our queue; no pump task.
    conn.peer.direct_recv_queue = recv_queue
    conn.peer.direct_recv_transform = transform
    return nil
  end

  if transform
    @parent_task.async(transient: true, annotation: "recv pump") do |task|
      loop do
        count = 0
        bytes = 0
        while count < RECV_FAIRNESS_MESSAGES && bytes < RECV_FAIRNESS_BYTES
          msg = conn.receive_message
          msg = transform.call(msg).freeze
          recv_queue.enqueue(msg)
          count += 1
          # Byte accounting only applies to string-array messages; other
          # shapes fall back to the message-count cap alone.
          bytes += msg.is_a?(Array) && msg.first.is_a?(String) ? msg.sum(&:bytesize) : 0
        end
        # Fairness point: let other tasks (and other pumps) run.
        task.yield
      end
    rescue Async::Stop
    rescue Protocol::ZMTP::Error, *CONNECTION_LOST
      connection_lost(conn)
    rescue => error
      # Unexpected pump failure: surface to blocked send/recv callers.
      signal_fatal_error(error)
    end
  else
    @parent_task.async(transient: true, annotation: "recv pump") do |task|
      loop do
        count = 0
        bytes = 0
        while count < RECV_FAIRNESS_MESSAGES && bytes < RECV_FAIRNESS_BYTES
          msg = conn.receive_message
          recv_queue.enqueue(msg)
          count += 1
          bytes += msg.is_a?(Array) && msg.first.is_a?(String) ? msg.sum(&:bytesize) : 0
        end
        task.yield
      end
    rescue Async::Stop
    rescue Protocol::ZMTP::Error, *CONNECTION_LOST
      connection_lost(conn)
    rescue => error
      signal_fatal_error(error)
    end
  end
end
326
+
327
+
328
# Called when a connection is lost: deregisters it, wakes its owner
# task, and (for dialed endpoints) schedules a reconnect.
#
# @param connection [Connection]
# @return [void]
#
def connection_lost(connection)
  endpoint = @connection_endpoints.delete(connection)
  @connections.delete(connection)
  @routing.connection_removed(connection)
  connection.close

  # Signal the connection task to exit.
  @connection_promises.delete(connection)&.resolve(true)

  # Resolve all_peers_gone once: had peers, now have none.
  @all_peers_gone.resolve(true) if @peer_connected.resolved? && @connections.empty?

  # Auto-reconnect only for endpoints we dialed (not bound), and only
  # while the socket is alive with reconnection enabled.
  return unless endpoint && @reconnect_enabled
  return if @closed || @closing
  return unless @connected_endpoints.include?(endpoint)

  schedule_reconnect(endpoint)
end
353
+
354
+
355
# Closes all connections and listeners.
#
# Shutdown proceeds in order: (1) stop listeners if peers exist,
# (2) linger — drain send queues, (3) mark closed and stop remaining
# listeners, (4) close connections and stop routing/background tasks.
#
# @return [void]
#
def close
  return if @closed || @closing
  @closing = true

  # Stop accepting new connections — but only if we already have
  # peers to drain to. With zero connections the listeners must
  # stay open so late-arriving peers can still receive queued
  # messages during the linger period.
  unless @connections.empty?
    @listeners.each(&:stop)
    @listeners.clear
  end

  # Linger: wait for send queues to drain before closing.
  # linger=0 → close immediately, linger=nil → wait forever.
  # @closed is set AFTER draining so reconnect tasks keep
  # running during the linger period.
  linger = @options.linger
  if linger.nil? || linger > 0
    drain_timeout = linger # nil = wait forever, >0 = seconds
    drain_send_queues(drain_timeout)
  end

  @closed = true
  # Undo the linger registration made by capture_parent_task.
  Reactor.untrack_linger(@options.linger) if @on_io_thread

  # Stop any remaining listeners.
  @listeners.each(&:stop)
  @listeners.clear

  # Close connections — causes pump tasks to get EOFError/IOError
  @connections.each(&:close)
  @connections.clear
  # Stop any remaining pump tasks
  @routing.stop rescue nil
  @tasks.each { |t| t.stop rescue nil }
  @tasks.clear
end
397
+
398
+
399
# Spawns a transient pump task with error propagation.
#
# Unexpected exceptions are caught and forwarded to
# {#signal_fatal_error} so blocked callers (send/recv)
# see the real error instead of deadlocking.
#
# @param annotation [String] task annotation for debugging
# @yield the pump loop body
# @return [Async::Task]
#
def spawn_pump_task(annotation:, &block)
  @parent_task.async(transient: true, annotation: annotation) do
    block.call
  rescue Async::Stop, Protocol::ZMTP::Error, *CONNECTION_LOST
    # normal shutdown / expected disconnect — nothing to report
  rescue => error
    # Anything else is a bug in a pump loop: surface it.
    signal_fatal_error(error)
  end
end
418
+
419
+
420
# Wraps an unexpected pump error as {OMQ::SocketDeadError} and
# unblocks any callers waiting on the recv queue.
#
# Must be called from inside a rescue block so that +error+ is
# +$!+ and Ruby sets it as +#cause+ on the new exception.
#
# @param error [Exception] the original failure (becomes #cause)
#
def signal_fatal_error(error)
  # During shutdown, pump errors are expected fallout — ignore them.
  return if @closing || @closed
  # raise/rescue (rather than .new) so Ruby attaches $! as #cause.
  @fatal_error = begin
    raise OMQ::SocketDeadError, "internal error killed #{@socket_type} socket"
  rescue => wrapped
    wrapped
  end
  # Wake anyone blocked in dequeue_recv or waiting for the first peer.
  @routing.recv_queue.enqueue(nil) rescue nil
  @peer_connected.resolve(nil) rescue nil
end
438
+
439
+
440
# Saves the current Async task so connection subtrees can be
# spawned under the caller's task tree. Called by Socket before
# the first bind/connect — outside Reactor.run so non-Async
# callers get the IO thread's root task, not an ephemeral work task.
#
def capture_parent_task
  return if @parent_task

  unless Async::Task.current?
    # Non-Async caller: adopt the shared IO thread's root task and
    # register our linger so the reactor outlives this socket.
    @parent_task = Reactor.root_task
    @on_io_thread = true
    Reactor.track_linger(@options.linger)
    return
  end

  @parent_task = Async::Task.current
end
455
+
456
+
457
+ private
458
+
459
+
460
# Spawns an isolated connection task as a sibling of accept/reconnect
# tasks. All per-connection children (heartbeat, recv pump, reaper)
# live inside this task. When the connection dies, the entire subtree
# is cleaned up by Async.
#
# @param io [#read, #write, #close] transport stream for the peer
# @param as_server [Boolean] ZMTP role for the handshake
# @param endpoint [String, nil] endpoint for reconnection tracking
#
def spawn_connection(io, as_server:, endpoint: nil)
  # &. — a no-op if capture_parent_task was never called.
  task = @parent_task&.async(transient: true, annotation: "conn #{endpoint}") do
    done = Async::Promise.new
    conn = setup_connection(io, as_server: as_server, endpoint: endpoint, done: done)
    # Park here until connection_lost resolves the promise.
    done.wait
  rescue Protocol::ZMTP::Error, *CONNECTION_LOST
    # handshake failed or connection lost — subtree cleaned up
  ensure
    conn&.close rescue nil
  end
  @tasks << task if task
end
477
+
478
+
479
# Waits for the send queue to drain (used by #close during linger).
#
# @param timeout [Numeric, nil] max seconds to wait (nil = forever)
#
def drain_send_queues(timeout)
  # Not every routing strategy has an outbound queue.
  return unless @routing.respond_to?(:send_queue)

  deadline = timeout ? Async::Clock.now + timeout : nil

  loop do
    break if @routing.send_queue.empty? && @routing.send_pump_idle?
    break if deadline && Async::Clock.now >= deadline
    sleep 0.001
  end
end
495
+
496
+
497
# Performs the ZMTP handshake, starts heartbeating, and registers
# the new connection with the routing strategy.
#
# @param io [#read, #write, #close] underlying transport stream
# @param as_server [Boolean] whether we are the ZMTP server side
# @param endpoint [String, nil] endpoint for reconnection tracking
# @param done [Async::Promise, nil] resolved when the connection is lost
# @return [Connection] the (possibly wrapped) registered connection
# @raise [Protocol::ZMTP::Error] when the handshake fails
#
def setup_connection(io, as_server:, endpoint: nil, done: nil)
  conn = Protocol::ZMTP::Connection.new(
    io,
    socket_type: @socket_type.to_s,
    identity: @options.identity,
    as_server: as_server,
    # dup — presumably the handshake mutates mechanism state; keep the
    # shared option pristine (TODO confirm against Protocol::ZMTP).
    mechanism: @options.mechanism&.dup,
    max_message_size: @options.max_message_size,
  )
  conn.handshake!
  start_heartbeat(conn)
  conn = @connection_wrapper.call(conn) if @connection_wrapper
  @connections << conn
  @connection_endpoints[conn] = endpoint if endpoint
  @connection_promises[conn] = done if done
  @routing.connection_added(conn)
  # Only the first peer resolves the promise — connection_lost already
  # checks resolved?, and re-resolving for every later peer is at best
  # redundant and may raise.
  @peer_connected.resolve(conn) unless @peer_connected.resolved?
  conn
rescue Protocol::ZMTP::Error, *CONNECTION_LOST
  conn&.close
  raise
end
527
+
528
+
529
# Spawns a heartbeat task for the connection.
# The connection only tracks timestamps — the engine drives the loop.
#
# @param conn [Connection]
# @return [void]
#
def start_heartbeat(conn)
  interval = @options.heartbeat_interval
  return unless interval

  ttl = @options.heartbeat_ttl || interval
  timeout = @options.heartbeat_timeout || interval
  conn.touch_heartbeat

  @tasks << @parent_task.async(transient: true, annotation: "heartbeat") do
    loop do
      sleep interval
      # Ping first, then check whether the peer has gone quiet too long.
      conn.send_command(Protocol::ZMTP::Codec::Command.ping(ttl: ttl, context: "".b))
      next unless conn.heartbeat_expired?(timeout)

      conn.close
      break
    end
  rescue Async::Stop
  rescue *CONNECTION_LOST
    # connection closed
  end
end
557
+
558
+
559
# Spawns a background task that reconnects to the given endpoint
# with exponential back-off based on the reconnect_interval option.
#
# A Range reconnect_interval means "start at range.begin, double on
# each failure, cap at range.end"; a scalar means a fixed delay.
#
# @param endpoint [String] endpoint to reconnect to
# @param delay [Numeric, nil] initial delay in seconds (defaults to reconnect_interval)
#
def schedule_reconnect(endpoint, delay: nil)
  ri = @options.reconnect_interval
  if ri.is_a?(Range)
    delay ||= ri.begin
    max_delay = ri.end
  else
    delay ||= ri
    max_delay = nil
  end

  @tasks << @parent_task.async(transient: true, annotation: "reconnect #{endpoint}") do
    loop do
      # Bail out promptly around the sleep if the socket closed meanwhile.
      break if @closed
      sleep delay if delay > 0
      break if @closed
      begin
        transport = transport_for(endpoint)
        transport.connect(endpoint, self)
        break # connected successfully
      rescue *CONNECTION_LOST, *CONNECTION_FAILED, Protocol::ZMTP::Error
        # Failed attempt: double the delay, capped, when a Range is configured.
        delay = [delay * 2, max_delay].min if max_delay
        # After first attempt with delay: 0, use the configured interval
        delay = ri.is_a?(Range) ? ri.begin : ri if delay == 0
      end
    end
  rescue Async::Stop
    # normal shutdown
  rescue => error
    signal_fatal_error(error)
  end
end
596
+
597
+
598
# Eagerly validates TCP hostnames so resolution errors fail
# on connect, not silently in the background reconnect loop.
# Reconnects still re-resolve (DNS may change), and transient
# resolution failures during reconnect are retried with backoff.
#
# @param endpoint [String]
# @return [void]
#
def validate_endpoint!(endpoint)
  host =
    if %r{\Atcp://}.match?(endpoint)
      URI.parse(endpoint.sub("tcp://", "http://")).hostname
    elsif %r{\Atls\+tcp://}.match?(endpoint)
      URI.parse("http://#{endpoint.delete_prefix("tls+tcp://")}").hostname
    else
      # Non-TCP transports (ipc, inproc) need no DNS resolution.
      return
    end

  Addrinfo.getaddrinfo(host, nil, nil, :STREAM) if host
end
614
+
615
+
616
# Maps an endpoint's URI scheme to its transport module.
#
# @param endpoint [String]
# @return [Module] one of the Transport submodules
# @raise [ArgumentError] when the scheme is not recognized
#
def transport_for(endpoint)
  return Transport::TLS if %r{\Atls\+tcp://}.match?(endpoint)
  return Transport::TCP if %r{\Atcp://}.match?(endpoint)
  return Transport::IPC if %r{\Aipc://}.match?(endpoint)
  return Transport::Inproc if %r{\Ainproc://}.match?(endpoint)

  raise ArgumentError, "unsupported transport: #{endpoint}"
end
625
+
626
+
627
# Extracts the numeric port from a tcp:// or tls+tcp:// endpoint.
#
# @param endpoint [String, nil]
# @return [Integer, nil] the port, or nil for non-TCP endpoints / no port
#
def extract_tcp_port(endpoint)
  return nil unless endpoint&.start_with?("tcp://", "tls+tcp://")

  port = endpoint.split(":").last.to_i
  port > 0 ? port : nil
end
632
+
633
+
634
# Spawns accept loops for a listener under @parent_task.
#
# TCP listeners have multiple server sockets (IPv4/IPv6);
# IPC listeners have one. Inproc listeners have none — no branch
# matches them here, so they fall through untouched.
#
# @param listener [Transport::TLS::Listener, Transport::TCP::Listener,
#   Transport::IPC::Listener, Object]
#
def start_accept_loops(listener)
  case listener
  when Transport::TLS::Listener
    tasks = listener.servers.map do |server|
      @parent_task.async(transient: true, annotation: "tls accept #{listener.endpoint}") do
        loop do
          client = server.accept
          # defer_stop: don't abandon a half-accepted client if this
          # accept task is stopped mid-handshake.
          Async::Task.current.defer_stop do
            ssl = OpenSSL::SSL::SSLSocket.new(client, listener.ssl_context)
            ssl.sync_close = true
            ssl.accept
            handle_accepted(IO::Stream::Buffered.wrap(ssl), endpoint: listener.endpoint)
          rescue OpenSSL::SSL::SSLError
            # Bad certificate, protocol mismatch, etc. — drop this
            # connection but keep the accept loop running.
            ssl&.close rescue nil
          end
        end
      rescue Async::Stop
      rescue IOError
        # server closed
      ensure
        server.close rescue nil
      end
    end
    listener.accept_tasks = tasks

  when Transport::TCP::Listener
    tasks = listener.servers.map do |server|
      @parent_task.async(transient: true, annotation: "tcp accept #{listener.endpoint}") do
        loop do
          client = server.accept
          Async::Task.current.defer_stop do
            handle_accepted(IO::Stream::Buffered.wrap(client), endpoint: listener.endpoint)
          end
        end
      rescue Async::Stop
      rescue IOError
        # server closed
      ensure
        server.close rescue nil
      end
    end
    listener.accept_tasks = tasks

  when Transport::IPC::Listener
    # IPC has exactly one server socket, so a single accept task.
    task = @parent_task.async(transient: true, annotation: "ipc accept #{listener.endpoint}") do
      loop do
        client = listener.server.accept
        Async::Task.current.defer_stop do
          handle_accepted(IO::Stream::Buffered.wrap(client), endpoint: listener.endpoint)
        end
      end
    rescue Async::Stop
    rescue IOError
      # server closed
    ensure
      listener.server.close rescue nil
    end
    listener.accept_task = task
  end
end
701
+ end
702
+ end