nnq 0.2.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/nnq/engine.rb CHANGED
@@ -2,12 +2,15 @@
2
2
 
3
3
  require "async"
4
4
  require "async/clock"
5
+ require "set"
5
6
  require "protocol/sp"
6
7
  require_relative "error"
7
8
  require_relative "connection"
9
+ require_relative "monitor_event"
8
10
  require_relative "reactor"
9
11
  require_relative "engine/socket_lifecycle"
10
12
  require_relative "engine/connection_lifecycle"
13
+ require_relative "engine/reconnect"
11
14
  require_relative "transport/tcp"
12
15
  require_relative "transport/ipc"
13
16
  require_relative "transport/inproc"
@@ -32,54 +35,153 @@ module NNQ
32
35
  # @return [Integer] our SP protocol id (e.g. Protocols::PUSH_V0)
33
36
  attr_reader :protocol
34
37
 
38
+
35
39
  # @return [Options]
36
40
  attr_reader :options
37
41
 
42
+
43
+ # @return [Routing strategy]
44
+ attr_reader :routing
45
+
46
+
38
47
  # @return [Hash{NNQ::Connection => ConnectionLifecycle}]
39
48
  attr_reader :connections
40
49
 
50
+
41
51
  # @return [SocketLifecycle]
42
52
  attr_reader :lifecycle
43
53
 
54
+
44
55
  # @return [String, nil]
45
56
  attr_reader :last_endpoint
46
57
 
58
+
47
59
  # @return [Async::Condition] signaled when a new pipe is registered
48
60
  attr_reader :new_pipe
49
61
 
50
62
 
63
+ # @return [Set<String>] endpoints we have called #connect on; used
64
+ # to decide whether to schedule a reconnect after a connection
65
+ # is lost.
66
+ attr_reader :dialed
67
+
68
+
69
+ # @return [Async::Queue, nil] monitor event queue (set by Socket#monitor)
70
+ attr_accessor :monitor_queue
71
+
72
+
73
+ # @return [Boolean] when true, {#emit_verbose_msg_sent} and {#emit_verbose_msg_received} forward
74
+ # per-message traces (:message_sent / :message_received) to the
75
+ # monitor queue. Set by {Socket#monitor} via its +verbose:+ kwarg.
76
+ attr_accessor :verbose_monitor
77
+
78
+
51
79
  # @param protocol [Integer] our SP protocol id (e.g. Protocols::PUSH_V0)
52
80
  # @param options [Options]
53
81
  # @yieldparam engine [Engine] used by the caller to build a routing
54
82
  # strategy with access to the engine's connection map
55
83
  def initialize(protocol:, options:)
56
- @protocol = protocol
57
- @options = options
58
- @connections = {}
59
- @listeners = []
60
- @lifecycle = SocketLifecycle.new
61
- @last_endpoint = nil
62
- @new_pipe = Async::Condition.new
63
- @routing = yield(self)
84
+ @protocol = protocol
85
+ @options = options
86
+ @connections = {}
87
+ @listeners = []
88
+ @lifecycle = SocketLifecycle.new
89
+ @last_endpoint = nil
90
+ @new_pipe = Async::Condition.new
91
+ @monitor_queue = nil
92
+ @verbose_monitor = false
93
+ @dialed = Set.new
94
+ @routing = yield(self)
64
95
  end
65
96
 
66
97
 
67
- # @return [Routing strategy]
68
- attr_reader :routing
98
+ # Emits a monitor event to the attached queue (if any).
99
+ def emit_monitor_event(type, endpoint: nil, detail: nil)
100
+ return unless @monitor_queue
101
+ @monitor_queue.enqueue(MonitorEvent.new(type: type, endpoint: endpoint, detail: detail))
102
+ rescue Async::Stop
103
+ end
104
+
105
+
106
+ # Emits a :message_sent verbose event. Early-returns before
107
+ # allocating the detail hash so the hot send path pays nothing
108
+ # when verbose monitoring is off.
109
+ def emit_verbose_msg_sent(body)
110
+ return unless @verbose_monitor
111
+ emit_monitor_event(:message_sent, detail: { body: body })
112
+ end
113
+
114
+
115
+ # Emits a :message_received verbose event. Same early-return
116
+ # discipline as {#emit_verbose_msg_sent}.
117
+ def emit_verbose_msg_received(body)
118
+ return unless @verbose_monitor
119
+ emit_monitor_event(:message_received, detail: { body: body })
120
+ end
69
121
 
70
122
 
71
123
  # @return [Async::Task, nil]
72
- def parent_task = @lifecycle.parent_task
124
+ def parent_task
125
+ @lifecycle.parent_task
126
+ end
127
+
128
+
129
+ # @return [Async::Barrier, nil]
130
+ def barrier
131
+ @lifecycle.barrier
132
+ end
133
+
134
+
135
+ def closed?
136
+ @lifecycle.closed?
137
+ end
138
+
139
+
140
+ # @return [Async::Promise] resolves with the first connected peer
141
+ def peer_connected
142
+ @lifecycle.peer_connected
143
+ end
144
+
145
+
146
+ # @return [Async::Promise] resolves when all peers have disconnected
147
+ # (edge-triggered, after at least one peer connected)
148
+ def all_peers_gone
149
+ @lifecycle.all_peers_gone
150
+ end
73
151
 
74
152
 
75
- def closed? = @lifecycle.closed?
153
+ # Called by ConnectionLifecycle teardown. Resolves `all_peers_gone`
154
+ # if the connection set is now empty and we had peers.
155
+ def resolve_all_peers_gone_if_empty
156
+ @lifecycle.resolve_all_peers_gone_if_empty(@connections)
157
+ end
158
+
159
+
160
+ # @return [Boolean]
161
+ def reconnect_enabled
162
+ @lifecycle.reconnect_enabled
163
+ end
164
+
165
+
166
+ # Disables or re-enables automatic reconnect. {#maybe_reconnect}
167
+ # consults this flag before scheduling a redial; {TransientMonitor}
168
+ # flips it before draining.
169
+ def reconnect_enabled=(value)
170
+ @lifecycle.reconnect_enabled = value
171
+ end
172
+
173
+
174
+ # Closes only the recv side. Buffered messages drain, then
175
+ # {Socket#receive} returns nil. Send side remains operational.
176
+ def close_read
177
+ @routing.close_read if @routing.respond_to?(:close_read)
178
+ end
76
179
 
77
180
 
78
181
  # Stores the parent Async task that long-lived NNQ fibers will
79
182
  # attach to. The caller (Socket) is responsible for picking the
80
183
  # right one (the user's current task, or Reactor.root_task).
81
- def capture_parent_task(task)
82
- on_io_thread = task.equal?(Reactor.root_task)
184
+ def capture_parent_task(task, on_io_thread:)
83
185
  @lifecycle.capture_parent_task(task, on_io_thread: on_io_thread)
84
186
  end
85
187
 
@@ -88,22 +190,48 @@ module NNQ
88
190
  def bind(endpoint)
89
191
  transport = transport_for(endpoint)
90
192
  listener = transport.bind(endpoint, self)
91
- listener.start_accept_loop(@lifecycle.parent_task) do |io, framing = :tcp|
193
+ listener.start_accept_loop(@lifecycle.barrier) do |io, framing = :tcp|
92
194
  handle_accepted(io, endpoint: endpoint, framing: framing)
93
195
  end
94
196
  @listeners << listener
95
197
  @last_endpoint = listener.endpoint
198
+ emit_monitor_event(:listening, endpoint: @last_endpoint)
96
199
  end
97
200
 
98
201
 
99
- # Connects to +endpoint+. Synchronous on first attempt; reconnect
100
- # is wired in Phase 1.1.
202
+ # Connects to +endpoint+. Non-blocking for tcp:// and ipc:// — the
203
+ # actual dial happens inside a background reconnect task that
204
+ # retries with exponential back-off until the peer becomes
205
+ # reachable. Inproc connect is synchronous and instant.
101
206
  def connect(endpoint)
102
- transport = transport_for(endpoint)
103
- transport.connect(endpoint, self)
207
+ @dialed << endpoint
104
208
  @last_endpoint = endpoint
105
- rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH => e
106
- raise Error, "could not connect to #{endpoint}: #{e.class}: #{e.message}"
209
+
210
+ if endpoint.start_with?("inproc://")
211
+ transport_for(endpoint).connect(endpoint, self)
212
+ else
213
+ emit_monitor_event(:connect_delayed, endpoint: endpoint)
214
+ Reconnect.schedule(endpoint, @options, @lifecycle.barrier, self, delay: 0)
215
+ end
216
+ end
217
+
218
+
219
+ # Schedules a reconnect for +endpoint+ if auto-reconnect is enabled
220
+ # and the endpoint is still in the dialed set. Called from the
221
+ # connection lifecycle's `lost!` path.
222
+ def maybe_reconnect(endpoint)
223
+ return unless endpoint && @dialed.include?(endpoint)
224
+ return unless @lifecycle.alive? && @lifecycle.reconnect_enabled
225
+ return if endpoint.start_with?("inproc://")
226
+ Reconnect.schedule(endpoint, @options, @lifecycle.barrier, self)
227
+ end
228
+
229
+
230
+ # Public so {Reconnect} can dial directly without re-deriving the
231
+ # transport from the URL each iteration.
232
+ def transport_for(endpoint)
233
+ scheme = endpoint[/\A([a-z+]+):\/\//i, 1] or raise Error, "no scheme: #{endpoint}"
234
+ TRANSPORTS[scheme] or raise Error, "unsupported transport: #{scheme}"
107
235
  end
108
236
 
109
237
 
@@ -112,6 +240,7 @@ module NNQ
112
240
  lifecycle = ConnectionLifecycle.new(self, endpoint: endpoint, framing: framing)
113
241
  lifecycle.handshake!(io)
114
242
  spawn_recv_loop(lifecycle.conn) if @routing.respond_to?(:enqueue) && @connections.key?(lifecycle.conn)
243
+ lifecycle.start_supervisor!
115
244
  rescue ConnectionRejected
116
245
  # routing rejected this peer (e.g. PAIR already bonded) — lifecycle cleaned up
117
246
  rescue => e
@@ -124,16 +253,19 @@ module NNQ
124
253
  lifecycle = ConnectionLifecycle.new(self, endpoint: endpoint, framing: framing)
125
254
  lifecycle.handshake!(io)
126
255
  spawn_recv_loop(lifecycle.conn) if @routing.respond_to?(:enqueue) && @connections.key?(lifecycle.conn)
256
+ lifecycle.start_supervisor!
127
257
  rescue ConnectionRejected
128
258
  # unusual on connect side, but handled identically
129
259
  end
130
260
 
131
261
 
132
- # Spawns a task under the socket's parent task. Used by routing
133
- # strategies (e.g. PUSH send pump) to attach long-lived fibers to
134
- # the engine's lifecycle without going through transient: true.
135
- def spawn_task(annotation:, &block)
136
- @lifecycle.parent_task.async(annotation: annotation, &block)
262
+ # Spawns a task under the given parent barrier (defaults to the
263
+ # socket-level barrier). Used by routing strategies (e.g. PUSH send
264
+ # pump) to attach long-lived fibers to the engine's lifecycle. The
265
+ # parent barrier tracks every spawned task so teardown is a single
266
+ # barrier.stop call.
267
+ def spawn_task(annotation:, parent: @lifecycle.barrier, &block)
268
+ parent.async(annotation: annotation, &block)
137
269
  end
138
270
 
139
271
 
@@ -144,15 +276,27 @@ module NNQ
144
276
  # to abort with IOError.
145
277
  def close
146
278
  return unless @lifecycle.alive?
279
+
147
280
  @lifecycle.start_closing!
148
281
  @listeners.each(&:stop)
149
282
  drain_send_queue(@options.linger)
150
283
  @routing.close if @routing.respond_to?(:close)
284
+
151
285
  # Tear down each remaining connection via its lifecycle. The
152
286
  # collection mutates during iteration, so snapshot the values.
153
287
  @connections.values.each(&:close!)
288
+
289
+ # Cascade-cancel every remaining task (reconnect loops, accept
290
+ # loops, supervisors) in one shot.
291
+ @lifecycle.barrier&.stop
154
292
  @lifecycle.finish_closing!
155
293
  @new_pipe.signal
294
+
295
+ # Unblock anyone waiting on peer_connected when the socket is
296
+ # closed before a peer ever arrived.
297
+ @lifecycle.peer_connected.resolve(nil) unless @lifecycle.peer_connected.resolved?
298
+ emit_monitor_event(:closed)
299
+ close_monitor_queue
156
300
  end
157
301
 
158
302
 
@@ -165,34 +309,41 @@ module NNQ
165
309
 
166
310
  private
167
311
 
312
+
313
+ def close_monitor_queue
314
+ return unless @monitor_queue
315
+ @monitor_queue.enqueue(nil)
316
+ end
317
+
318
+
168
319
  def drain_send_queue(timeout)
169
320
  return unless @routing.respond_to?(:send_queue_drained?)
170
321
  return if @connections.empty?
322
+
171
323
  deadline = timeout ? Async::Clock.now + timeout : nil
324
+
172
325
  until @routing.send_queue_drained?
173
326
  break if deadline && (deadline - Async::Clock.now) <= 0
174
327
  sleep 0.001
175
328
  end
329
+ rescue Async::Stop
330
+ # Parent task is being cancelled — stop draining and let close
331
+ # proceed with the rest of teardown instead of propagating the
332
+ # cancellation out of the ensure path.
176
333
  end
177
334
 
178
335
 
179
336
  def spawn_recv_loop(conn)
180
- @lifecycle.parent_task.async(annotation: "nnq recv #{conn.endpoint}") do
337
+ @connections[conn].barrier.async(annotation: "nnq recv #{conn.endpoint}") do
181
338
  loop do
182
339
  body = conn.receive_message
340
+ emit_verbose_msg_received(body)
183
341
  @routing.enqueue(body, conn)
184
- rescue EOFError, IOError, Errno::ECONNRESET, Async::Stop
342
+ rescue *CONNECTION_LOST, Async::Stop
185
343
  break
186
344
  end
187
- ensure
188
- handle_connection_lost(conn)
189
345
  end
190
346
  end
191
347
 
192
-
193
- def transport_for(endpoint)
194
- scheme = endpoint[/\A([a-z+]+):\/\//i, 1] or raise Error, "no scheme: #{endpoint}"
195
- TRANSPORTS[scheme] or raise Error, "unsupported transport: #{scheme}"
196
- end
197
348
  end
198
349
  end
data/lib/nnq/error.rb CHANGED
@@ -1,10 +1,30 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module NNQ
4
- class Error < RuntimeError; end
5
- class ClosedError < Error; end
6
- class ProtocolError < Error; end
7
- class TimeoutError < Error; end
8
- class RequestCancelled < Error; end
9
- class ConnectionRejected < Error; end
4
+ class Error < RuntimeError
5
+ end
6
+
7
+
8
+ class ClosedError < Error
9
+ end
10
+
11
+
12
+ class ProtocolError < Error
13
+ end
14
+
15
+
16
+ class TimeoutError < Error
17
+ end
18
+
19
+
20
+ class RequestCancelled < Error
21
+ end
22
+
23
+
24
+ class ConnectionRejected < Error
25
+ end
26
+
27
+
28
+ class TimedOut < Error
29
+ end
10
30
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NNQ
4
+ # Lifecycle event emitted by {Socket#monitor}.
5
+ #
6
+ # @!attribute [r] type
7
+ # @return [Symbol] event type (:listening, :connected, :disconnected, ...)
8
+ # @!attribute [r] endpoint
9
+ # @return [String, nil] the endpoint involved
10
+ # @!attribute [r] detail
11
+ # @return [Hash, nil] extra context
12
+ #
13
+ MonitorEvent = Data.define(:type, :endpoint, :detail) do
14
+ def initialize(type:, endpoint: nil, detail: nil)
15
+ super
16
+ end
17
+ end
18
+ end
data/lib/nnq/options.rb CHANGED
@@ -18,14 +18,21 @@ module NNQ
18
18
  attr_accessor :reconnect_interval
19
19
  attr_accessor :max_message_size
20
20
  attr_accessor :send_hwm
21
+ attr_accessor :survey_time
21
22
 
22
- def initialize(linger: nil, send_hwm: DEFAULT_HWM)
23
+
24
+ # @param linger [Numeric] linger period in seconds on close
25
+ # (default Float::INFINITY = wait forever, matching libzmq).
26
+ # Pass 0 for immediate drop-on-close.
27
+ def initialize(linger: Float::INFINITY, send_hwm: DEFAULT_HWM)
23
28
  @linger = linger
24
29
  @read_timeout = nil
25
30
  @write_timeout = nil
26
31
  @reconnect_interval = 0.1
27
32
  @max_message_size = nil
28
33
  @send_hwm = send_hwm
34
+ @survey_time = 1.0
29
35
  end
36
+
30
37
  end
31
38
  end
data/lib/nnq/pair.rb CHANGED
@@ -8,8 +8,9 @@ module NNQ
8
8
  # peer. First peer to connect wins; subsequent peers are dropped
9
9
  # until the current one disconnects. No SP header on the wire.
10
10
  #
11
- class PAIR < Socket
11
+ class PAIR0 < Socket
12
12
  def send(body)
13
+ body = frozen_binary(body)
13
14
  Reactor.run { @engine.routing.send(body) }
14
15
  end
15
16
 
@@ -21,6 +22,7 @@ module NNQ
21
22
 
22
23
  private
23
24
 
25
+
24
26
  def protocol
25
27
  Protocol::SP::Protocols::PAIR_V0
26
28
  end
@@ -30,4 +32,7 @@ module NNQ
30
32
  Routing::Pair.new(engine)
31
33
  end
32
34
  end
35
+
36
+
37
+ PAIR = PAIR0
33
38
  end
data/lib/nnq/pub_sub.rb CHANGED
@@ -10,14 +10,16 @@ module NNQ
10
10
  # a slow peer drops messages instead of blocking fast peers.
11
11
  # Defaults to listening.
12
12
  #
13
- class PUB < Socket
13
+ class PUB0 < Socket
14
14
  def send(body)
15
+ body = frozen_binary(body)
15
16
  Reactor.run { @engine.routing.send(body) }
16
17
  end
17
18
 
18
19
 
19
20
  private
20
21
 
22
+
21
23
  def protocol
22
24
  Protocol::SP::Protocols::PUB_V0
23
25
  end
@@ -34,7 +36,7 @@ module NNQ
34
36
  # are delivered — matching nng (unlike pre-4.x ZeroMQ). Defaults
35
37
  # to dialing.
36
38
  #
37
- class SUB < Socket
39
+ class SUB0 < Socket
38
40
  # Subscribes to +prefix+. Bytes-level match. The empty string
39
41
  # matches everything.
40
42
  #
@@ -60,6 +62,7 @@ module NNQ
60
62
 
61
63
  private
62
64
 
65
+
63
66
  def protocol
64
67
  Protocol::SP::Protocols::SUB_V0
65
68
  end
@@ -69,4 +72,8 @@ module NNQ
69
72
  Routing::Sub.new
70
73
  end
71
74
  end
75
+
76
+
77
+ PUB = PUB0
78
+ SUB = SUB0
72
79
  end
data/lib/nnq/push_pull.rb CHANGED
@@ -9,14 +9,16 @@ module NNQ
9
9
  # bounded send queue (`send_hwm`); per-peer send pumps work-steal from
10
10
  # it. Defaults to dialing.
11
11
  #
12
- class PUSH < Socket
12
+ class PUSH0 < Socket
13
13
  def send(body)
14
+ body = frozen_binary(body)
14
15
  Reactor.run { @engine.routing.send(body) }
15
16
  end
16
17
 
17
18
 
18
19
  private
19
20
 
21
+
20
22
  def protocol
21
23
  Protocol::SP::Protocols::PUSH_V0
22
24
  end
@@ -32,14 +34,21 @@ module NNQ
32
34
  # from all live PUSH peers into one unbounded receive queue. Defaults
33
35
  # to listening.
34
36
  #
35
- class PULL < Socket
37
+ class PULL0 < Socket
36
38
  def receive
37
- Reactor.run { @engine.routing.receive }
39
+ Reactor.run do
40
+ if (timeout = @engine.options.read_timeout)
41
+ Fiber.scheduler.with_timeout(timeout) { @engine.routing.receive }
42
+ else
43
+ @engine.routing.receive
44
+ end
45
+ end
38
46
  end
39
47
 
40
48
 
41
49
  private
42
50
 
51
+
43
52
  def protocol
44
53
  Protocol::SP::Protocols::PULL_V0
45
54
  end
@@ -49,4 +58,8 @@ module NNQ
49
58
  Routing::Pull.new
50
59
  end
51
60
  end
61
+
62
+
63
+ PUSH = PUSH0
64
+ PULL = PULL0
52
65
  end
data/lib/nnq/reactor.rb CHANGED
@@ -22,11 +22,14 @@ module NNQ
22
22
  @root_task = nil
23
23
  @work_queue = nil
24
24
 
25
+
25
26
  class << self
26
27
  def root_task
27
28
  return @root_task if @root_task
29
+
28
30
  @mutex.synchronize do
29
31
  return @root_task if @root_task
32
+
30
33
  ready = Thread::Queue.new
31
34
  @work_queue = Async::Queue.new
32
35
  @thread = Thread.new { run_reactor(ready) }
@@ -34,6 +37,7 @@ module NNQ
34
37
  @root_task = ready.pop
35
38
  at_exit { stop! }
36
39
  end
40
+
37
41
  @root_task
38
42
  end
39
43
 
@@ -42,12 +46,10 @@ module NNQ
42
46
  if Async::Task.current?
43
47
  yield
44
48
  else
45
- result = Thread::Queue.new
49
+ result = Async::Promise.new
46
50
  root_task # ensure started
47
51
  @work_queue.push([block, result])
48
- status, value = result.pop
49
- raise value if status == :error
50
- value
52
+ result.wait
51
53
  end
52
54
  end
53
55
 
@@ -61,23 +63,22 @@ module NNQ
61
63
  @work_queue = nil
62
64
  end
63
65
 
66
+
64
67
  private
65
68
 
69
+
66
70
  def run_reactor(ready)
67
71
  Async do |task|
68
72
  ready.push(task)
73
+
69
74
  loop do
70
- item = @work_queue.dequeue
71
- break if item.nil?
75
+ item = @work_queue.dequeue or break
72
76
  block, result = item
73
- task.async do
74
- result.push([:ok, block.call])
75
- rescue => e
76
- result.push([:error, e])
77
- end
77
+ task.async { result.fulfill { block.call } }
78
78
  end
79
79
  end
80
80
  end
81
+
81
82
  end
82
83
  end
83
84
  end
data/lib/nnq/req_rep.rb CHANGED
@@ -9,16 +9,18 @@ module NNQ
9
9
  # request per socket. #send_request blocks until the matching reply
10
10
  # comes back.
11
11
  #
12
- class REQ < Socket
12
+ class REQ0 < Socket
13
13
  # Sends +body+ as a request, blocks until the matching reply
14
14
  # arrives. Returns the reply body (without the id header).
15
15
  def send_request(body)
16
+ body = frozen_binary(body)
16
17
  Reactor.run { @engine.routing.send_request(body) }
17
18
  end
18
19
 
19
20
 
20
21
  private
21
22
 
23
+
22
24
  def protocol
23
25
  Protocol::SP::Protocols::REQ_V0
24
26
  end
@@ -33,7 +35,7 @@ module NNQ
33
35
  # REP (nng rep0): server side of request/reply. Strict alternation
34
36
  # of #receive then #send_reply, per request.
35
37
  #
36
- class REP < Socket
38
+ class REP0 < Socket
37
39
  # Blocks until the next request arrives. Returns the request body.
38
40
  def receive
39
41
  Reactor.run { @engine.routing.receive }
@@ -42,12 +44,14 @@ module NNQ
42
44
 
43
45
  # Routes +body+ back to the pipe the most recent #receive came from.
44
46
  def send_reply(body)
47
+ body = frozen_binary(body)
45
48
  Reactor.run { @engine.routing.send_reply(body) }
46
49
  end
47
50
 
48
51
 
49
52
  private
50
53
 
54
+
51
55
  def protocol
52
56
  Protocol::SP::Protocols::REP_V0
53
57
  end
@@ -57,4 +61,8 @@ module NNQ
57
61
  Routing::Rep.new(engine)
58
62
  end
59
63
  end
64
+
65
+
66
+ REQ = REQ0
67
+ REP = REP0
60
68
  end