nnq 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +77 -0
- data/lib/nnq/bus.rb +37 -0
- data/lib/nnq/connection.rb +9 -2
- data/lib/nnq/engine/connection_lifecycle.rb +61 -10
- data/lib/nnq/engine/reconnect.rb +12 -3
- data/lib/nnq/engine/socket_lifecycle.rb +10 -2
- data/lib/nnq/engine.rb +77 -30
- data/lib/nnq/error.rb +26 -6
- data/lib/nnq/monitor_event.rb +3 -1
- data/lib/nnq/options.rb +8 -1
- data/lib/nnq/pair.rb +6 -1
- data/lib/nnq/pub_sub.rb +9 -2
- data/lib/nnq/push_pull.rb +9 -2
- data/lib/nnq/reactor.rb +12 -11
- data/lib/nnq/req_rep.rb +10 -2
- data/lib/nnq/routing/backtrace.rb +39 -0
- data/lib/nnq/routing/bus.rb +108 -0
- data/lib/nnq/routing/pair.rb +4 -1
- data/lib/nnq/routing/pub.rb +9 -5
- data/lib/nnq/routing/pull.rb +2 -1
- data/lib/nnq/routing/push.rb +2 -0
- data/lib/nnq/routing/rep.rb +6 -21
- data/lib/nnq/routing/req.rb +6 -2
- data/lib/nnq/routing/respondent.rb +84 -0
- data/lib/nnq/routing/send_pump.rb +27 -6
- data/lib/nnq/routing/sub.rb +4 -0
- data/lib/nnq/routing/surveyor.rb +138 -0
- data/lib/nnq/socket.rb +50 -7
- data/lib/nnq/surveyor_respondent.rb +78 -0
- data/lib/nnq/transport/inproc.rb +5 -0
- data/lib/nnq/transport/ipc.rb +3 -0
- data/lib/nnq/transport/tcp.rb +27 -5
- data/lib/nnq/version.rb +1 -1
- data/lib/nnq.rb +2 -0
- metadata +7 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 68a6dd62dc097b93740827c44f95bbfa983d5c7d6072b4625257bd2350ba23fe
|
|
4
|
+
data.tar.gz: 376c1ef08eda16a8950ae703d1798256c8c5ba720cd1e5d0e988f1a87067c093
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f88c29c241ea5922930342a11c00181007cf698045b219c6e7cc111d2563e37e7e56c64efbb61b48efa608eaa6ab3e7104bc33983c6202410f1745a50a082012
|
|
7
|
+
data.tar.gz: e1e42983059b5a280495216a08b6e6ac6a9e6eff9385a2093ad62b9ac7698a80d24a2f36f14c4f4f9579962b7fde9c83a042f04e1819e5963ba2ae94b5cbed4f
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,82 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.5.0 — 2026-04-15
|
|
4
|
+
|
|
5
|
+
- **Send-path freezes the body** — every public send method (PUSH,
|
|
6
|
+
PUB, PAIR, BUS, REQ, REP, SURVEYOR, RESPONDENT) routes the body
|
|
7
|
+
through `Socket#frozen_binary`, which coerces to a frozen binary
|
|
8
|
+
string. Fast path: already frozen and binary → returned as-is, no
|
|
9
|
+
allocation. Slow path: `body.b.freeze` (one copy). Prevents a
|
|
10
|
+
caller from mutating the string after it has been enqueued (the
|
|
11
|
+
body can sit in a send queue or per-peer queue until a pump
|
|
12
|
+
writes it).
|
|
13
|
+
- **Hot-path: no kwargs splat on verbose monitor emit** —
|
|
14
|
+
`emit_verbose_monitor_event(type, **detail)` replaced with dedicated
|
|
15
|
+
`emit_verbose_msg_sent(body)` / `emit_verbose_msg_received(body)`
|
|
16
|
+
helpers. Early-returns before allocating the detail hash, so the
|
|
17
|
+
send/recv loops pay nothing when `-vvv` is off. Send pump also
|
|
18
|
+
hoists the `verbose_monitor` check out of the batch `.each`.
|
|
19
|
+
- **YJIT-friendly `all?` blocks** — `@queues.each_value.all?(&:empty?)`
|
|
20
|
+
→ explicit `{ |q| q.empty? }` in pub/bus/surveyor `drained?`
|
|
21
|
+
(YJIT specializes explicit blocks, not `Symbol#to_proc`).
|
|
22
|
+
- **`Reactor.run` uses `Async::Promise`** — replaces the
|
|
23
|
+
`Thread::Queue` + manual `[:ok,val]`/`[:error,exc]` tagging with a
|
|
24
|
+
single `result.fulfill { block.call }` + `result.wait` pair.
|
|
25
|
+
- **`Engine#spawn_task(parent:)`** — renamed from `barrier:` to make it
|
|
26
|
+
clear any parent barrier is accepted, not just the socket-level one.
|
|
27
|
+
- **`linger` default → `Float::INFINITY`** — matches libzmq parity.
|
|
28
|
+
`Socket#close` waits forever for the send queue to drain. Pass
|
|
29
|
+
`linger: 0` for the old drop-on-close behavior.
|
|
30
|
+
- **`Socket.new` accepts a block** — File.open-style. The socket is
|
|
31
|
+
yielded to the block and `#close`d when the block returns (or
|
|
32
|
+
raises).
|
|
33
|
+
- **`drain_send_queue` rescues `Async::Stop`** — parent-task
|
|
34
|
+
cancellation during close no longer propagates out of the ensure
|
|
35
|
+
path; the rest of teardown runs.
|
|
36
|
+
- **Hot-path `Array#first`** — `send_pump` uses `Array#first` instead
|
|
37
|
+
of `[0]` for YJIT specialization.
|
|
38
|
+
- **Barrier-based cascading teardown** — `SocketLifecycle` owns a
|
|
39
|
+
socket-level `Async::Barrier`; `ConnectionLifecycle` creates a nested
|
|
40
|
+
per-connection barrier. All pumps, accept loops, reconnect loops, and
|
|
41
|
+
supervisors live under these barriers. `Engine#close` calls
|
|
42
|
+
`barrier.stop` once and every descendant unwinds atomically. Replaces
|
|
43
|
+
the manual `@tasks` array.
|
|
44
|
+
- **Per-connection supervisor** — each connection spawns a supervisor
|
|
45
|
+
task (on the socket barrier) that watches for the first pump exit and
|
|
46
|
+
runs `lost!` in `ensure`. Placing the supervisor outside the
|
|
47
|
+
per-connection barrier avoids the self-stop footgun.
|
|
48
|
+
- **Connect timeout** — `Transport::TCP.connect` uses
|
|
49
|
+
`Socket.tcp(host, port, connect_timeout:)` instead of `TCPSocket.new`.
|
|
50
|
+
Timeout derived from `reconnect_interval` (floor 0.5s). Fixes macOS
|
|
51
|
+
hang where IPv6 `connect(2)` never delivers `ECONNREFUSED`.
|
|
52
|
+
- **Handshake timeout** — SP greeting exchange wrapped in
|
|
53
|
+
`Async::Task#with_timeout(handshake_timeout)`. Prevents a hang when a
|
|
54
|
+
non-NNG service accepts the TCP connection but never sends a greeting.
|
|
55
|
+
- **Reconnect after handshake failure** — `ConnectionLifecycle#handshake!`
|
|
56
|
+
now calls `tear_down!(reconnect: true)` on error instead of bare
|
|
57
|
+
`transition!(:closed)`, so the endpoint doesn't go dead when a peer
|
|
58
|
+
RSTs mid-handshake.
|
|
59
|
+
- **Quantized reconnect sleeps** — `Reconnect#quantized_wait` aligns
|
|
60
|
+
retries to wall-clock grid boundaries. Multiple clients reconnecting
|
|
61
|
+
with the same interval wake at the same instant.
|
|
62
|
+
- **Send pump fairness yield** — `Async::Task.current.yield` after each
|
|
63
|
+
batch write ensures peer pumps get a turn when the queue stays
|
|
64
|
+
non-empty.
|
|
65
|
+
- Add `DESIGN.md` documenting the architecture.
|
|
66
|
+
- **Versioned socket names** — `PUSH` → `PUSH0`, `PULL` → `PULL0`, etc.
|
|
67
|
+
Canonical names now include the SP protocol version. Unversioned
|
|
68
|
+
aliases (`NNQ::PUSH = NNQ::PUSH0`) are kept for backward compat.
|
|
69
|
+
- **`raw:` kwarg** — `Socket#initialize` accepts `raw: false`. Plumbing
|
|
70
|
+
for raw-mode routing (device/proxy support). No functional raw
|
|
71
|
+
routing yet.
|
|
72
|
+
- **`NNQ::BUS0`** — best-effort bidirectional mesh (bus0). Fan-out send
|
|
73
|
+
to all peers (drop when full), shared recv queue. Self-pairing.
|
|
74
|
+
- **`NNQ::SURVEYOR0` / `NNQ::RESPONDENT0`** — survey/response pattern
|
|
75
|
+
(survey0). Surveyor broadcasts a survey with a timed reply window
|
|
76
|
+
(`options.survey_time`, default 1s). Respondent echoes the backtrace
|
|
77
|
+
like REP. Shared `Routing::Backtrace` module extracted from REP.
|
|
78
|
+
- **`NNQ::TimedOut`** error raised when the survey window expires.
|
|
79
|
+
|
|
3
80
|
## 0.4.0 — 2026-04-09
|
|
4
81
|
|
|
5
82
|
- `Socket#all_peers_gone` — `Async::Promise` resolving the first time
|
data/lib/nnq/bus.rb
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "socket"
|
|
4
|
+
require_relative "routing/bus"
|
|
5
|
+
|
|
6
|
+
module NNQ
|
|
7
|
+
# BUS (nng bus0): best-effort bidirectional mesh. Every message sent
|
|
8
|
+
# goes to all directly connected peers. Every message received from
|
|
9
|
+
# any peer is delivered to the application. Self-pairing (BUS ↔ BUS).
|
|
10
|
+
#
|
|
11
|
+
# Send never blocks — if a peer's queue is full, the message is
|
|
12
|
+
# dropped for that peer (matching nng's best-effort semantics).
|
|
13
|
+
#
|
|
14
|
+
class BUS0 < Socket
|
|
15
|
+
def send(body)
|
|
16
|
+
body = frozen_binary(body)
|
|
17
|
+
Reactor.run { @engine.routing.send(body) }
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def receive
|
|
22
|
+
Reactor.run { @engine.routing.receive }
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
private
|
|
27
|
+
|
|
28
|
+
def protocol
|
|
29
|
+
Protocol::SP::Protocols::BUS_V0
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def build_routing(engine)
|
|
34
|
+
Routing::Bus.new(engine)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
data/lib/nnq/connection.rb
CHANGED
|
@@ -12,9 +12,11 @@ module NNQ
|
|
|
12
12
|
# @return [Protocol::SP::Connection]
|
|
13
13
|
attr_reader :sp
|
|
14
14
|
|
|
15
|
+
|
|
15
16
|
# @return [String, nil] endpoint URI we connected to / accepted from
|
|
16
17
|
attr_reader :endpoint
|
|
17
18
|
|
|
19
|
+
|
|
18
20
|
# @param sp [Protocol::SP::Connection] handshake-completed SP connection
|
|
19
21
|
# @param endpoint [String, nil]
|
|
20
22
|
def initialize(sp, endpoint: nil)
|
|
@@ -25,7 +27,9 @@ module NNQ
|
|
|
25
27
|
|
|
26
28
|
|
|
27
29
|
# @return [Integer] peer protocol id (e.g. Protocols::PULL_V0)
|
|
28
|
-
def peer_protocol
|
|
30
|
+
def peer_protocol
|
|
31
|
+
@sp.peer_protocol
|
|
32
|
+
end
|
|
29
33
|
|
|
30
34
|
|
|
31
35
|
# Writes one message into the SP connection's send buffer (no flush).
|
|
@@ -77,7 +81,9 @@ module NNQ
|
|
|
77
81
|
|
|
78
82
|
|
|
79
83
|
# @return [Boolean]
|
|
80
|
-
def closed?
|
|
84
|
+
def closed?
|
|
85
|
+
@closed
|
|
86
|
+
end
|
|
81
87
|
|
|
82
88
|
|
|
83
89
|
# Closes the underlying SP connection. Safe to call twice.
|
|
@@ -86,5 +92,6 @@ module NNQ
|
|
|
86
92
|
@closed = true
|
|
87
93
|
@sp.close
|
|
88
94
|
end
|
|
95
|
+
|
|
89
96
|
end
|
|
90
97
|
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "async/barrier"
|
|
3
4
|
require "protocol/sp"
|
|
4
5
|
require_relative "../connection"
|
|
5
6
|
|
|
@@ -42,6 +43,12 @@ module NNQ
|
|
|
42
43
|
# @return [Symbol]
|
|
43
44
|
attr_reader :state
|
|
44
45
|
|
|
46
|
+
# @return [Async::Barrier] holds all per-connection pump tasks
|
|
47
|
+
# (send pump, recv pump). When the connection is torn down,
|
|
48
|
+
# {#tear_down!} calls `@barrier.stop` to cancel every sibling
|
|
49
|
+
# task atomically.
|
|
50
|
+
attr_reader :barrier
|
|
51
|
+
|
|
45
52
|
|
|
46
53
|
# @param engine [Engine]
|
|
47
54
|
# @param endpoint [String, nil]
|
|
@@ -52,6 +59,7 @@ module NNQ
|
|
|
52
59
|
@framing = framing
|
|
53
60
|
@state = :new
|
|
54
61
|
@conn = nil
|
|
62
|
+
@barrier = Async::Barrier.new(parent: engine.barrier)
|
|
55
63
|
end
|
|
56
64
|
|
|
57
65
|
|
|
@@ -68,13 +76,15 @@ module NNQ
|
|
|
68
76
|
max_message_size: @engine.options.max_message_size,
|
|
69
77
|
framing: @framing,
|
|
70
78
|
)
|
|
71
|
-
sp.handshake!
|
|
79
|
+
Async::Task.current.with_timeout(handshake_timeout) { sp.handshake! }
|
|
72
80
|
ready!(NNQ::Connection.new(sp, endpoint: @endpoint))
|
|
73
81
|
@conn
|
|
74
|
-
rescue =>
|
|
75
|
-
@engine.emit_monitor_event(:handshake_failed, endpoint: @endpoint, detail: { error:
|
|
82
|
+
rescue Protocol::SP::Error, *CONNECTION_LOST, Async::TimeoutError => error
|
|
83
|
+
@engine.emit_monitor_event(:handshake_failed, endpoint: @endpoint, detail: { error: error })
|
|
76
84
|
io.close rescue nil
|
|
77
|
-
|
|
85
|
+
# Full tear-down with reconnect: without this, the endpoint
|
|
86
|
+
# goes dead when a peer RSTs mid-handshake.
|
|
87
|
+
tear_down!(reconnect: true)
|
|
78
88
|
raise
|
|
79
89
|
end
|
|
80
90
|
|
|
@@ -83,16 +93,28 @@ module NNQ
|
|
|
83
93
|
# asks the engine to schedule a reconnect (if the endpoint is in
|
|
84
94
|
# the dialed set and reconnect is still enabled).
|
|
85
95
|
def lost!
|
|
86
|
-
|
|
87
|
-
tear_down!
|
|
88
|
-
@engine.maybe_reconnect(ep)
|
|
96
|
+
tear_down!(reconnect: true)
|
|
89
97
|
end
|
|
90
98
|
|
|
91
99
|
|
|
92
100
|
# Deliberate close (engine shutdown or routing eviction). Does
|
|
93
101
|
# not trigger reconnect.
|
|
94
102
|
def close!
|
|
95
|
-
tear_down!
|
|
103
|
+
tear_down!(reconnect: false)
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# Starts a supervisor for this connection. Must be called after
|
|
108
|
+
# all per-connection pumps (recv loop, send pump) have been
|
|
109
|
+
# spawned on the connection barrier. The supervisor blocks until
|
|
110
|
+
# the first pump exits, then runs tear_down! via lost!.
|
|
111
|
+
#
|
|
112
|
+
# Called by Engine#handle_accepted / Engine#handle_connected after
|
|
113
|
+
# spawning the recv loop — routing's connection_added may have
|
|
114
|
+
# already spawned send pumps during ready!, so the barrier is
|
|
115
|
+
# guaranteed non-empty by then.
|
|
116
|
+
def start_supervisor!
|
|
117
|
+
start_supervisor unless @barrier.empty?
|
|
96
118
|
end
|
|
97
119
|
|
|
98
120
|
|
|
@@ -106,7 +128,7 @@ module NNQ
|
|
|
106
128
|
@engine.routing.connection_added(conn) if @engine.routing.respond_to?(:connection_added)
|
|
107
129
|
rescue ConnectionRejected
|
|
108
130
|
@engine.emit_monitor_event(:connection_rejected, endpoint: @endpoint)
|
|
109
|
-
tear_down!
|
|
131
|
+
tear_down!(reconnect: false)
|
|
110
132
|
raise
|
|
111
133
|
end
|
|
112
134
|
@engine.lifecycle.peer_connected.resolve(conn) unless @engine.lifecycle.peer_connected.resolved?
|
|
@@ -116,7 +138,7 @@ module NNQ
|
|
|
116
138
|
end
|
|
117
139
|
|
|
118
140
|
|
|
119
|
-
def tear_down!
|
|
141
|
+
def tear_down!(reconnect: false)
|
|
120
142
|
return if @state == :closed
|
|
121
143
|
transition!(:closed)
|
|
122
144
|
if @conn
|
|
@@ -126,6 +148,35 @@ module NNQ
|
|
|
126
148
|
@engine.emit_monitor_event(:disconnected, endpoint: @endpoint)
|
|
127
149
|
@engine.resolve_all_peers_gone_if_empty
|
|
128
150
|
end
|
|
151
|
+
@engine.maybe_reconnect(@endpoint) if reconnect
|
|
152
|
+
# Cancel every sibling pump of this connection. The caller is
|
|
153
|
+
# the supervisor task, which is NOT in the barrier — so there
|
|
154
|
+
# is no self-stop risk.
|
|
155
|
+
@barrier.stop
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Spawns a supervisor task on the *socket-level* barrier (not the
|
|
160
|
+
# per-connection barrier) that blocks on the first pump to finish
|
|
161
|
+
# and then triggers teardown.
|
|
162
|
+
def start_supervisor
|
|
163
|
+
@engine.barrier.async(transient: true, annotation: "conn supervisor") do
|
|
164
|
+
@barrier.wait { |task| task.wait; break }
|
|
165
|
+
rescue Async::Stop, Async::Cancel
|
|
166
|
+
rescue *CONNECTION_LOST
|
|
167
|
+
ensure
|
|
168
|
+
lost!
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# Handshake timeout: same logic as TCP.connect_timeout — derived
|
|
174
|
+
# from reconnect_interval (floor 0.5s). Prevents a hang when the
|
|
175
|
+
# peer accepts the TCP connection but never sends an SP greeting.
|
|
176
|
+
def handshake_timeout
|
|
177
|
+
ri = @engine.options.reconnect_interval
|
|
178
|
+
ri = ri.end if ri.is_a?(Range)
|
|
179
|
+
[ri, 0.5].max
|
|
129
180
|
end
|
|
130
181
|
|
|
131
182
|
|
data/lib/nnq/engine/reconnect.rb
CHANGED
|
@@ -55,10 +55,10 @@ module NNQ
|
|
|
55
55
|
def run(parent_task, delay: nil)
|
|
56
56
|
delay, max_delay = init_delay(delay)
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
parent_task.async(transient: true, annotation: "nnq reconnect #{@endpoint}") do
|
|
59
59
|
loop do
|
|
60
60
|
break if @engine.closed?
|
|
61
|
-
sleep delay if delay > 0
|
|
61
|
+
sleep quantized_wait(delay) if delay > 0
|
|
62
62
|
break if @engine.closed?
|
|
63
63
|
begin
|
|
64
64
|
@engine.transport_for(@endpoint).connect(@endpoint, @engine)
|
|
@@ -70,13 +70,22 @@ module NNQ
|
|
|
70
70
|
end
|
|
71
71
|
rescue Async::Stop
|
|
72
72
|
end
|
|
73
|
-
@engine.tasks << task
|
|
74
73
|
end
|
|
75
74
|
|
|
76
75
|
|
|
77
76
|
private
|
|
78
77
|
|
|
79
78
|
|
|
79
|
+
# Wall-clock quantized sleep: wait until the next +delay+-sized
|
|
80
|
+
# grid tick. Multiple clients reconnecting with the same interval
|
|
81
|
+
# wake up at the same instant, collapsing staggered retries into
|
|
82
|
+
# aligned waves.
|
|
83
|
+
def quantized_wait(delay, now = Time.now.to_f)
|
|
84
|
+
wait = delay - (now % delay)
|
|
85
|
+
wait.positive? ? wait : delay
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
|
|
80
89
|
def init_delay(delay)
|
|
81
90
|
ri = @options.reconnect_interval
|
|
82
91
|
if ri.is_a?(Range)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "async/barrier"
|
|
3
4
|
require "async/promise"
|
|
4
5
|
|
|
5
6
|
module NNQ
|
|
@@ -42,9 +43,14 @@ module NNQ
|
|
|
42
43
|
# Edge-triggered: does not re-arm on reconnect.
|
|
43
44
|
attr_reader :all_peers_gone
|
|
44
45
|
|
|
46
|
+
# @return [Async::Barrier, nil] holds every socket-scoped task
|
|
47
|
+
# (connection supervisors, reconnect loops, accept loops).
|
|
48
|
+
# {Engine#close} calls +barrier.stop+ to cascade teardown
|
|
49
|
+
# through every per-connection barrier in one shot.
|
|
50
|
+
attr_reader :barrier
|
|
51
|
+
|
|
45
52
|
# @return [Boolean] when false, the engine must not schedule new
|
|
46
|
-
# reconnect attempts. Default true.
|
|
47
|
-
# reconnect loop yet, so this currently just records intent.
|
|
53
|
+
# reconnect attempts. Default true.
|
|
48
54
|
attr_accessor :reconnect_enabled
|
|
49
55
|
|
|
50
56
|
|
|
@@ -55,6 +61,7 @@ module NNQ
|
|
|
55
61
|
@peer_connected = Async::Promise.new
|
|
56
62
|
@all_peers_gone = Async::Promise.new
|
|
57
63
|
@reconnect_enabled = true
|
|
64
|
+
@barrier = nil
|
|
58
65
|
end
|
|
59
66
|
|
|
60
67
|
|
|
@@ -75,6 +82,7 @@ module NNQ
|
|
|
75
82
|
return false if @parent_task
|
|
76
83
|
@parent_task = task
|
|
77
84
|
@on_io_thread = on_io_thread
|
|
85
|
+
@barrier = Async::Barrier.new(parent: @parent_task)
|
|
78
86
|
transition!(:open)
|
|
79
87
|
true
|
|
80
88
|
end
|
data/lib/nnq/engine.rb
CHANGED
|
@@ -35,33 +35,44 @@ module NNQ
|
|
|
35
35
|
# @return [Integer] our SP protocol id (e.g. Protocols::PUSH_V0)
|
|
36
36
|
attr_reader :protocol
|
|
37
37
|
|
|
38
|
+
|
|
38
39
|
# @return [Options]
|
|
39
40
|
attr_reader :options
|
|
40
41
|
|
|
42
|
+
|
|
43
|
+
# @return [Routing strategy]
|
|
44
|
+
attr_reader :routing
|
|
45
|
+
|
|
46
|
+
|
|
41
47
|
# @return [Hash{NNQ::Connection => ConnectionLifecycle}]
|
|
42
48
|
attr_reader :connections
|
|
43
49
|
|
|
50
|
+
|
|
44
51
|
# @return [SocketLifecycle]
|
|
45
52
|
attr_reader :lifecycle
|
|
46
53
|
|
|
54
|
+
|
|
47
55
|
# @return [String, nil]
|
|
48
56
|
attr_reader :last_endpoint
|
|
49
57
|
|
|
58
|
+
|
|
50
59
|
# @return [Async::Condition] signaled when a new pipe is registered
|
|
51
60
|
attr_reader :new_pipe
|
|
52
61
|
|
|
62
|
+
|
|
53
63
|
# @return [Set<String>] endpoints we have called #connect on; used
|
|
54
64
|
# to decide whether to schedule a reconnect after a connection
|
|
55
65
|
# is lost.
|
|
56
66
|
attr_reader :dialed
|
|
57
67
|
|
|
58
|
-
# @return [Array<Async::Task>] transient tasks owned by the engine
|
|
59
|
-
# (currently just background reconnect loops). Stopped at #close.
|
|
60
|
-
attr_reader :tasks
|
|
61
|
-
|
|
62
68
|
|
|
63
69
|
# @return [Async::Queue, nil] monitor event queue (set by Socket#monitor)
|
|
64
70
|
attr_accessor :monitor_queue
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# @return [Boolean] when true, {#emit_verbose_msg_sent} and {#emit_verbose_msg_received} forward
|
|
74
|
+
# per-message traces (:message_sent / :message_received) to the
|
|
75
|
+
# monitor queue. Set by {Socket#monitor} via its +verbose:+ kwarg.
|
|
65
76
|
attr_accessor :verbose_monitor
|
|
66
77
|
|
|
67
78
|
|
|
@@ -80,7 +91,6 @@ module NNQ
|
|
|
80
91
|
@monitor_queue = nil
|
|
81
92
|
@verbose_monitor = false
|
|
82
93
|
@dialed = Set.new
|
|
83
|
-
@tasks = []
|
|
84
94
|
@routing = yield(self)
|
|
85
95
|
end
|
|
86
96
|
|
|
@@ -93,32 +103,51 @@ module NNQ
|
|
|
93
103
|
end
|
|
94
104
|
|
|
95
105
|
|
|
96
|
-
# Emits a verbose
|
|
97
|
-
#
|
|
98
|
-
|
|
106
|
+
# Emits a :message_sent verbose event. Early-returns before
|
|
107
|
+
# allocating the detail hash so the hot send path pays nothing
|
|
108
|
+
# when verbose monitoring is off.
|
|
109
|
+
def emit_verbose_msg_sent(body)
|
|
99
110
|
return unless @verbose_monitor
|
|
100
|
-
emit_monitor_event(
|
|
111
|
+
emit_monitor_event(:message_sent, detail: { body: body })
|
|
101
112
|
end
|
|
102
113
|
|
|
103
114
|
|
|
104
|
-
#
|
|
105
|
-
|
|
115
|
+
# Emits a :message_received verbose event. Same early-return
|
|
116
|
+
# discipline as {#emit_verbose_msg_sent}.
|
|
117
|
+
def emit_verbose_msg_received(body)
|
|
118
|
+
return unless @verbose_monitor
|
|
119
|
+
emit_monitor_event(:message_received, detail: { body: body })
|
|
120
|
+
end
|
|
106
121
|
|
|
107
122
|
|
|
108
123
|
# @return [Async::Task, nil]
|
|
109
|
-
def parent_task
|
|
124
|
+
def parent_task
|
|
125
|
+
@lifecycle.parent_task
|
|
126
|
+
end
|
|
110
127
|
|
|
111
128
|
|
|
112
|
-
|
|
129
|
+
# @return [Async::Barrier, nil]
|
|
130
|
+
def barrier
|
|
131
|
+
@lifecycle.barrier
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def closed?
|
|
136
|
+
@lifecycle.closed?
|
|
137
|
+
end
|
|
113
138
|
|
|
114
139
|
|
|
115
140
|
# @return [Async::Promise] resolves with the first connected peer
|
|
116
|
-
def peer_connected
|
|
141
|
+
def peer_connected
|
|
142
|
+
@lifecycle.peer_connected
|
|
143
|
+
end
|
|
117
144
|
|
|
118
145
|
|
|
119
146
|
# @return [Async::Promise] resolves when all peers have disconnected
|
|
120
147
|
# (edge-triggered, after at least one peer connected)
|
|
121
|
-
def all_peers_gone
|
|
148
|
+
def all_peers_gone
|
|
149
|
+
@lifecycle.all_peers_gone
|
|
150
|
+
end
|
|
122
151
|
|
|
123
152
|
|
|
124
153
|
# Called by ConnectionLifecycle teardown. Resolves `all_peers_gone`
|
|
@@ -129,7 +158,9 @@ module NNQ
|
|
|
129
158
|
|
|
130
159
|
|
|
131
160
|
# @return [Boolean]
|
|
132
|
-
def reconnect_enabled
|
|
161
|
+
def reconnect_enabled
|
|
162
|
+
@lifecycle.reconnect_enabled
|
|
163
|
+
end
|
|
133
164
|
|
|
134
165
|
|
|
135
166
|
# Disables or re-enables automatic reconnect. nnq has no reconnect
|
|
@@ -159,7 +190,7 @@ module NNQ
|
|
|
159
190
|
def bind(endpoint)
|
|
160
191
|
transport = transport_for(endpoint)
|
|
161
192
|
listener = transport.bind(endpoint, self)
|
|
162
|
-
listener.start_accept_loop(@lifecycle.
|
|
193
|
+
listener.start_accept_loop(@lifecycle.barrier) do |io, framing = :tcp|
|
|
163
194
|
handle_accepted(io, endpoint: endpoint, framing: framing)
|
|
164
195
|
end
|
|
165
196
|
@listeners << listener
|
|
@@ -175,11 +206,12 @@ module NNQ
|
|
|
175
206
|
def connect(endpoint)
|
|
176
207
|
@dialed << endpoint
|
|
177
208
|
@last_endpoint = endpoint
|
|
209
|
+
|
|
178
210
|
if endpoint.start_with?("inproc://")
|
|
179
211
|
transport_for(endpoint).connect(endpoint, self)
|
|
180
212
|
else
|
|
181
213
|
emit_monitor_event(:connect_delayed, endpoint: endpoint)
|
|
182
|
-
Reconnect.schedule(endpoint, @options, @lifecycle.
|
|
214
|
+
Reconnect.schedule(endpoint, @options, @lifecycle.barrier, self, delay: 0)
|
|
183
215
|
end
|
|
184
216
|
end
|
|
185
217
|
|
|
@@ -191,7 +223,7 @@ module NNQ
|
|
|
191
223
|
return unless endpoint && @dialed.include?(endpoint)
|
|
192
224
|
return unless @lifecycle.alive? && @lifecycle.reconnect_enabled
|
|
193
225
|
return if endpoint.start_with?("inproc://")
|
|
194
|
-
Reconnect.schedule(endpoint, @options, @lifecycle.
|
|
226
|
+
Reconnect.schedule(endpoint, @options, @lifecycle.barrier, self)
|
|
195
227
|
end
|
|
196
228
|
|
|
197
229
|
|
|
@@ -208,6 +240,7 @@ module NNQ
|
|
|
208
240
|
lifecycle = ConnectionLifecycle.new(self, endpoint: endpoint, framing: framing)
|
|
209
241
|
lifecycle.handshake!(io)
|
|
210
242
|
spawn_recv_loop(lifecycle.conn) if @routing.respond_to?(:enqueue) && @connections.key?(lifecycle.conn)
|
|
243
|
+
lifecycle.start_supervisor!
|
|
211
244
|
rescue ConnectionRejected
|
|
212
245
|
# routing rejected this peer (e.g. PAIR already bonded) — lifecycle cleaned up
|
|
213
246
|
rescue => e
|
|
@@ -220,16 +253,19 @@ module NNQ
|
|
|
220
253
|
lifecycle = ConnectionLifecycle.new(self, endpoint: endpoint, framing: framing)
|
|
221
254
|
lifecycle.handshake!(io)
|
|
222
255
|
spawn_recv_loop(lifecycle.conn) if @routing.respond_to?(:enqueue) && @connections.key?(lifecycle.conn)
|
|
256
|
+
lifecycle.start_supervisor!
|
|
223
257
|
rescue ConnectionRejected
|
|
224
258
|
# unusual on connect side, but handled identically
|
|
225
259
|
end
|
|
226
260
|
|
|
227
261
|
|
|
228
|
-
# Spawns a task under the
|
|
229
|
-
# strategies (e.g. PUSH send
|
|
230
|
-
# the engine's lifecycle
|
|
231
|
-
|
|
232
|
-
|
|
262
|
+
# Spawns a task under the given parent barrier (defaults to the
|
|
263
|
+
# socket-level barrier). Used by routing strategies (e.g. PUSH send
|
|
264
|
+
# pump) to attach long-lived fibers to the engine's lifecycle. The
|
|
265
|
+
# parent barrier tracks every spawned task so teardown is a single
|
|
266
|
+
# barrier.stop call.
|
|
267
|
+
def spawn_task(annotation:, parent: @lifecycle.barrier, &block)
|
|
268
|
+
parent.async(annotation: annotation, &block)
|
|
233
269
|
end
|
|
234
270
|
|
|
235
271
|
|
|
@@ -240,17 +276,22 @@ module NNQ
|
|
|
240
276
|
# to abort with IOError.
|
|
241
277
|
def close
|
|
242
278
|
return unless @lifecycle.alive?
|
|
279
|
+
|
|
243
280
|
@lifecycle.start_closing!
|
|
244
281
|
@listeners.each(&:stop)
|
|
245
|
-
@tasks.each { |t| t.stop rescue nil }
|
|
246
|
-
@tasks.clear
|
|
247
282
|
drain_send_queue(@options.linger)
|
|
248
283
|
@routing.close if @routing.respond_to?(:close)
|
|
284
|
+
|
|
249
285
|
# Tear down each remaining connection via its lifecycle. The
|
|
250
286
|
# collection mutates during iteration, so snapshot the values.
|
|
251
287
|
@connections.values.each(&:close!)
|
|
288
|
+
|
|
289
|
+
# Cascade-cancel every remaining task (reconnect loops, accept
|
|
290
|
+
# loops, supervisors) in one shot.
|
|
291
|
+
@lifecycle.barrier&.stop
|
|
252
292
|
@lifecycle.finish_closing!
|
|
253
293
|
@new_pipe.signal
|
|
294
|
+
|
|
254
295
|
# Unblock anyone waiting on peer_connected when the socket is
|
|
255
296
|
# closed before a peer ever arrived.
|
|
256
297
|
@lifecycle.peer_connected.resolve(nil) unless @lifecycle.peer_connected.resolved?
|
|
@@ -268,6 +309,7 @@ module NNQ
|
|
|
268
309
|
|
|
269
310
|
private
|
|
270
311
|
|
|
312
|
+
|
|
271
313
|
def close_monitor_queue
|
|
272
314
|
return unless @monitor_queue
|
|
273
315
|
@monitor_queue.enqueue(nil)
|
|
@@ -277,26 +319,31 @@ module NNQ
|
|
|
277
319
|
def drain_send_queue(timeout)
|
|
278
320
|
return unless @routing.respond_to?(:send_queue_drained?)
|
|
279
321
|
return if @connections.empty?
|
|
322
|
+
|
|
280
323
|
deadline = timeout ? Async::Clock.now + timeout : nil
|
|
324
|
+
|
|
281
325
|
until @routing.send_queue_drained?
|
|
282
326
|
break if deadline && (deadline - Async::Clock.now) <= 0
|
|
283
327
|
sleep 0.001
|
|
284
328
|
end
|
|
329
|
+
rescue Async::Stop
|
|
330
|
+
# Parent task is being cancelled — stop draining and let close
|
|
331
|
+
# proceed with the rest of teardown instead of propagating the
|
|
332
|
+
# cancellation out of the ensure path.
|
|
285
333
|
end
|
|
286
334
|
|
|
287
335
|
|
|
288
336
|
def spawn_recv_loop(conn)
|
|
289
|
-
@
|
|
337
|
+
@connections[conn].barrier.async(annotation: "nnq recv #{conn.endpoint}") do
|
|
290
338
|
loop do
|
|
291
339
|
body = conn.receive_message
|
|
292
|
-
|
|
340
|
+
emit_verbose_msg_received(body)
|
|
293
341
|
@routing.enqueue(body, conn)
|
|
294
342
|
rescue *CONNECTION_LOST, Async::Stop
|
|
295
343
|
break
|
|
296
344
|
end
|
|
297
|
-
ensure
|
|
298
|
-
handle_connection_lost(conn)
|
|
299
345
|
end
|
|
300
346
|
end
|
|
347
|
+
|
|
301
348
|
end
|
|
302
349
|
end
|
data/lib/nnq/error.rb
CHANGED
|
@@ -1,10 +1,30 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module NNQ
|
|
4
|
-
class Error
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class
|
|
9
|
-
|
|
4
|
+
class Error < RuntimeError
|
|
5
|
+
end
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ClosedError < Error
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ProtocolError < Error
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TimeoutError < Error
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RequestCancelled < Error
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ConnectionRejected < Error
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TimedOut < Error
|
|
29
|
+
end
|
|
10
30
|
end
|
data/lib/nnq/monitor_event.rb
CHANGED