pgnudge 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pgnudge/__init__.py +17 -0
- pgnudge/core.py +39 -0
- pgnudge/engine.py +286 -0
- pgnudge/proto.py +248 -0
- pgnudge/py.typed +0 -0
- pgnudge/wal.py +229 -0
- pgnudge-1.0.0.dist-info/METADATA +219 -0
- pgnudge-1.0.0.dist-info/RECORD +10 -0
- pgnudge-1.0.0.dist-info/WHEEL +4 -0
- pgnudge-1.0.0.dist-info/licenses/LICENSE +21 -0
pgnudge/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Push-only change nudges from PostgreSQL; nothing on the server outlives the connection."""
|
|
2
|
+
|
|
3
|
+
from pgnudge.core import Batch, Event, FeedItem, Resync
|
|
4
|
+
from pgnudge.proto import PgServerError
|
|
5
|
+
from pgnudge.wal import WalFeed
|
|
6
|
+
|
|
7
|
+
__version__ = "1.0.0"
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"Batch",
|
|
11
|
+
"Event",
|
|
12
|
+
"FeedItem",
|
|
13
|
+
"PgServerError",
|
|
14
|
+
"Resync",
|
|
15
|
+
"WalFeed",
|
|
16
|
+
"__version__",
|
|
17
|
+
]
|
pgnudge/core.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Feed contract: the two item types every feed emits.
|
|
2
|
+
|
|
3
|
+
``Resync`` (reload everything) and ``Batch`` (coalesced wakeups) —
|
|
4
|
+
at-least-once, every gap bracketed by a Resync. See README.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import TypeAlias
|
|
9
|
+
|
|
10
|
+
__all__ = ["Event", "Batch", "Resync", "FeedItem"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True, slots=True)
|
|
14
|
+
class Event:
|
|
15
|
+
"""One coalesced wakeup; ``count`` = arrivals of this payload in the window."""
|
|
16
|
+
|
|
17
|
+
payload: str
|
|
18
|
+
first_seen: float # time.time() of first arrival in this batch
|
|
19
|
+
count: int = 1
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True, slots=True)
class Batch:
    """Everything collected in one debounce window.

    ``events`` holds one ``Event`` per distinct payload, kept in the order
    the payloads first arrived.
    """

    events: tuple[Event, ...]

    def payloads(self) -> tuple[str, ...]:
        """Return the payload of every event, in the same order as ``events``."""
        names = [event.payload for event in self.events]
        return tuple(names)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True, slots=True)
|
|
33
|
+
class Resync:
|
|
34
|
+
"""Reload-everything signal; reason is "connected" | "reconnected" | "overflow" | "failsafe"."""
|
|
35
|
+
|
|
36
|
+
reason: str
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
FeedItem: TypeAlias = Resync | Batch
|
pgnudge/engine.py
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
"""The machinery behind a feed, one class per concern, pure stdlib.
|
|
2
|
+
|
|
3
|
+
``Intake`` buffers raw wakeups, ``Coalescer`` dedups them, ``Debouncer``
|
|
4
|
+
decides when a window closes, ``Backoff`` paces reconnects, and
|
|
5
|
+
``FeedService`` wires them together behind the async-iterator surface
|
|
6
|
+
that ``BaseFeed`` exposes.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import contextlib
|
|
11
|
+
import random
|
|
12
|
+
import time
|
|
13
|
+
from collections.abc import AsyncIterator, Callable, Coroutine
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from types import TracebackType
|
|
16
|
+
from typing import Self
|
|
17
|
+
|
|
18
|
+
from pgnudge.core import Batch, Event, FeedItem, Resync
|
|
19
|
+
|
|
20
|
+
__all__ = ["Wakeup", "Intake", "Coalescer", "Debouncer", "Backoff", "FeedService", "BaseFeed"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(frozen=True, slots=True)
|
|
24
|
+
class Wakeup:
|
|
25
|
+
"""One raw arrival from a transport, pre-coalescing."""
|
|
26
|
+
|
|
27
|
+
payload: str
|
|
28
|
+
at: float # time.time() of arrival
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(slots=True, kw_only=True)
class Intake:
    """Bounded buffer of raw wakeups.

    The producer never blocks: when the queue is full the wakeup is
    dropped and ``overflowed`` is raised instead, to be picked up later by
    ``consume_overflow``.
    """

    maxsize: int
    queue: asyncio.Queue[Wakeup] = field(init=False, repr=False)
    overflowed: bool = field(init=False, default=False)

    def __post_init__(self) -> None:
        self.queue = asyncio.Queue(maxsize=self.maxsize)

    def push(self, payload: str) -> None:
        """Enqueue ``payload`` stamped with the current time, or flag overflow."""
        wakeup = Wakeup(payload=payload, at=time.time())
        try:
            self.queue.put_nowait(wakeup)
        except asyncio.QueueFull:
            self.overflowed = True

    async def get(self) -> Wakeup:
        """Wait for the next wakeup, however long it takes."""
        return await self.queue.get()

    async def get_within(self, timeout: float) -> Wakeup | None:
        """Wait up to ``timeout`` seconds for a wakeup; ``None`` on timeout."""
        # Deliberately asyncio.timeout rather than wait_for: on 3.11,
        # wait_for can swallow an external cancel when the inner get()
        # already holds an item, which leaves the pump task uncancellable
        # and aclose() hanging.
        try:
            async with asyncio.timeout(timeout):
                item = await self.queue.get()
        except TimeoutError:
            return None
        return item

    def consume_overflow(self) -> bool:
        """Report and reset the overflow flag set by ``push``.

        When the flag was set, the queue is also emptied before returning
        ``True``.
        """
        if not self.overflowed:
            return False
        self.overflowed = False
        self.drain()
        return True

    def drain(self) -> None:
        """Discard every wakeup currently queued."""
        with contextlib.suppress(asyncio.QueueEmpty):
            while True:
                self.queue.get_nowait()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass(slots=True)
class Coalescer:
    """Per-window dedup buffer keeping a single ``Event`` for each payload."""

    pending: dict[str, Event] = field(init=False, default_factory=dict)

    def add(self, wakeup: Wakeup) -> None:
        """Record one arrival: create the event, or bump its ``count``."""
        key = wakeup.payload
        seen = self.pending.get(key)
        if seen is not None:
            # Events are frozen, so a repeat arrival replaces the entry with
            # a copy whose count is one higher; first_seen is kept.
            self.pending[key] = Event(
                payload=seen.payload, first_seen=seen.first_seen, count=seen.count + 1
            )
            return
        self.pending[key] = Event(payload=key, first_seen=wakeup.at)

    def flush(self) -> Batch:
        """Hand back everything buffered as a ``Batch`` and start empty again."""
        events = tuple(self.pending.values())
        self.pending.clear()
        return Batch(events)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass(frozen=True, slots=True, kw_only=True)
class Debouncer:
    """Decides when a window closes.

    A window stays open while wakeups keep arriving less than ``debounce``
    seconds apart, but never longer than ``max_batch_wait`` seconds after
    its first wakeup was taken.
    """

    debounce: float
    max_batch_wait: float

    async def next_item(self, intake: Intake) -> FeedItem:
        """Gather one window from ``intake``.

        Returns the coalesced ``Batch``, or ``Resync("overflow")`` if the
        intake reported an overflow before or during the window.
        """
        opener = await intake.get()
        if intake.consume_overflow():
            return Resync("overflow")

        window = Coalescer()
        window.add(opener)
        cutoff = time.monotonic() + self.max_batch_wait
        while True:
            # Wait for the quiet period, but never past the hard cutoff.
            budget = min(self.debounce, cutoff - time.monotonic())
            if budget <= 0:
                break
            follower = await intake.get_within(budget)
            if follower is None:
                break
            window.add(follower)

        # Wakeups dropped while the window was open invalidate the batch.
        if intake.consume_overflow():
            return Resync("overflow")
        return window.flush()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@dataclass(frozen=True, slots=True)
|
|
130
|
+
class Backoff:
|
|
131
|
+
"""Jittered exponential reconnect delay."""
|
|
132
|
+
|
|
133
|
+
initial: float = 0.1
|
|
134
|
+
maximum: float = 5.0
|
|
135
|
+
|
|
136
|
+
def delay(self, attempt: int) -> float:
|
|
137
|
+
base = min(self.maximum, self.initial * (2.0 ** min(attempt - 1, 16)))
|
|
138
|
+
return base * random.uniform(0.5, 1.5)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@dataclass(slots=True, kw_only=True)
class FeedService:
    """Owns the moving parts of a feed: intake -> debouncer -> output queue.

    Also owns the background tasks (supervisor, pump, optional failsafe)
    and their shutdown. ``out`` carries ``FeedItem`` objects to the
    consumer; a ``None`` entry means the service is closed.
    """

    intake: Intake
    debouncer: Debouncer
    failsafe: float | None = None
    out: asyncio.Queue[FeedItem | None] = field(init=False, repr=False)  # None = closed
    tasks: list[asyncio.Task[None]] = field(init=False, default_factory=list)
    started: bool = field(init=False, default=False)
    closing: bool = field(init=False, default=False)

    def __post_init__(self) -> None:
        self.out = asyncio.Queue()

    # -- transport side --

    def push(self, payload: str) -> None:
        """Feed one raw wakeup into the intake."""
        self.intake.push(payload)

    def emit(self, item: FeedItem) -> None:
        """Put ``item`` straight on the output queue, bypassing the debouncer."""
        self.out.put_nowait(item)

    # -- lifecycle --

    def start(self, supervisor: Callable[[], Coroutine[None, None, None]], name: str) -> None:
        """Spawn the background tasks once; later calls are no-ops.

        Also a no-op once ``aclose`` has begun: tasks created after close
        would never be cancelled by anyone.
        """
        if self.started or self.closing:
            return
        self.started = True
        self.tasks.append(asyncio.create_task(supervisor(), name=f"{name}-supervisor"))
        self.tasks.append(asyncio.create_task(self._pump(), name=f"{name}-pump"))
        if self.failsafe is not None:
            self.tasks.append(asyncio.create_task(self._failsafe_loop(self.failsafe), name=f"{name}-failsafe"))

    async def aclose(self) -> None:
        """Cancel and await every task, then mark the output as closed.

        The close sentinel is always enqueued, even if a task had crashed
        or this call is itself cancelled, so consumers never hang. If a
        task ended with an exception (other than cancellation), the first
        such exception is re-raised after shutdown has completed.
        """
        if self.closing:
            return
        self.closing = True
        outcomes: list[object] = []
        try:
            for task in self.tasks:
                task.cancel()
            if self.tasks:
                # return_exceptions=True: one crashed task must not stop us
                # from awaiting the others.
                outcomes = await asyncio.gather(*self.tasks, return_exceptions=True)
        finally:
            self.out.put_nowait(None)
        for outcome in outcomes:
            # CancelledError is a BaseException, so it is skipped here.
            if isinstance(outcome, Exception):
                raise outcome

    # -- consumer side --

    async def next_item(self) -> FeedItem | None:
        """Next item, or ``None`` once closed."""
        item = await self.out.get()
        if item is None:
            # Leave the sentinel in place so every later (or concurrent)
            # caller also sees "closed" instead of blocking forever.
            self.out.put_nowait(None)
        return item

    # -- internal loops --

    async def _pump(self) -> None:
        """Forever move completed windows from the debouncer to the output."""
        while True:
            self.emit(await self.debouncer.next_item(self.intake))

    async def _failsafe_loop(self, interval: float) -> None:
        """Emit ``Resync("failsafe")`` every ``interval`` seconds."""
        while True:
            await asyncio.sleep(interval)
            self.emit(Resync("failsafe"))
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class BaseFeed:
    """Async-iterator surface over a ``FeedService``; subclasses provide the transport.

    Subclasses implement ``_supervisor`` (call ``_emit_resync`` per
    (re)connect, ``_push_raw`` per wakeup) and may override ``_extra_close``.

    Parameters (all keyword-only):
        debounce: quiet period in seconds that closes a batch window.
        max_batch_wait: hard cap in seconds on one window; defaults to
            ``debounce * 20`` when ``None``.
        failsafe: if set, interval in seconds between ``Resync("failsafe")``
            items.
        backoff: ``(initial, maximum)`` seconds for the reconnect delay.
        raw_queue_size: capacity of the raw wakeup buffer.
    """

    def __init__(
        self,
        *,
        debounce: float = 0.05,
        max_batch_wait: float | None = None,
        failsafe: float | None = None,
        backoff: tuple[float, float] = (0.1, 5.0),
        raw_queue_size: int = 8192,
    ) -> None:
        self._service = FeedService(
            intake=Intake(maxsize=raw_queue_size),
            debouncer=Debouncer(
                debounce=debounce,
                max_batch_wait=max_batch_wait if max_batch_wait is not None else debounce * 20,
            ),
            failsafe=failsafe,
        )
        self._backoff = Backoff(initial=backoff[0], maximum=backoff[1])
        self.connection_pid: int | None = None  # server backend pid while connected

    # -- lifecycle --

    async def __aenter__(self) -> Self:
        self._ensure_started()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        await self.aclose()

    def _ensure_started(self) -> None:
        """Start the background tasks unless the feed is already closing."""
        # Starting after aclose() would spawn tasks that nothing cancels.
        if self._service.closing:
            return
        self._service.start(self._supervisor, name=type(self).__name__)

    async def aclose(self) -> None:
        """Shut the feed down; safe to call more than once.

        ``_extra_close`` and the ``connection_pid`` reset run even when
        stopping the service raises, so the subclass transport is released
        in every case. An exception from the service still propagates.
        """
        if self._service.closing:
            return
        try:
            await self._service.aclose()
        finally:
            try:
                await self._extra_close()
            finally:
                self.connection_pid = None

    async def _extra_close(self) -> None:  # pragma: no cover - subclass hook
        """Subclass hook for transport teardown; the default does nothing."""
        return

    # -- consumer side --

    def __aiter__(self) -> AsyncIterator[FeedItem]:
        self._ensure_started()
        return self

    async def __anext__(self) -> FeedItem:
        item = await self._service.next_item()
        if item is None:
            raise StopAsyncIteration
        return item

    # -- transport-facing helpers --

    @property
    def _closing(self) -> bool:
        """True once ``aclose`` has begun on the underlying service."""
        return self._service.closing

    def _push_raw(self, payload: str) -> None:
        """Hand one raw wakeup to the service for debouncing."""
        self._service.push(payload)

    def _emit_resync(self, reason: str) -> None:
        """Emit ``Resync(reason)`` directly to the consumer."""
        self._service.emit(Resync(reason))

    def _backoff_delay(self, attempt: int) -> float:
        """Seconds to wait before reconnect attempt number ``attempt``."""
        return self._backoff.delay(attempt)

    async def _supervisor(self) -> None:  # pragma: no cover - abstract
        """Transport loop; must be provided by the subclass."""
        raise NotImplementedError
|
pgnudge/proto.py
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
"""Minimal walsender-mode protocol client, stdlib asyncio + scramp.
|
|
2
|
+
|
|
3
|
+
Startup with ``replication=database``, optional TLS, trust/cleartext/SCRAM
|
|
4
|
+
auth, simple query, CopyBoth streaming. See PostgreSQL docs: "Streaming
|
|
5
|
+
Replication Protocol", "Message Formats".
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import contextlib
|
|
10
|
+
import ssl as ssl_module
|
|
11
|
+
import struct
|
|
12
|
+
import time
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from typing import ClassVar, Self
|
|
15
|
+
|
|
16
|
+
from scramp import ScramClient
|
|
17
|
+
|
|
18
|
+
__all__ = ["PgServerError", "XLogData", "Keepalive", "WalsenderConnection"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PgServerError(Exception):
    """An ErrorResponse reported by the server.

    The parsed field map is kept on ``fields``; the exception message is
    built from its ``S``, ``C`` and ``M`` entries, with placeholders for
    any that are missing.
    """

    def __init__(self, fields: dict[str, str]) -> None:
        self.fields = fields
        severity = fields.get("S", "ERROR")
        code = fields.get("C", "?????")
        message = fields.get("M", "unknown")
        super().__init__(f"{severity} {code}: {message}")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True, slots=True)
|
|
30
|
+
class XLogData:
|
|
31
|
+
end_lsn: int
|
|
32
|
+
payload: bytes
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True, slots=True)
|
|
36
|
+
class Keepalive:
|
|
37
|
+
end_lsn: int
|
|
38
|
+
reply_requested: bool
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _parse_error(body: bytes) -> dict[str, str]:
    """Parse an ErrorResponse body into a ``{field code: text}`` map.

    The body is a sequence of fields, each a one-byte code followed by a
    NUL-terminated string, and the sequence ends at a lone NUL byte (or at
    the end of ``body``). Text is decoded as UTF-8 with replacement of
    undecodable bytes.

    A final field that lacks its NUL terminator is accepted and runs to
    the end of ``body``, so a truncated message still yields the server's
    error text instead of an unrelated ``ValueError``.
    """
    fields: dict[str, str] = {}
    end = len(body)
    pos = 0
    while pos < end and body[pos] != 0:
        code = chr(body[pos])
        stop = body.find(b"\x00", pos + 1)
        if stop == -1:
            # Unterminated last field: take whatever is left.
            stop = end
        fields[code] = body[pos + 1 : stop].decode("utf-8", "replace")
        pos = stop + 1
    return fields
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _default_ssl_context() -> ssl_module.SSLContext:
    """Build the TLS context used when the caller passes ``ssl=True``."""
    # create_default_context gives verify-full with the platform CA bundle,
    # which is what Azure/managed endpoints commonly need. For self-signed
    # dev servers pass your own context with CERT_NONE.
    return ssl_module.create_default_context()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class WalsenderConnection:
    """One logical-replication walsender session.

    Wraps an asyncio stream pair and implements just enough of the
    frontend/backend protocol for this package: startup with
    ``replication=database``, trust / cleartext / SCRAM authentication,
    simple queries, and CopyBoth streaming with standby status updates.
    Use ``connect`` to obtain an authenticated instance.
    """

    _PROTOCOL_V3: ClassVar[int] = 196608
    _SSL_REQUEST: ClassVar[int] = 80877103
    _PG_EPOCH_UNIX: ClassVar[int] = 946_684_800  # 2000-01-01 00:00:00 UTC

    def __init__(
        self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter, *, tls: bool = False
    ) -> None:
        self._reader = reader
        self._writer = writer
        self.tls = tls
        self.backend_pid: int | None = None

    # -- connection & auth ----------------------------------------------------

    @classmethod
    async def connect(
        cls,
        *,
        host: str,
        port: int,
        user: str,
        database: str,
        password: str | None = None,
        ssl: bool | ssl_module.SSLContext = False,
        application_name: str = "pgnudge",
        connect_timeout: float = 10.0,
    ) -> Self:
        """Open a socket, optionally upgrade to TLS, and authenticate.

        Each phase (TCP connect, TLS negotiation, startup/auth) is bounded
        by ``connect_timeout`` seconds. If anything fails after the socket
        is open, including cancellation, the socket is hard-closed before
        the exception propagates.

        Raises:
            ConnectionError: the server refused the SSL request.
            PgServerError: the server reported an error or asked for an
                authentication method this client does not speak.
            TimeoutError: a phase exceeded ``connect_timeout``.
        """
        reader, writer = await asyncio.wait_for(asyncio.open_connection(host, port), connect_timeout)
        conn = cls(reader, writer, tls=bool(ssl))
        try:
            if ssl:
                await asyncio.wait_for(conn._negotiate_tls(ssl, host), connect_timeout)
            await asyncio.wait_for(
                conn._startup(user=user, database=database, password=password, application_name=application_name),
                connect_timeout,
            )
        except BaseException:
            conn.abort()
            raise
        return conn

    async def _negotiate_tls(self, ssl: bool | ssl_module.SSLContext, host: str) -> None:
        """Send SSLRequest and, if the server agrees, upgrade the stream to TLS."""
        self._writer.write(struct.pack("!ii", 8, self._SSL_REQUEST))
        await self._writer.drain()
        answer = await self._reader.readexactly(1)
        if answer != b"S":
            raise ConnectionError("server refused SSL")
        ctx = ssl if isinstance(ssl, ssl_module.SSLContext) else _default_ssl_context()
        await self._writer.start_tls(ctx, server_hostname=host)

    async def _startup(self, *, user: str, database: str, password: str | None, application_name: str) -> None:
        """Send the startup packet and run authentication up to ReadyForQuery.

        Records the backend pid from BackendKeyData on ``backend_pid``.

        Raises:
            PgServerError: on a server ErrorResponse, a missing password,
                an unsupported authentication request, a cleartext request
                over an unencrypted connection, or a SASL message that
                arrives before the SASL exchange was started.
        """
        params = {
            "user": user,
            "database": database,
            "replication": "database",
            "application_name": application_name,
            "client_encoding": "UTF8",
        }
        body = b"".join(k.encode() + b"\x00" + v.encode() + b"\x00" for k, v in params.items()) + b"\x00"
        self._writer.write(struct.pack("!ii", 8 + len(body), self._PROTOCOL_V3) + body)
        await self._writer.drain()

        scram: ScramClient | None = None
        while True:
            mtype, mbody = await self._read_message()
            if mtype == b"R":
                (code,) = struct.unpack("!i", mbody[:4])
                if code == 0:  # AuthenticationOk
                    break
                if code == 3:  # CleartextPassword
                    if not self.tls:
                        raise PgServerError(
                            {
                                "M": "refusing cleartext password on an unencrypted connection; "
                                "enable ssl= or use SCRAM-SHA-256"
                            }
                        )
                    if password is None:
                        raise PgServerError({"M": "server requested a password but none was given"})
                    self._write_message(b"p", password.encode() + b"\x00")
                    await self._writer.drain()
                elif code == 10:  # SASL
                    mechanisms = [m.decode() for m in mbody[4:].split(b"\x00") if m]
                    # Channel-binding ("-PLUS") variants are not offered to scramp.
                    plain = [m for m in mechanisms if not m.endswith("-PLUS")]
                    if not plain or password is None:
                        raise PgServerError({"M": f"unsupported SASL mechanisms {mechanisms} or missing password"})
                    scram = ScramClient(plain, user, password)
                    first = scram.get_client_first().encode()
                    self._write_message(b"p", scram.mechanism_name.encode() + b"\x00" + struct.pack("!i", len(first)) + first)
                    await self._writer.drain()
                elif code == 11:  # SASLContinue
                    # Explicit check rather than assert: asserts vanish under -O.
                    if scram is None:
                        raise PgServerError({"M": "SASLContinue received before a SASL exchange was started"})
                    scram.set_server_first(mbody[4:].decode())
                    self._write_message(b"p", scram.get_client_final().encode())
                    await self._writer.drain()
                elif code == 12:  # SASLFinal
                    if scram is None:
                        raise PgServerError({"M": "SASLFinal received before a SASL exchange was started"})
                    scram.set_server_final(mbody[4:].decode())
                else:
                    raise PgServerError({"M": f"unsupported authentication request (code {code}); pgnudge speaks trust, cleartext and SCRAM-SHA-256"})
            elif mtype == b"E":
                raise PgServerError(_parse_error(mbody))
            else:  # NoticeResponse etc.
                continue

        while True:  # post-auth: BackendKeyData / ReadyForQuery ('S' ParameterStatus skipped)
            mtype, mbody = await self._read_message()
            if mtype == b"K":
                self.backend_pid = struct.unpack("!i", mbody[:4])[0]
            elif mtype == b"Z":
                return
            elif mtype == b"E":
                raise PgServerError(_parse_error(mbody))

    # -- framing ---------------------------------------------------------------

    async def _read_message(self) -> tuple[bytes, bytes]:
        """Read one message; return ``(type byte, body)``.

        Raises:
            ConnectionError: the length field is smaller than the four
                bytes it must at least account for (corrupt stream).
        """
        header = await self._reader.readexactly(5)
        mtype = header[:1]
        (length,) = struct.unpack("!i", header[1:5])
        if length < 4:
            # The length counts itself; anything less cannot be a valid frame.
            raise ConnectionError(f"malformed message header: length {length}")
        body = await self._reader.readexactly(length - 4)
        return mtype, body

    def _write_message(self, mtype: bytes, body: bytes) -> None:
        """Queue one framed message on the writer (no drain)."""
        self._writer.write(mtype + struct.pack("!i", 4 + len(body)) + body)

    # -- simple query (the only subprotocol walsender mode speaks) --------------

    async def simple_query(self, sql: str) -> None:
        """Run a command and drain to ReadyForQuery; result rows are ignored.

        Raises:
            PgServerError: the server sent an ErrorResponse. It is raised
                only after ReadyForQuery, so the connection stays in sync.
        """
        self._write_message(b"Q", sql.encode() + b"\x00")
        await self._writer.drain()
        error: dict[str, str] | None = None
        while True:
            mtype, mbody = await self._read_message()
            if mtype == b"E":
                error = _parse_error(mbody)
            elif mtype == b"Z":
                if error is not None:
                    raise PgServerError(error)
                return
            # 'T' RowDescription, 'D' DataRow, 'C' CommandComplete, 'N' Notice: skipped

    # -- CopyBoth streaming ------------------------------------------------------

    async def start_replication(self, command: str) -> None:
        """Send START_REPLICATION and consume up to CopyBothResponse.

        Raises:
            PgServerError: the server answered with an ErrorResponse.
        """
        self._write_message(b"Q", command.encode() + b"\x00")
        await self._writer.drain()
        while True:
            mtype, mbody = await self._read_message()
            if mtype == b"W":
                return
            if mtype == b"E":
                raise PgServerError(_parse_error(mbody))

    async def read_stream(self) -> XLogData | Keepalive:
        """Read the next replication message. Raises on stream end or error.

        CopyData messages of an unknown subtype and other message types
        are skipped.

        Raises:
            PgServerError: the server sent an ErrorResponse.
            ConnectionResetError: the server ended the copy stream.
        """
        while True:
            mtype, mbody = await self._read_message()
            if mtype == b"d":
                kind = mbody[:1]
                if kind == b"w":
                    _start, end, _ts = struct.unpack("!QQQ", mbody[1:25])
                    return XLogData(end_lsn=end, payload=mbody[25:])
                if kind == b"k":
                    end, _ts, reply = struct.unpack("!QQB", mbody[1:18])
                    return Keepalive(end_lsn=end, reply_requested=bool(reply))
                continue  # unknown CopyData subtype
            if mtype == b"E":
                raise PgServerError(_parse_error(mbody))
            if mtype in (b"c", b"C", b"Z"):
                raise ConnectionResetError("replication stream ended")

    async def send_standby_status(self, lsn: int, *, reply: bool = False) -> None:
        """Acknowledge everything up to ``lsn``; ``reply`` asks the server to answer with a keepalive."""
        # Timestamp is microseconds since the PostgreSQL epoch (2000-01-01).
        ts = int((time.time() - self._PG_EPOCH_UNIX) * 1_000_000)
        # The same LSN is reported for all three positions in the message.
        self._write_message(b"d", b"r" + struct.pack("!QQQQB", lsn, lsn, lsn, ts, int(reply)))
        await self._writer.drain()

    # -- teardown ----------------------------------------------------------------

    def abort(self) -> None:
        """Hard-close the socket, no protocol goodbye — slot cleanup must survive crashes."""
        # Best effort by design: teardown must never raise.
        with contextlib.suppress(Exception):
            transport = self._writer.transport
            if isinstance(transport, asyncio.WriteTransport):
                transport.abort()
|
pgnudge/py.typed
ADDED
|
File without changes
|
pgnudge/wal.py
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""WalFeed: logical decoding from a TEMPORARY replication slot.
|
|
2
|
+
|
|
3
|
+
The slot auto-drops when the session ends, cleanly or not — nothing pgnudge
|
|
4
|
+
creates outlives the connection. From-connect-only: a fresh slot per
|
|
5
|
+
(re)connect, no history, no backfill. Semantics and the gap-free handshake
|
|
6
|
+
argument: README. Server needs ``wal_level=logical``, a REPLICATION role,
|
|
7
|
+
and an output plugin (wal2json or test_decoding).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import contextlib
|
|
12
|
+
import json
|
|
13
|
+
import logging
|
|
14
|
+
import os
|
|
15
|
+
import re
|
|
16
|
+
import secrets
|
|
17
|
+
import ssl as ssl_module
|
|
18
|
+
import time
|
|
19
|
+
from typing import ClassVar
|
|
20
|
+
|
|
21
|
+
from pgnudge.engine import BaseFeed
|
|
22
|
+
from pgnudge.proto import WalsenderConnection, XLogData
|
|
23
|
+
|
|
24
|
+
__all__ = ["WalFeed"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _quote_value(v: str) -> str:
    """Wrap ``v`` in single quotes, doubling every single quote inside it."""
    escaped = v.replace("'", "''")
    return f"'{escaped}'"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class WalFeed(BaseFeed):
|
|
32
|
+
"""Async-iterable ``Resync | Batch`` feed from a temporary logical slot.
|
|
33
|
+
|
|
34
|
+
Payloads are ``schema.table``. ``tables`` filters server-side (wal2json
|
|
35
|
+
only); ``ssl`` takes True or an ``ssl.SSLContext``; ``status_interval``
|
|
36
|
+
must stay under the server's ``wal_sender_timeout`` (default 60 s).
|
|
37
|
+
``liveness_timeout`` (must exceed ``status_interval``; None disables)
|
|
38
|
+
bounds how long the feed tolerates a silent server before reconnecting.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
log: ClassVar[logging.Logger] = logging.getLogger("pgnudge.wal")
|
|
42
|
+
|
|
43
|
+
_TEST_DECODING_RE: ClassVar[re.Pattern[str]] = re.compile(
|
|
44
|
+
r"^table (.+?): (?:INSERT|UPDATE|DELETE|TRUNCATE)"
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
def __init__(
|
|
48
|
+
self,
|
|
49
|
+
*,
|
|
50
|
+
host: str = "127.0.0.1",
|
|
51
|
+
port: int = 5432,
|
|
52
|
+
user: str,
|
|
53
|
+
database: str,
|
|
54
|
+
password: str | None = None,
|
|
55
|
+
ssl: bool | ssl_module.SSLContext = False,
|
|
56
|
+
tables: list[str] | None = None,
|
|
57
|
+
plugin: str = "wal2json",
|
|
58
|
+
application_name: str = "pgnudge",
|
|
59
|
+
status_interval: float = 10.0,
|
|
60
|
+
liveness_timeout: float | None = 30.0,
|
|
61
|
+
connect_timeout: float = 10.0,
|
|
62
|
+
debounce: float = 0.05,
|
|
63
|
+
max_batch_wait: float | None = None,
|
|
64
|
+
failsafe: float | None = None,
|
|
65
|
+
backoff: tuple[float, float] = (0.1, 5.0),
|
|
66
|
+
raw_queue_size: int = 8192,
|
|
67
|
+
) -> None:
|
|
68
|
+
super().__init__(
|
|
69
|
+
debounce=debounce,
|
|
70
|
+
max_batch_wait=max_batch_wait,
|
|
71
|
+
failsafe=failsafe,
|
|
72
|
+
backoff=backoff,
|
|
73
|
+
raw_queue_size=raw_queue_size,
|
|
74
|
+
)
|
|
75
|
+
if plugin not in ("wal2json", "test_decoding"):
|
|
76
|
+
raise ValueError(f"unsupported plugin {plugin!r}")
|
|
77
|
+
self._host = host
|
|
78
|
+
self._port = port
|
|
79
|
+
self._user = user
|
|
80
|
+
self._database = database
|
|
81
|
+
self._password = password
|
|
82
|
+
self._ssl = ssl
|
|
83
|
+
self._tables = list(tables) if tables else None
|
|
84
|
+
self._plugin = plugin
|
|
85
|
+
self._application_name = application_name
|
|
86
|
+
self._status_interval = status_interval
|
|
87
|
+
self.liveness_timeout = liveness_timeout
|
|
88
|
+
self._connect_timeout = connect_timeout
|
|
89
|
+
|
|
90
|
+
self._conn: WalsenderConnection | None = None
|
|
91
|
+
self._last_lsn = 0
|
|
92
|
+
self.last_inbound = time.monotonic()
|
|
93
|
+
self.slot_name: str | None = None
|
|
94
|
+
|
|
95
|
+
# -- payload parsing ----------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
@staticmethod
|
|
98
|
+
def _parse_wal2json_v2(payload: bytes) -> list[str]:
|
|
99
|
+
try:
|
|
100
|
+
obj: object = json.loads(payload)
|
|
101
|
+
except ValueError:
|
|
102
|
+
return []
|
|
103
|
+
if isinstance(obj, dict) and obj.get("action") in ("I", "U", "D", "T"):
|
|
104
|
+
return [f"{obj.get('schema', '?')}.{obj.get('table', '?')}"]
|
|
105
|
+
return []
|
|
106
|
+
|
|
107
|
+
@classmethod
|
|
108
|
+
def _parse_test_decoding(cls, payload: bytes) -> list[str]:
|
|
109
|
+
# TRUNCATE lists every affected table on one line, ", "-joined
|
|
110
|
+
m = cls._TEST_DECODING_RE.match(payload.decode("utf-8", "replace"))
|
|
111
|
+
return m.group(1).split(", ") if m else []
|
|
112
|
+
|
|
113
|
+
# -- teardown ---------------------------------------------------------------
|
|
114
|
+
|
|
115
|
+
async def _extra_close(self) -> None:
|
|
116
|
+
# Hard-close on purpose, no DROP: crash and clean exit must exercise
|
|
117
|
+
# the same server-side cleanup path.
|
|
118
|
+
if self._conn is not None:
|
|
119
|
+
self._conn.abort()
|
|
120
|
+
self._conn = None
|
|
121
|
+
self.slot_name = None
|
|
122
|
+
|
|
123
|
+
# -- replication command assembly --------------------------------------------
|
|
124
|
+
|
|
125
|
+
def _plugin_options(self) -> str:
|
|
126
|
+
if self._plugin == "wal2json":
|
|
127
|
+
opts = [('"format-version"', "2"), ('"include-transaction"', "false")]
|
|
128
|
+
if self._tables:
|
|
129
|
+
opts.append(('"add-tables"', ",".join(self._tables)))
|
|
130
|
+
return ", ".join(f"{name} {_quote_value(value)}" for name, value in opts)
|
|
131
|
+
return '"skip-empty-xacts" \'1\''
|
|
132
|
+
|
|
133
|
+
# -- supervisor ---------------------------------------------------------------
|
|
134
|
+
|
|
135
|
+
async def _supervisor(self) -> None:
|
|
136
|
+
parse = self._parse_wal2json_v2 if self._plugin == "wal2json" else self._parse_test_decoding
|
|
137
|
+
attempt = 0
|
|
138
|
+
first = True
|
|
139
|
+
while not self._closing:
|
|
140
|
+
try:
|
|
141
|
+
conn = await WalsenderConnection.connect(
|
|
142
|
+
host=self._host,
|
|
143
|
+
port=self._port,
|
|
144
|
+
user=self._user,
|
|
145
|
+
database=self._database,
|
|
146
|
+
password=self._password,
|
|
147
|
+
ssl=self._ssl,
|
|
148
|
+
application_name=self._application_name,
|
|
149
|
+
connect_timeout=self._connect_timeout,
|
|
150
|
+
)
|
|
151
|
+
except Exception as exc:
|
|
152
|
+
self.log.warning("connect to %s:%d failed: %s", self._host, self._port, exc)
|
|
153
|
+
attempt += 1
|
|
154
|
+
delay = self._backoff_delay(attempt)
|
|
155
|
+
self.log.debug("reconnect attempt %d in %.2fs", attempt, delay)
|
|
156
|
+
await asyncio.sleep(delay)
|
|
157
|
+
continue
|
|
158
|
+
|
|
159
|
+
self._conn = conn
|
|
160
|
+
self.connection_pid = conn.backend_pid
|
|
161
|
+
slot = f"pgnudge_{os.getpid()}_{secrets.token_hex(3)}"
|
|
162
|
+
feedback: asyncio.Task[None] | None = None
|
|
163
|
+
try:
|
|
164
|
+
await conn.simple_query(
|
|
165
|
+
# SNAPSHOT 'nothing': from-connect-only, the Resync refetch is the backfill
|
|
166
|
+
f'CREATE_REPLICATION_SLOT "{slot}" TEMPORARY LOGICAL {self._plugin} (SNAPSHOT \'nothing\')'
|
|
167
|
+
)
|
|
168
|
+
await conn.start_replication(
|
|
169
|
+
f'START_REPLICATION SLOT "{slot}" LOGICAL 0/0 ({self._plugin_options()})'
|
|
170
|
+
)
|
|
171
|
+
self.slot_name = slot
|
|
172
|
+
attempt = 0
|
|
173
|
+
self._emit_resync("connected" if first else "reconnected")
|
|
174
|
+
self.log.info("streaming from slot %s (backend pid %s)", slot, conn.backend_pid)
|
|
175
|
+
first = False
|
|
176
|
+
|
|
177
|
+
self._last_lsn = 0
|
|
178
|
+
self.last_inbound = time.monotonic()
|
|
179
|
+
feedback = asyncio.create_task(self._feedback_loop(conn))
|
|
180
|
+
while True:
|
|
181
|
+
msg = await conn.read_stream()
|
|
182
|
+
self.last_inbound = time.monotonic()
|
|
183
|
+
if isinstance(msg, XLogData):
|
|
184
|
+
self._last_lsn = max(self._last_lsn, msg.end_lsn)
|
|
185
|
+
for table in parse(msg.payload):
|
|
186
|
+
self._push_raw(table)
|
|
187
|
+
else: # Keepalive — read_stream returns nothing else
|
|
188
|
+
self._last_lsn = max(self._last_lsn, msg.end_lsn)
|
|
189
|
+
if msg.reply_requested:
|
|
190
|
+
await conn.send_standby_status(self._last_lsn)
|
|
191
|
+
except asyncio.CancelledError:
|
|
192
|
+
raise
|
|
193
|
+
except Exception as exc:
|
|
194
|
+
# fall through to reconnect with a fresh slot
|
|
195
|
+
self.log.warning("stream error on slot %s, reconnecting: %s", slot, exc)
|
|
196
|
+
finally:
|
|
197
|
+
if feedback is not None:
|
|
198
|
+
feedback.cancel()
|
|
199
|
+
with contextlib.suppress(asyncio.CancelledError):
|
|
200
|
+
await feedback
|
|
201
|
+
self.connection_pid = None
|
|
202
|
+
self.slot_name = None
|
|
203
|
+
self._conn = None
|
|
204
|
+
conn.abort()
|
|
205
|
+
|
|
206
|
+
if not self._closing:
|
|
207
|
+
attempt += 1
|
|
208
|
+
delay = self._backoff_delay(attempt)
|
|
209
|
+
self.log.debug("reconnect attempt %d in %.2fs", attempt, delay)
|
|
210
|
+
await asyncio.sleep(delay)
|
|
211
|
+
|
|
212
|
+
async def _feedback_loop(self, conn: WalsenderConnection) -> None:
    """Report standby status periodically and act as the liveness watchdog.

    Every ``self._status_interval`` seconds the loop either sends
    ``self._last_lsn`` to the server as a standby status update or, when
    ``self.liveness_timeout`` is set and nothing has been received for
    longer than that, aborts ``conn``.

    With liveness on, every status requests a keepalive back, so a
    healthy connection has inbound traffic every status_interval and
    silence beyond liveness_timeout means the link or walsender is
    dead. abort() breaks the supervisor's blocked read -> reconnect.

    Args:
        conn: The replication connection to report on; it is aborted when
            the link is judged dead or a status update cannot be sent.

    Returns:
        None. The loop runs until it aborts ``conn`` or is cancelled.
    """
    # Decided once per connection: ask for a keepalive reply only when the
    # liveness check is enabled.
    probe = self.liveness_timeout is not None
    while True:
        await asyncio.sleep(self._status_interval)
        idle = time.monotonic() - self.last_inbound
        if self.liveness_timeout is not None and idle > self.liveness_timeout:
            self.log.warning("no server traffic for %.1fs; aborting connection", idle)
            conn.abort()
            return
        try:
            await conn.send_standby_status(self._last_lsn, reply=probe)
        except Exception as exc:
            self.log.debug("standby status send failed: %s", exc)
            # This loop is the only liveness watchdog. Returning without
            # aborting would leave a read blocked on a dead link with
            # nothing left to wake it, so force the reconnect here.
            conn.abort()
            return
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pgnudge
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Push-only change nudges from PostgreSQL logical replication: the database nudges, consumers refetch.
|
|
5
|
+
Project-URL: Homepage, https://github.com/janbjorge/pgnudge
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: asyncio,cache-invalidation,change-feed,logical-replication,postgresql
|
|
9
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
10
|
+
Classifier: Framework :: AsyncIO
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Requires-Dist: scramp>=1.4
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# pgnudge
|
|
23
|
+
|
|
24
|
+
**Push-only change nudges from PostgreSQL — nothing left behind on the server.**
|
|
25
|
+
|
|
26
|
+
[CI](https://github.com/janbjorge/pgnudge/actions/workflows/ci.yml)
|
|
27
|
+
[PyPI](https://pypi.org/project/pgnudge/)
|
|
28
|
+
[Python versions](https://pypi.org/project/pgnudge/)
|
|
29
|
+
[License: MIT](LICENSE)
|
|
30
|
+
|
|
31
|
+
Your database moves; your app wakes up. pgnudge tells you *that* something
|
|
32
|
+
changed and *which tables* — you already know how to load the data. Built
|
|
33
|
+
for live read models: dashboards, cache invalidation, anything that
|
|
34
|
+
renders a query and wants to re-render the instant the database moves.
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
pip install pgnudge
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Python ≥ 3.11, PostgreSQL ≥ 16. One dependency:
|
|
41
|
+
[scramp](https://github.com/tlocke/scramp) (pure-Python SCRAM auth). No
|
|
42
|
+
database driver — pgnudge speaks the PostgreSQL replication protocol
|
|
43
|
+
itself.
|
|
44
|
+
|
|
45
|
+
## Sixty-second tour
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from pgnudge import Batch, Resync, WalFeed
|
|
49
|
+
|
|
50
|
+
async with WalFeed(
|
|
51
|
+
host="db.example.com", user="wal_user", password=...,
|
|
52
|
+
database="app", ssl=True,
|
|
53
|
+
tables=["public.orders", "public.stations"], # server-side filter
|
|
54
|
+
debounce=0.05,
|
|
55
|
+
) as feed:
|
|
56
|
+
async for item in feed:
|
|
57
|
+
match item:
|
|
58
|
+
case Resync(): # connected / reconnected / overflow / failsafe
|
|
59
|
+
await reload_everything()
|
|
60
|
+
case Batch(events=evs): # coalesced wakeups: which tables moved
|
|
61
|
+
await reload(tables={e.payload for e in evs})
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
There is no step 1. Nothing to install in the database, nothing to migrate,
|
|
65
|
+
nothing to revert. Close the connection and the server forgets pgnudge ever
|
|
66
|
+
existed.
|
|
67
|
+
|
|
68
|
+
## The guarantee
|
|
69
|
+
|
|
70
|
+
`WalFeed` creates **nothing on the server that outlives the connection.**
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
your app ──── async for item in feed ────▶ Resync | Batch
|
|
74
|
+
▲
|
|
75
|
+
│ walsender protocol (TLS, SCRAM-SHA-256, CopyBoth) — no driver
|
|
76
|
+
│
|
|
77
|
+
PostgreSQL ── TEMPORARY replication slot ── logical decoding
|
|
78
|
+
└── dropped by the server the instant the session ends,
|
|
79
|
+
cleanly or not
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
The temporary replication slot is the only primitive in PostgreSQL that
|
|
83
|
+
gives you a change feed with connection-scoped lifetime: the server is
|
|
84
|
+
contractually obliged to drop it the moment the session ends — cleanly, by
|
|
85
|
+
crash, by `kill -9`, by yanked cable, by `pg_terminate_backend`. No
|
|
86
|
+
triggers, no functions, no catalog objects, no persistent slots, no cleanup
|
|
87
|
+
jobs. The test suite ends by hard-aborting the socket with no protocol
|
|
88
|
+
goodbye and asserting `pg_replication_slots` is empty.
|
|
89
|
+
|
|
90
|
+
What *is* required is PostgreSQL 16+ and one-time server **configuration**
|
|
91
|
+
(settings, not objects — nothing accumulates): `wal_level = logical`, a
|
|
92
|
+
role with `REPLICATION`, and an output plugin — `wal2json` (default;
|
|
93
|
+
preinstalled on Azure Flexible Server, RDS, and most managed platforms) or
|
|
94
|
+
`test_decoding` (ships inside PostgreSQL itself).
|
|
95
|
+
|
|
96
|
+
The full mechanics — logical decoding, temporary-slot semantics, the
|
|
97
|
+
gap-free handshake argument, and **when not to use pgnudge** — are in
|
|
98
|
+
[docs/temporary-slots.md](docs/temporary-slots.md).
|
|
99
|
+
|
|
100
|
+
## The contract
|
|
101
|
+
|
|
102
|
+
A feed yields exactly two item types:
|
|
103
|
+
|
|
104
|
+
- **`Resync(reason)`** — reload everything. Emitted on every connect and
|
|
105
|
+
reconnect, on internal queue overflow, and (optionally) on a failsafe
|
|
106
|
+
interval. Handle `Resync` correctly and nothing can make your view wrong.
|
|
107
|
+
- **`Batch(events)`** — one debounce window's worth of wakeups,
|
|
108
|
+
deduplicated, in arrival order. Each `Event` carries `payload`
|
|
109
|
+
(`schema.table` — the stable v1 payload contract), `first_seen`, `count`.
|
|
110
|
+
|
|
111
|
+
**Delivery is at-least-once wakeups, from the point of connect only.**
|
|
112
|
+
Events are hints to refetch, never facts to apply. There is no history and
|
|
113
|
+
no backfill, by design and by mechanism: the slot is created fresh at every
|
|
114
|
+
(re)connect with `SNAPSHOT 'nothing'`, and a logical slot can only decode
|
|
115
|
+
forward from its creation point. The handshake is gap-free — `Resync` is
|
|
116
|
+
emitted only after the stream is live, so the refetch it triggers observes
|
|
117
|
+
a state at or after the slot's start point, and every later commit produces
|
|
118
|
+
a nudge; anything landing in between is simply covered twice, which
|
|
119
|
+
at-least-once absorbs. On reconnect `WalFeed` resyncs rather than resumes.
|
|
120
|
+
No replay, no exactly-once, no row images, *on purpose*: refetching is
|
|
121
|
+
idempotent and you have a database right there. (One nuance: slot creation
|
|
122
|
+
waits for write transactions in flight at connect time, so a long-running
|
|
123
|
+
write delays connect — it never causes history to be delivered.)
|
|
124
|
+
|
|
125
|
+
**Coalescing:** per-row changes within the debounce window collapse
|
|
126
|
+
client-side into one `Event` with a `count` — a 500-row transaction on one
|
|
127
|
+
table is one `Event`, `count=500`, one wakeup, one refetch.
|
|
128
|
+
|
|
129
|
+
`INSERT`, `UPDATE`, `DELETE`, and `TRUNCATE` all nudge. Logical decoding
|
|
130
|
+
does not carry other DDL, so schema changes don't — pair migrations with a
|
|
131
|
+
refetch if your view depends on them.
|
|
132
|
+
|
|
133
|
+
## Why not LISTEN/NOTIFY?
|
|
134
|
+
|
|
135
|
+
`NOTIFY` doesn't fire itself: making it track data changes means triggers,
|
|
136
|
+
and triggers are persistent catalog objects — schema footprint, migration
|
|
137
|
+
reviews, cleanup jobs, drift. pgnudge's whole premise is refusing that
|
|
138
|
+
trade. Logical decoding gets the same wakeups straight from the WAL with
|
|
139
|
+
zero objects. (LISTEN is still great on the *consuming* side — see Fan-out.)
|
|
140
|
+
|
|
141
|
+
## Fan-out
|
|
142
|
+
|
|
143
|
+
One `WalFeed` per process is the normal shape. For many consumers, run one
|
|
144
|
+
`WalFeed` in a small bridge daemon that republishes to a NOTIFY channel via
|
|
145
|
+
`pg_notify`, and let consumers attach with plain LISTEN (any driver —
|
|
146
|
+
LISTEN is session state, zero objects). One REPLICATION grant total, one
|
|
147
|
+
decoding pass total, and still zero persistent server objects: the bridge's
|
|
148
|
+
temp slot dies with the bridge.
|
|
149
|
+
|
|
150
|
+
## Ops notes
|
|
151
|
+
|
|
152
|
+
- `status_interval` (default 10 s) must stay under the server's
|
|
153
|
+
`wal_sender_timeout` (default 60 s); the feed also answers
|
|
154
|
+
reply-requested keepalives immediately.
|
|
155
|
+
- `liveness_timeout` (default 30 s, must exceed `status_interval`,
|
|
156
|
+
`None` disables): each status report asks the server to answer with a
|
|
157
|
+
keepalive, so a healthy connection always has inbound traffic — silence
|
|
158
|
+
longer than the timeout means a dead link (NAT drop, yanked VPN, hung
|
|
159
|
+
walsender) and the feed aborts and reconnects instead of blocking
|
|
160
|
+
forever.
|
|
161
|
+
- While connected, each `WalFeed` holds one replication slot and one WAL
|
|
162
|
+
sender against `max_replication_slots` / `max_wal_senders`. Disconnected
|
|
163
|
+
feeds hold nothing — that's the point — which also means an idle feed
|
|
164
|
+
never retains WAL.
|
|
165
|
+
- Managed platforms: enabling `wal_level=logical` typically requires a
|
|
166
|
+
restart (once); grant `REPLICATION` to a dedicated role rather than
|
|
167
|
+
widening an app role — logical decoding sees the whole database's stream.
|
|
168
|
+
- Thundering herd: a database restart reconnects every feed at once, and
|
|
169
|
+
every consumer's `Resync` handler refetches at once. Reconnect timing is
|
|
170
|
+
already jittered, but the refetch is your code — add jitter there when
|
|
171
|
+
many consumers share a database, or fan out through the bridge daemon so
|
|
172
|
+
a single process refetches per change.
|
|
173
|
+
- TLS: `ssl=True` uses platform CA verification; pass an `ssl.SSLContext`
|
|
174
|
+
for custom trust. SCRAM-SHA-256 is supported everywhere; cleartext auth
|
|
175
|
+
only over TLS — pgnudge refuses to send a password on an unencrypted
|
|
176
|
+
connection.
|
|
177
|
+
- Logging: the `pgnudge.wal` logger (stdlib `logging`, no handlers
|
|
178
|
+
configured by the library) reports connect failures and stream errors at
|
|
179
|
+
WARNING, successful (re)connects at INFO, and backoff timing at DEBUG —
|
|
180
|
+
a feed that reconnects in a loop is visible, not silent.
|
|
181
|
+
|
|
182
|
+
## Tested how
|
|
183
|
+
|
|
184
|
+
The suite spins up real PostgreSQL via testcontainers (nothing to install
|
|
185
|
+
beyond Docker) and proves the claims live: no backfill of pre-connect
|
|
186
|
+
writes, client-side coalescing (50-row txn → one `Event`, `count=50`),
|
|
187
|
+
reconnect gets a fresh slot with the old one auto-dropped, TLS + SCRAM over
|
|
188
|
+
an encrypted stream, and the flagship — hard socket abort with no protocol
|
|
189
|
+
goodbye leaves `pg_replication_slots` empty.
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
uv sync && uv run pytest
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
## Non-goals
|
|
196
|
+
|
|
197
|
+
- **Not a queue.** No durability, no competing consumers, no retries. If a
|
|
198
|
+
message must be processed, use a job queue
|
|
199
|
+
(e.g. [pgqueuer](https://github.com/janbjorge/pgqueuer)) — pgnudge is its
|
|
200
|
+
broadcast-shaped sibling: pgqueuer moves *work*, pgnudge moves
|
|
201
|
+
*wakefulness*.
|
|
202
|
+
- **Not CDC.** No row images, no before/after, no replay. Refetch.
|
|
203
|
+
- **Not a driver.** The protocol client implements exactly what a
|
|
204
|
+
logical-decoding consumer needs: startup, auth, simple query, CopyBoth.
|
|
205
|
+
|
|
206
|
+
## Roadmap
|
|
207
|
+
|
|
208
|
+
- Native `pgoutput` parsing would drop the wal2json server-plugin
|
|
209
|
+
requirement — but pgoutput only decodes through a *publication*, and a
|
|
210
|
+
publication is a persistent catalog object, in direct tension with the
|
|
211
|
+
nothing-outlives-the-connection guarantee. Conditional at best: viable
|
|
212
|
+
only if a pre-existing, application-owned publication counts as
|
|
213
|
+
configuration rather than footprint.
|
|
214
|
+
- Opt-in `schema.table:pk` payloads for sharper client-side routing.
|
|
215
|
+
- The bridge daemon as a first-class artifact — same feed contract, one
|
|
216
|
+
slot fanned out over NOTIFY; a native (Zig) implementation is the
|
|
217
|
+
intended long-term core.
|
|
218
|
+
|
|
219
|
+
MIT licensed.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
pgnudge/__init__.py,sha256=zeH6zY3i-CC0TNDQ2C502xmZRvcut3xQqXBwUs5i1Dc,372
|
|
2
|
+
pgnudge/core.py,sha256=Fh2AaK2s8ghnV-CV_XoKRCrFAtw9AOq9NO-LhnVBHyI,1012
|
|
3
|
+
pgnudge/engine.py,sha256=t8k2hooTtzVMc-lIKs9e00h88m5Fr9oiiG2azJDYiPI,9011
|
|
4
|
+
pgnudge/proto.py,sha256=rybHPeccoF8O2JR8mju09xrx-DH3hmxlg3M_Wu1bhhU,10212
|
|
5
|
+
pgnudge/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
pgnudge/wal.py,sha256=tokjQe6S4SYJkdmHxnKV8ja11gIQbNyE357QMLAojE4,9341
|
|
7
|
+
pgnudge-1.0.0.dist-info/METADATA,sha256=83ULx4vjYMYgGYFPBOG_16hJvetrrljqiOxCS1YB5Cw,10252
|
|
8
|
+
pgnudge-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
9
|
+
pgnudge-1.0.0.dist-info/licenses/LICENSE,sha256=0PnG3kXuspi8WEguytXeC0Tnn9YdpV8vJlzTIeSZhC4,1077
|
|
10
|
+
pgnudge-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 pgnudge contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|