pgnudge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pgnudge/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ """Push-only change nudges from PostgreSQL; nothing on the server outlives the connection."""
2
+
3
+ from pgnudge.core import Batch, Event, FeedItem, Resync
4
+ from pgnudge.proto import PgServerError
5
+ from pgnudge.wal import WalFeed
6
+
7
+ __version__ = "1.0.0"
8
+
9
+ __all__ = [
10
+ "Batch",
11
+ "Event",
12
+ "FeedItem",
13
+ "PgServerError",
14
+ "Resync",
15
+ "WalFeed",
16
+ "__version__",
17
+ ]
pgnudge/core.py ADDED
@@ -0,0 +1,39 @@
1
+ """Feed contract: the two item types every feed emits.
2
+
3
+ ``Resync`` (reload everything) and ``Batch`` (coalesced wakeups) —
4
+ at-least-once, every gap bracketed by a Resync. See README.
5
+ """
6
+
7
+ from dataclasses import dataclass
8
+ from typing import TypeAlias
9
+
10
+ __all__ = ["Event", "Batch", "Resync", "FeedItem"]
11
+
12
+
13
+ @dataclass(frozen=True, slots=True)
14
+ class Event:
15
+ """One coalesced wakeup; ``count`` = arrivals of this payload in the window."""
16
+
17
+ payload: str
18
+ first_seen: float # time.time() of first arrival in this batch
19
+ count: int = 1
20
+
21
+
22
@dataclass(frozen=True, slots=True)
class Batch:
    """The events of one debounce window: deduplicated and kept in arrival order."""

    events: tuple[Event, ...]

    def payloads(self) -> tuple[str, ...]:
        """Return the payload of every event, in the same order as ``events``."""
        return tuple([event.payload for event in self.events])
30
+
31
+
32
+ @dataclass(frozen=True, slots=True)
33
+ class Resync:
34
+ """Reload-everything signal; reason is "connected" | "reconnected" | "overflow" | "failsafe"."""
35
+
36
+ reason: str
37
+
38
+
39
+ FeedItem: TypeAlias = Resync | Batch
pgnudge/engine.py ADDED
@@ -0,0 +1,286 @@
1
+ """The machinery behind a feed, one class per concern, pure stdlib.
2
+
3
+ ``Intake`` buffers raw wakeups, ``Coalescer`` dedups them, ``Debouncer``
4
+ decides when a window closes, ``Backoff`` paces reconnects, and
5
+ ``FeedService`` wires them together behind the async-iterator surface
6
+ that ``BaseFeed`` exposes.
7
+ """
8
+
9
+ import asyncio
10
+ import contextlib
11
+ import random
12
+ import time
13
+ from collections.abc import AsyncIterator, Callable, Coroutine
14
+ from dataclasses import dataclass, field
15
+ from types import TracebackType
16
+ from typing import Self
17
+
18
+ from pgnudge.core import Batch, Event, FeedItem, Resync
19
+
20
+ __all__ = ["Wakeup", "Intake", "Coalescer", "Debouncer", "Backoff", "FeedService", "BaseFeed"]
21
+
22
+
23
+ @dataclass(frozen=True, slots=True)
24
+ class Wakeup:
25
+ """One raw arrival from a transport, pre-coalescing."""
26
+
27
+ payload: str
28
+ at: float # time.time() of arrival
29
+
30
+
31
@dataclass(slots=True, kw_only=True)
class Intake:
    """Fixed-capacity buffer of raw wakeups.

    A push onto a full buffer is dropped and remembered in ``overflowed``
    rather than blocking the producer.
    """

    maxsize: int
    queue: asyncio.Queue[Wakeup] = field(init=False, repr=False)
    overflowed: bool = field(init=False, default=False)

    def __post_init__(self) -> None:
        # The queue is sized from ``maxsize`` and so cannot be a plain field default.
        self.queue = asyncio.Queue(maxsize=self.maxsize)

    def push(self, payload: str) -> None:
        """Enqueue ``payload`` stamped with the current time; flag overflow when full."""
        arrival = Wakeup(payload=payload, at=time.time())
        try:
            self.queue.put_nowait(arrival)
        except asyncio.QueueFull:
            self.overflowed = True

    async def get(self) -> Wakeup:
        """Wait for and return the next buffered wakeup."""
        wakeup = await self.queue.get()
        return wakeup

    async def get_within(self, timeout: float) -> Wakeup | None:
        """Return the next wakeup, or ``None`` if none arrives within ``timeout`` seconds."""
        # asyncio.timeout is used instead of wait_for on purpose: on 3.11
        # wait_for can swallow an external cancel when the inner get()
        # already holds an item, which would leave the pump task
        # uncancellable and aclose() hanging.
        wakeup: Wakeup | None = None
        try:
            async with asyncio.timeout(timeout):
                wakeup = await self.queue.get()
        except TimeoutError:
            pass
        return wakeup

    def consume_overflow(self) -> bool:
        """Report whether an overflow happened, clearing the flag and draining the queue if so."""
        if not self.overflowed:
            return False
        self.overflowed = False
        self.drain()
        return True

    def drain(self) -> None:
        """Discard everything currently buffered."""
        with contextlib.suppress(asyncio.QueueEmpty):
            while True:
                self.queue.get_nowait()
75
+
76
+
77
@dataclass(slots=True)
class Coalescer:
    """Per-window dedup buffer holding one ``Event`` per distinct payload."""

    # Insertion-ordered map from payload to its accumulated event.
    pending: dict[str, Event] = field(init=False, default_factory=dict)

    def add(self, wakeup: Wakeup) -> None:
        """Record ``wakeup``: start a new event, or bump the count of the existing one."""
        seen = self.pending.get(wakeup.payload)
        if seen is None:
            first_seen, count = wakeup.at, 1
        else:
            # A repeat keeps the timestamp of the first arrival.
            first_seen, count = seen.first_seen, seen.count + 1
        self.pending[wakeup.payload] = Event(
            payload=wakeup.payload, first_seen=first_seen, count=count
        )

    def flush(self) -> Batch:
        """Hand back everything buffered as a ``Batch`` and start over empty."""
        events = tuple(self.pending.values())
        self.pending.clear()
        return Batch(events)
97
+
98
+
99
@dataclass(frozen=True, slots=True, kw_only=True)
class Debouncer:
    """Window policy.

    A window stays open while wakeups keep arriving less than ``debounce``
    seconds apart, but never longer than ``max_batch_wait`` seconds in total.
    """

    debounce: float
    max_batch_wait: float

    async def next_item(self, intake: Intake) -> FeedItem:
        """Gather one window from ``intake``.

        Returns the window as a ``Batch``, or ``Resync("overflow")`` when the
        intake reported an overflow before or during the window.
        """
        first = await intake.get()
        if intake.consume_overflow():
            return Resync("overflow")

        window = Coalescer()
        window.add(first)
        cutoff = time.monotonic() + self.max_batch_wait
        # Wait at most one quiet period per arrival, clipped to the hard cutoff.
        while (wait := min(self.debounce, cutoff - time.monotonic())) > 0:
            arrival = await intake.get_within(wait)
            if arrival is None:
                break
            window.add(arrival)

        if intake.consume_overflow():
            return Resync("overflow")
        return window.flush()
127
+
128
+
129
+ @dataclass(frozen=True, slots=True)
130
+ class Backoff:
131
+ """Jittered exponential reconnect delay."""
132
+
133
+ initial: float = 0.1
134
+ maximum: float = 5.0
135
+
136
+ def delay(self, attempt: int) -> float:
137
+ base = min(self.maximum, self.initial * (2.0 ** min(attempt - 1, 16)))
138
+ return base * random.uniform(0.5, 1.5)
139
+
140
+
141
@dataclass(slots=True, kw_only=True)
class FeedService:
    """Wires intake -> debouncer -> output queue and owns the background tasks.

    Transports feed it through ``push``/``emit``; consumers read through
    ``next_item``; ``start``/``aclose`` bracket the lifetime of the tasks.
    """

    intake: Intake
    debouncer: Debouncer
    # Interval in seconds between periodic Resync("failsafe") items; None disables them.
    failsafe: float | None = None
    out: asyncio.Queue[FeedItem | None] = field(init=False, repr=False)  # None = closed
    tasks: list[asyncio.Task[None]] = field(init=False, default_factory=list)
    started: bool = field(init=False, default=False)
    closing: bool = field(init=False, default=False)

    def __post_init__(self) -> None:
        self.out = asyncio.Queue()

    # -- transport side --

    def push(self, payload: str) -> None:
        """Hand a raw wakeup payload to the intake buffer."""
        self.intake.push(payload)

    def emit(self, item: FeedItem) -> None:
        """Queue ``item`` for the consumer, bypassing intake and debouncing."""
        self.out.put_nowait(item)

    # -- lifecycle --

    def start(self, supervisor: Callable[[], Coroutine[None, None, None]], name: str) -> None:
        """Spawn the supervisor, pump and (if configured) failsafe tasks; a no-op once started."""
        if self.started:
            return
        self.started = True
        self.tasks.append(asyncio.create_task(supervisor(), name=f"{name}-supervisor"))
        self.tasks.append(asyncio.create_task(self._pump(), name=f"{name}-pump"))
        if self.failsafe is not None:
            self.tasks.append(asyncio.create_task(self._failsafe_loop(self.failsafe), name=f"{name}-failsafe"))

    async def aclose(self) -> None:
        """Cancel all tasks, wait for them, then queue the ``None`` close sentinel.

        Only the first call does anything. Every task is awaited even when
        one of them failed, and the sentinel is queued no matter how the wait
        ends, so a consumer blocked in ``next_item`` is always released.

        Raises:
            Exception: the first non-cancellation error a task ended with,
                re-raised after the sentinel has been queued.
        """
        if self.closing:
            return
        self.closing = True
        for task in self.tasks:
            task.cancel()
        try:
            # return_exceptions=True keeps one failed task from hiding the
            # others; a cancellation of aclose() itself still propagates.
            outcomes = await asyncio.gather(*self.tasks, return_exceptions=True)
        finally:
            self.out.put_nowait(None)
        for outcome in outcomes:
            # CancelledError is a BaseException, so the expected outcome of
            # the cancel() calls above is skipped here.
            if isinstance(outcome, Exception):
                raise outcome

    # -- consumer side --

    async def next_item(self) -> FeedItem | None:
        """Next item, or ``None`` once closed."""
        return await self.out.get()

    # -- internal loops --

    async def _pump(self) -> None:
        """Forever move debounced windows from the intake to the output queue."""
        while True:
            self.emit(await self.debouncer.next_item(self.intake))

    async def _failsafe_loop(self, interval: float) -> None:
        """Forever emit ``Resync("failsafe")`` every ``interval`` seconds."""
        while True:
            await asyncio.sleep(interval)
            self.emit(Resync("failsafe"))
202
+
203
+
204
class BaseFeed:
    """Async-iterator surface over a ``FeedService``; subclasses provide the transport.

    Subclasses implement ``_supervisor`` (call ``_emit_resync`` per
    (re)connect, ``_push_raw`` per wakeup) and may override ``_extra_close``.
    """

    def __init__(
        self,
        *,
        debounce: float = 0.05,
        max_batch_wait: float | None = None,
        failsafe: float | None = None,
        backoff: tuple[float, float] = (0.1, 5.0),
        raw_queue_size: int = 8192,
    ) -> None:
        """Build the service and reconnect policy; nothing is started yet.

        Args:
            debounce: quiet period in seconds that closes a batch window.
            max_batch_wait: hard cap on a window's length in seconds;
                ``None`` means ``debounce * 20``.
            failsafe: interval in seconds between periodic
                ``Resync("failsafe")`` items; ``None`` disables them.
            backoff: ``(initial, maximum)`` reconnect delay in seconds.
            raw_queue_size: capacity of the raw wakeup buffer.
        """
        self._service = FeedService(
            intake=Intake(maxsize=raw_queue_size),
            debouncer=Debouncer(
                debounce=debounce,
                max_batch_wait=max_batch_wait if max_batch_wait is not None else debounce * 20,
            ),
            failsafe=failsafe,
        )
        self._backoff = Backoff(initial=backoff[0], maximum=backoff[1])
        self.connection_pid: int | None = None  # server backend pid while connected

    # -- lifecycle --

    async def __aenter__(self) -> Self:
        self._ensure_started()
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        await self.aclose()

    def _ensure_started(self) -> None:
        """Start the service tasks, named after the concrete feed class."""
        self._service.start(self._supervisor, name=type(self).__name__)

    async def aclose(self) -> None:
        """Stop the service, then run the subclass teardown hook.

        Only the first call does anything. The hook and the
        ``connection_pid`` reset run even when stopping the service raises,
        so transport resources are not leaked; the error still propagates.
        """
        if self._service.closing:
            return
        try:
            await self._service.aclose()
        finally:
            await self._extra_close()
            self.connection_pid = None

    async def _extra_close(self) -> None:  # pragma: no cover - subclass hook
        """Subclass hook for transport teardown; does nothing by default."""
        return

    # -- consumer side --

    def __aiter__(self) -> AsyncIterator[FeedItem]:
        # Iterating without ``async with`` still starts the feed.
        self._ensure_started()
        return self

    async def __anext__(self) -> FeedItem:
        item = await self._service.next_item()
        if item is None:  # close sentinel
            raise StopAsyncIteration
        return item

    # -- transport-facing helpers --

    @property
    def _closing(self) -> bool:
        """True once ``aclose`` has begun."""
        return self._service.closing

    def _push_raw(self, payload: str) -> None:
        """Feed one raw wakeup into the debounced pipeline."""
        self._service.push(payload)

    def _emit_resync(self, reason: str) -> None:
        """Queue a ``Resync(reason)`` directly on the output."""
        self._service.emit(Resync(reason))

    def _backoff_delay(self, attempt: int) -> float:
        """Reconnect delay in seconds for the given attempt number."""
        return self._backoff.delay(attempt)

    async def _supervisor(self) -> None:  # pragma: no cover - abstract
        """Transport loop; must be provided by the subclass."""
        raise NotImplementedError
pgnudge/proto.py ADDED
@@ -0,0 +1,248 @@
1
+ """Minimal walsender-mode protocol client, stdlib asyncio + scramp.
2
+
3
+ Startup with ``replication=database``, optional TLS, trust/cleartext/SCRAM
4
+ auth, simple query, CopyBoth streaming. See PostgreSQL docs: "Streaming
5
+ Replication Protocol", "Message Formats".
6
+ """
7
+
8
+ import asyncio
9
+ import contextlib
10
+ import ssl as ssl_module
11
+ import struct
12
+ import time
13
+ from dataclasses import dataclass
14
+ from typing import ClassVar, Self
15
+
16
+ from scramp import ScramClient
17
+
18
+ __all__ = ["PgServerError", "XLogData", "Keepalive", "WalsenderConnection"]
19
+
20
+
21
class PgServerError(Exception):
    """An ErrorResponse from the server; the raw field map is kept on ``fields``.

    The message is built from the ``S``, ``C`` and ``M`` fields, each with a
    placeholder when absent.
    """

    def __init__(self, fields: dict[str, str]) -> None:
        self.fields = fields
        severity = fields.get("S", "ERROR")
        sqlstate = fields.get("C", "?????")
        message = fields.get("M", "unknown")
        super().__init__(f"{severity} {sqlstate}: {message}")
27
+
28
+
29
+ @dataclass(frozen=True, slots=True)
30
+ class XLogData:
31
+ end_lsn: int
32
+ payload: bytes
33
+
34
+
35
+ @dataclass(frozen=True, slots=True)
36
+ class Keepalive:
37
+ end_lsn: int
38
+ reply_requested: bool
39
+
40
+
41
def _parse_error(body: bytes) -> dict[str, str]:
    """Split an ErrorResponse body into a ``{field code: text}`` map.

    The body is a run of fields, each a one-byte code followed by a
    NUL-terminated string, closed by a lone NUL byte. Text is decoded as
    UTF-8 with replacement of undecodable bytes.

    A final field that lacks its terminator (truncated body) is kept with
    whatever text is present instead of raising, so the server's error is
    still reported.
    """
    fields: dict[str, str] = {}
    size = len(body)
    pos = 0
    while pos < size and body[pos] != 0:
        end = body.find(b"\x00", pos + 1)
        if end == -1:
            # Unterminated last field: take the remainder of the body.
            end = size
        fields[chr(body[pos])] = body[pos + 1 : end].decode("utf-8", "replace")
        pos = end + 1
    return fields
50
+
51
+
52
def _default_ssl_context() -> ssl_module.SSLContext:
    """Return the context used when the caller passes ``ssl=True``."""
    # create_default_context gives verify-full against the platform CA
    # bundle, which is what Azure/managed endpoints commonly need. For
    # self-signed dev servers, pass your own context with CERT_NONE instead.
    return ssl_module.create_default_context()
58
+
59
+
60
class WalsenderConnection:
    """One logical-replication walsender session.

    Covers connection setup (optional TLS, trust/cleartext/SCRAM auth),
    simple queries, CopyBoth streaming and a hard-close teardown.
    """

    _PROTOCOL_V3: ClassVar[int] = 196608
    _SSL_REQUEST: ClassVar[int] = 80877103
    _PG_EPOCH_UNIX: ClassVar[int] = 946_684_800  # 2000-01-01 00:00:00 UTC

    def __init__(
        self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter, *, tls: bool = False
    ) -> None:
        self._reader = reader
        self._writer = writer
        self.tls = tls
        self.backend_pid: int | None = None

    # -- connection & auth ----------------------------------------------------

    @classmethod
    async def connect(
        cls,
        *,
        host: str,
        port: int,
        user: str,
        database: str,
        password: str | None = None,
        ssl: bool | ssl_module.SSLContext = False,
        application_name: str = "pgnudge",
        connect_timeout: float = 10.0,
    ) -> Self:
        """Open a socket, optionally upgrade to TLS, and complete startup/auth.

        ``connect_timeout`` bounds each phase separately: the TCP connect,
        the TLS negotiation and the startup/auth exchange. If any phase after
        the TCP connect fails or is cancelled, the socket is hard-closed
        before the error propagates.

        Raises:
            ConnectionError: the server refused the SSL request.
            PgServerError: the server reported an error or asked for an
                unsupported authentication method.
            TimeoutError: a phase exceeded ``connect_timeout``.
        """
        reader, writer = await asyncio.wait_for(asyncio.open_connection(host, port), connect_timeout)
        conn = cls(reader, writer, tls=bool(ssl))
        try:
            if ssl:
                ctx = ssl if isinstance(ssl, ssl_module.SSLContext) else _default_ssl_context()
                await asyncio.wait_for(conn._negotiate_tls(ctx, host), connect_timeout)
            await asyncio.wait_for(
                conn._startup(user=user, database=database, password=password, application_name=application_name),
                connect_timeout,
            )
        except BaseException:
            # Covers cancellation too: never leave a half-open socket behind.
            conn.abort()
            raise
        return conn

    async def _negotiate_tls(self, ctx: ssl_module.SSLContext, host: str) -> None:
        """Send SSLRequest and upgrade the stream once the server answers ``S``."""
        self._writer.write(struct.pack("!ii", 8, self._SSL_REQUEST))
        await self._writer.drain()
        answer = await self._reader.readexactly(1)
        if answer != b"S":
            raise ConnectionError("server refused SSL")
        await self._writer.start_tls(ctx, server_hostname=host)

    async def _startup(self, *, user: str, database: str, password: str | None, application_name: str) -> None:
        """Send the startup packet, answer authentication requests, wait for ReadyForQuery.

        Raises:
            PgServerError: on an ErrorResponse, an unsupported or out-of-order
                authentication request, a missing password, or a cleartext
                password request on an unencrypted connection.
        """
        params = {
            "user": user,
            "database": database,
            "replication": "database",
            "application_name": application_name,
            "client_encoding": "UTF8",
        }
        body = b"".join(k.encode() + b"\x00" + v.encode() + b"\x00" for k, v in params.items()) + b"\x00"
        self._writer.write(struct.pack("!ii", 8 + len(body), self._PROTOCOL_V3) + body)
        await self._writer.drain()

        scram: ScramClient | None = None
        while True:
            mtype, mbody = await self._read_message()
            if mtype == b"R":
                (code,) = struct.unpack("!i", mbody[:4])
                if code == 0:  # AuthenticationOk
                    break
                if code == 3:  # CleartextPassword
                    if not self.tls:
                        raise PgServerError(
                            {
                                "M": "refusing cleartext password on an unencrypted connection; "
                                "enable ssl= or use SCRAM-SHA-256"
                            }
                        )
                    if password is None:
                        raise PgServerError({"M": "server requested a password but none was given"})
                    self._write_message(b"p", password.encode() + b"\x00")
                    await self._writer.drain()
                elif code == 10:  # SASL
                    mechanisms = [m.decode() for m in mbody[4:].split(b"\x00") if m]
                    # Mechanisms ending in -PLUS are not offered to the SCRAM client.
                    plain = [m for m in mechanisms if not m.endswith("-PLUS")]
                    if not plain or password is None:
                        raise PgServerError({"M": f"unsupported SASL mechanisms {mechanisms} or missing password"})
                    scram = ScramClient(plain, user, password)
                    first = scram.get_client_first().encode()
                    self._write_message(b"p", scram.mechanism_name.encode() + b"\x00" + struct.pack("!i", len(first)) + first)
                    await self._writer.drain()
                elif code == 11:  # SASLContinue
                    # Server-driven ordering is validated explicitly: an
                    # assert would vanish under ``python -O``.
                    if scram is None:
                        raise PgServerError({"M": "server sent SASLContinue before starting SASL authentication"})
                    scram.set_server_first(mbody[4:].decode())
                    self._write_message(b"p", scram.get_client_final().encode())
                    await self._writer.drain()
                elif code == 12:  # SASLFinal
                    if scram is None:
                        raise PgServerError({"M": "server sent SASLFinal before starting SASL authentication"})
                    scram.set_server_final(mbody[4:].decode())
                else:
                    raise PgServerError({"M": f"unsupported authentication request (code {code}); pgnudge speaks trust, cleartext and SCRAM-SHA-256"})
            elif mtype == b"E":
                raise PgServerError(_parse_error(mbody))
            else:  # NoticeResponse etc.
                continue

        while True:  # post-auth: BackendKeyData / ReadyForQuery ('S' ParameterStatus skipped)
            mtype, mbody = await self._read_message()
            if mtype == b"K":
                self.backend_pid = struct.unpack("!i", mbody[:4])[0]
            elif mtype == b"Z":
                return
            elif mtype == b"E":
                raise PgServerError(_parse_error(mbody))

    # -- framing ---------------------------------------------------------------

    async def _read_message(self) -> tuple[bytes, bytes]:
        """Read one framed message and return ``(type byte, body)``.

        Raises:
            ConnectionError: the declared length is smaller than the 4-byte
                length field itself, i.e. the stream is corrupt.
            asyncio.IncompleteReadError: the stream ended mid-message.
        """
        header = await self._reader.readexactly(5)
        mtype = header[:1]
        (length,) = struct.unpack("!i", header[1:5])
        if length < 4:
            # The length counts itself; anything smaller would turn into a
            # negative read size below.
            raise ConnectionError(f"malformed message frame: declared length {length}")
        body = await self._reader.readexactly(length - 4)
        return mtype, body

    def _write_message(self, mtype: bytes, body: bytes) -> None:
        """Buffer one framed message; the caller is responsible for ``drain()``."""
        self._writer.write(mtype + struct.pack("!i", 4 + len(body)) + body)

    # -- simple query (the only subprotocol walsender mode speaks) --------------

    async def simple_query(self, sql: str) -> None:
        """Run a command and drain to ReadyForQuery; result rows are ignored.

        Raises:
            PgServerError: the server sent an ErrorResponse; it is raised
                only after ReadyForQuery has been consumed.
        """
        self._write_message(b"Q", sql.encode() + b"\x00")
        await self._writer.drain()
        error: dict[str, str] | None = None
        while True:
            mtype, mbody = await self._read_message()
            if mtype == b"E":
                error = _parse_error(mbody)
            elif mtype == b"Z":
                if error is not None:
                    raise PgServerError(error)
                return
            # 'T' RowDescription, 'D' DataRow, 'C' CommandComplete, 'N' Notice: skipped

    # -- CopyBoth streaming ------------------------------------------------------

    async def start_replication(self, command: str) -> None:
        """Send START_REPLICATION and consume up to CopyBothResponse.

        Raises:
            PgServerError: the server answered with an ErrorResponse.
        """
        self._write_message(b"Q", command.encode() + b"\x00")
        await self._writer.drain()
        while True:
            mtype, mbody = await self._read_message()
            if mtype == b"W":
                return
            if mtype == b"E":
                raise PgServerError(_parse_error(mbody))

    async def read_stream(self) -> XLogData | Keepalive:
        """Read the next replication message. Raises on stream end or error.

        Raises:
            PgServerError: the server sent an ErrorResponse.
            ConnectionResetError: the server ended the stream
                (``c``, ``C`` or ``Z`` message).
        """
        while True:
            mtype, mbody = await self._read_message()
            if mtype == b"d":
                kind = mbody[:1]
                if kind == b"w":
                    _start, end, _ts = struct.unpack("!QQQ", mbody[1:25])
                    return XLogData(end_lsn=end, payload=mbody[25:])
                if kind == b"k":
                    end, _ts, reply = struct.unpack("!QQB", mbody[1:18])
                    return Keepalive(end_lsn=end, reply_requested=bool(reply))
                continue  # unknown CopyData subtype
            if mtype == b"E":
                raise PgServerError(_parse_error(mbody))
            if mtype in (b"c", b"C", b"Z"):
                raise ConnectionResetError("replication stream ended")

    async def send_standby_status(self, lsn: int, *, reply: bool = False) -> None:
        """Acknowledge everything up to ``lsn``; ``reply`` asks the server to answer with a keepalive."""
        # Timestamp is microseconds since the PostgreSQL epoch.
        ts = int((time.time() - self._PG_EPOCH_UNIX) * 1_000_000)
        # The same lsn is reported in all three position fields.
        self._write_message(b"d", b"r" + struct.pack("!QQQQB", lsn, lsn, lsn, ts, int(reply)))
        await self._writer.drain()

    # -- teardown ----------------------------------------------------------------

    def abort(self) -> None:
        """Hard-close the socket, no protocol goodbye — slot cleanup must survive crashes."""
        with contextlib.suppress(Exception):
            transport = self._writer.transport
            if isinstance(transport, asyncio.WriteTransport):
                transport.abort()
pgnudge/py.typed ADDED
File without changes
pgnudge/wal.py ADDED
@@ -0,0 +1,229 @@
1
+ """WalFeed: logical decoding from a TEMPORARY replication slot.
2
+
3
+ The slot auto-drops when the session ends, cleanly or not — nothing pgnudge
4
+ creates outlives the connection. From-connect-only: a fresh slot per
5
+ (re)connect, no history, no backfill. Semantics and the gap-free handshake
6
+ argument: README. Server needs ``wal_level=logical``, a REPLICATION role,
7
+ and an output plugin (wal2json or test_decoding).
8
+ """
9
+
10
+ import asyncio
11
+ import contextlib
12
+ import json
13
+ import logging
14
+ import os
15
+ import re
16
+ import secrets
17
+ import ssl as ssl_module
18
+ import time
19
+ from typing import ClassVar
20
+
21
+ from pgnudge.engine import BaseFeed
22
+ from pgnudge.proto import WalsenderConnection, XLogData
23
+
24
+ __all__ = ["WalFeed"]
25
+
26
+
27
def _quote_value(v: str) -> str:
    """Wrap ``v`` in single quotes, doubling any single quote it contains."""
    escaped = v.replace("'", "''")
    return f"'{escaped}'"
29
+
30
+
31
class WalFeed(BaseFeed):
    """Async-iterable ``Resync | Batch`` feed from a temporary logical slot.

    Payloads are ``schema.table``. ``tables`` filters server-side (wal2json
    only); ``ssl`` takes True or an ``ssl.SSLContext``; ``status_interval``
    must stay under the server's ``wal_sender_timeout`` (default 60 s).
    ``liveness_timeout`` (must exceed ``status_interval``; None disables)
    bounds how long the feed tolerates a silent server before reconnecting.
    """

    log: ClassVar[logging.Logger] = logging.getLogger("pgnudge.wal")

    # Captures the table list of a test_decoding change line.
    _TEST_DECODING_RE: ClassVar[re.Pattern[str]] = re.compile(
        r"^table (.+?): (?:INSERT|UPDATE|DELETE|TRUNCATE)"
    )

    def __init__(
        self,
        *,
        host: str = "127.0.0.1",
        port: int = 5432,
        user: str,
        database: str,
        password: str | None = None,
        ssl: bool | ssl_module.SSLContext = False,
        tables: list[str] | None = None,
        plugin: str = "wal2json",
        application_name: str = "pgnudge",
        status_interval: float = 10.0,
        liveness_timeout: float | None = 30.0,
        connect_timeout: float = 10.0,
        debounce: float = 0.05,
        max_batch_wait: float | None = None,
        failsafe: float | None = None,
        backoff: tuple[float, float] = (0.1, 5.0),
        raw_queue_size: int = 8192,
    ) -> None:
        """Store connection and feed settings; no connection is opened here.

        Raises:
            ValueError: ``plugin`` is neither "wal2json" nor "test_decoding".
        """
        super().__init__(
            debounce=debounce,
            max_batch_wait=max_batch_wait,
            failsafe=failsafe,
            backoff=backoff,
            raw_queue_size=raw_queue_size,
        )
        if plugin not in ("wal2json", "test_decoding"):
            raise ValueError(f"unsupported plugin {plugin!r}")
        self._host = host
        self._port = port
        self._user = user
        self._database = database
        self._password = password
        self._ssl = ssl
        # Copy so later mutation of the caller's list has no effect; empty means no filter.
        self._tables = list(tables) if tables else None
        self._plugin = plugin
        self._application_name = application_name
        self._status_interval = status_interval
        self.liveness_timeout = liveness_timeout
        self._connect_timeout = connect_timeout

        self._conn: WalsenderConnection | None = None
        self._last_lsn = 0  # highest end_lsn seen on the current connection
        self.last_inbound = time.monotonic()  # monotonic stamp of the last server message
        self.slot_name: str | None = None  # set while streaming

    # -- payload parsing ----------------------------------------------------------

    @staticmethod
    def _parse_wal2json_v2(payload: bytes) -> list[str]:
        """Return ``["schema.table"]`` for an I/U/D/T action object, else ``[]``.

        Invalid JSON and non-object documents yield ``[]``; a missing
        ``schema`` or ``table`` key is rendered as ``?``.
        """
        try:
            obj: object = json.loads(payload)
        except ValueError:
            return []
        if isinstance(obj, dict) and obj.get("action") in ("I", "U", "D", "T"):
            return [f"{obj.get('schema', '?')}.{obj.get('table', '?')}"]
        return []

    @classmethod
    def _parse_test_decoding(cls, payload: bytes) -> list[str]:
        """Return the table name(s) of a test_decoding change line, else ``[]``."""
        # TRUNCATE lists every affected table on one line, ", "-joined
        m = cls._TEST_DECODING_RE.match(payload.decode("utf-8", "replace"))
        return m.group(1).split(", ") if m else []

    # -- teardown ---------------------------------------------------------------

    async def _extra_close(self) -> None:
        """Hard-close the current connection, if any, and clear the slot name."""
        # Hard-close on purpose, no DROP: crash and clean exit must exercise
        # the same server-side cleanup path.
        if self._conn is not None:
            self._conn.abort()
            self._conn = None
        self.slot_name = None

    # -- replication command assembly --------------------------------------------

    def _plugin_options(self) -> str:
        """Build the option list placed inside the START_REPLICATION parentheses."""
        if self._plugin == "wal2json":
            opts = [('"format-version"', "2"), ('"include-transaction"', "false")]
            if self._tables:
                opts.append(('"add-tables"', ",".join(self._tables)))
            return ", ".join(f"{name} {_quote_value(value)}" for name, value in opts)
        return '"skip-empty-xacts" \'1\''

    # -- supervisor ---------------------------------------------------------------

    async def _supervisor(self) -> None:
        """Connect, create a fresh temporary slot, stream, and reconnect on failure.

        Runs until the feed is closing or the task is cancelled. Each
        successful (re)connect emits a Resync before any wakeup from that
        connection is pushed. Every failure is followed by a backoff sleep.
        """
        parse = self._parse_wal2json_v2 if self._plugin == "wal2json" else self._parse_test_decoding
        attempt = 0
        first = True
        while not self._closing:
            try:
                conn = await WalsenderConnection.connect(
                    host=self._host,
                    port=self._port,
                    user=self._user,
                    database=self._database,
                    password=self._password,
                    ssl=self._ssl,
                    application_name=self._application_name,
                    connect_timeout=self._connect_timeout,
                )
            except Exception as exc:
                self.log.warning("connect to %s:%d failed: %s", self._host, self._port, exc)
                attempt += 1
                delay = self._backoff_delay(attempt)
                self.log.debug("reconnect attempt %d in %.2fs", attempt, delay)
                await asyncio.sleep(delay)
                continue

            self._conn = conn
            self.connection_pid = conn.backend_pid
            slot = f"pgnudge_{os.getpid()}_{secrets.token_hex(3)}"
            feedback: asyncio.Task[None] | None = None
            try:
                await conn.simple_query(
                    # SNAPSHOT 'nothing': from-connect-only, the Resync refetch is the backfill
                    f'CREATE_REPLICATION_SLOT "{slot}" TEMPORARY LOGICAL {self._plugin} (SNAPSHOT \'nothing\')'
                )
                await conn.start_replication(
                    f'START_REPLICATION SLOT "{slot}" LOGICAL 0/0 ({self._plugin_options()})'
                )
                self.slot_name = slot
                attempt = 0  # streaming established: restart the backoff schedule
                self._emit_resync("connected" if first else "reconnected")
                self.log.info("streaming from slot %s (backend pid %s)", slot, conn.backend_pid)
                first = False

                self._last_lsn = 0
                self.last_inbound = time.monotonic()
                feedback = asyncio.create_task(self._feedback_loop(conn))
                while True:
                    msg = await conn.read_stream()
                    self.last_inbound = time.monotonic()
                    if isinstance(msg, XLogData):
                        self._last_lsn = max(self._last_lsn, msg.end_lsn)
                        for table in parse(msg.payload):
                            self._push_raw(table)
                    else:  # Keepalive — read_stream returns nothing else
                        self._last_lsn = max(self._last_lsn, msg.end_lsn)
                        if msg.reply_requested:
                            await conn.send_standby_status(self._last_lsn)
            except asyncio.CancelledError:
                raise
            except Exception as exc:
                # fall through to reconnect with a fresh slot
                self.log.warning("stream error on slot %s, reconnecting: %s", slot, exc)
            finally:
                # Stop the feedback task before dropping the connection it writes to.
                if feedback is not None:
                    feedback.cancel()
                    with contextlib.suppress(asyncio.CancelledError):
                        await feedback
                self.connection_pid = None
                self.slot_name = None
                self._conn = None
                conn.abort()

            if not self._closing:
                attempt += 1
                delay = self._backoff_delay(attempt)
                self.log.debug("reconnect attempt %d in %.2fs", attempt, delay)
                await asyncio.sleep(delay)

    async def _feedback_loop(self, conn: WalsenderConnection) -> None:
        """Send a standby status every ``status_interval`` and watch for a silent server.

        The loop ends by aborting ``conn`` in two cases: the server has been
        silent for longer than ``liveness_timeout``, or a status send failed.
        Either way the supervisor's blocked read is interrupted and it
        reconnects, rather than being left without acks or a watchdog.
        """
        # With liveness on, every status requests a keepalive back, so a
        # healthy connection has inbound traffic every status_interval and
        # silence beyond liveness_timeout means the link or walsender is
        # dead. abort() breaks the supervisor's blocked read -> reconnect.
        probe = self.liveness_timeout is not None
        while True:
            await asyncio.sleep(self._status_interval)
            idle = time.monotonic() - self.last_inbound
            if self.liveness_timeout is not None and idle > self.liveness_timeout:
                self.log.warning("no server traffic for %.1fs; aborting connection", idle)
                conn.abort()
                return
            try:
                await conn.send_standby_status(self._last_lsn, reply=probe)
            except Exception as exc:
                self.log.debug("standby status send failed: %s", exc)
                # Without this the loop would exit quietly and nothing would
                # detect a dead link any more.
                conn.abort()
                return
@@ -0,0 +1,219 @@
1
+ Metadata-Version: 2.4
2
+ Name: pgnudge
3
+ Version: 1.0.0
4
+ Summary: Push-only change nudges from PostgreSQL logical replication: the database nudges, consumers refetch.
5
+ Project-URL: Homepage, https://github.com/janbjorge/pgnudge
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Keywords: asyncio,cache-invalidation,change-feed,logical-replication,postgresql
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: Framework :: AsyncIO
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
17
+ Classifier: Topic :: Database
18
+ Requires-Python: >=3.11
19
+ Requires-Dist: scramp>=1.4
20
+ Description-Content-Type: text/markdown
21
+
22
+ # pgnudge
23
+
24
+ **Push-only change nudges from PostgreSQL — nothing left behind on the server.**
25
+
26
+ [![CI](https://github.com/janbjorge/pgnudge/actions/workflows/ci.yml/badge.svg)](https://github.com/janbjorge/pgnudge/actions/workflows/ci.yml)
27
+ [![PyPI](https://img.shields.io/pypi/v/pgnudge)](https://pypi.org/project/pgnudge/)
28
+ [![Python](https://img.shields.io/badge/python-3.11%2B-blue)](https://pypi.org/project/pgnudge/)
29
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green)](LICENSE)
30
+
31
+ Your database moves; your app wakes up. pgnudge tells you *that* something
32
+ changed and *which tables* — you already know how to load the data. Built
33
+ for live read models: dashboards, cache invalidation, anything that
34
+ renders a query and wants to re-render the instant the database moves.
35
+
36
+ ```
37
+ pip install pgnudge
38
+ ```
39
+
40
+ Python ≥ 3.11, PostgreSQL ≥ 16. One dependency:
41
+ [scramp](https://github.com/tlocke/scramp) (pure-Python SCRAM auth). No
42
+ database driver — pgnudge speaks the PostgreSQL replication protocol
43
+ itself.
44
+
45
+ ## Sixty-second tour
46
+
47
+ ```python
48
+ from pgnudge import Batch, Resync, WalFeed
49
+
50
+ async with WalFeed(
51
+ host="db.example.com", user="wal_user", password=...,
52
+ database="app", ssl=True,
53
+ tables=["public.orders", "public.stations"], # server-side filter
54
+ debounce=0.05,
55
+ ) as feed:
56
+ async for item in feed:
57
+ match item:
58
+ case Resync(): # connected / reconnected / overflow / failsafe
59
+ await reload_everything()
60
+ case Batch(events=evs): # coalesced wakeups: which tables moved
61
+ await reload(tables={e.payload for e in evs})
62
+ ```
63
+
64
+ There is no step 1. Nothing to install in the database, nothing to migrate,
65
+ nothing to revert. Close the connection and the server forgets pgnudge ever
66
+ existed.
67
+
68
+ ## The guarantee
69
+
70
+ `WalFeed` creates **nothing on the server that outlives the connection.**
71
+
72
+ ```
73
+ your app ──── async for item in feed ────▶ Resync | Batch
74
+
75
+ │ walsender protocol (TLS, SCRAM-SHA-256, CopyBoth) — no driver
76
+
77
+ PostgreSQL ── TEMPORARY replication slot ── logical decoding
78
+ └── dropped by the server the instant the session ends,
79
+ cleanly or not
80
+ ```
81
+
82
+ The temporary replication slot is the only primitive in PostgreSQL that
83
+ gives you a change feed with connection-scoped lifetime: the server is
84
+ contractually obliged to drop it the moment the session ends — cleanly, by
85
+ crash, by `kill -9`, by yanked cable, by `pg_terminate_backend`. No
86
+ triggers, no functions, no catalog objects, no persistent slots, no cleanup
87
+ jobs. The test suite ends by hard-aborting the socket with no protocol
88
+ goodbye and asserting `pg_replication_slots` is empty.
89
+
90
+ What *is* required is PostgreSQL 16+ and one-time server **configuration**
91
+ (settings, not objects — nothing accumulates): `wal_level = logical`, a
92
+ role with `REPLICATION`, and an output plugin — `wal2json` (default;
93
+ preinstalled on Azure Flexible Server, RDS, and most managed platforms) or
94
+ `test_decoding` (ships inside PostgreSQL itself).
95
+
96
+ The full mechanics — logical decoding, temporary-slot semantics, the
97
+ gap-free handshake argument, and **when not to use pgnudge** — are in
98
+ [docs/temporary-slots.md](docs/temporary-slots.md).
99
+
100
+ ## The contract
101
+
102
+ A feed yields exactly two item types:
103
+
104
+ - **`Resync(reason)`** — reload everything. Emitted on every connect and
105
+ reconnect, on internal queue overflow, and (optionally) on a failsafe
106
+ interval. Handle `Resync` correctly and nothing can make your view wrong.
107
+ - **`Batch(events)`** — one debounce window's worth of wakeups,
108
+ deduplicated, in arrival order. Each `Event` carries `payload`
109
+ (`schema.table` — the stable v1 payload contract), `first_seen`, `count`.
110
+
111
+ **Delivery is at-least-once wakeups, from the point of connect only.**
112
+ Events are hints to refetch, never facts to apply. There is no history and
113
+ no backfill, by design and by mechanism: the slot is created fresh at every
114
+ (re)connect with `SNAPSHOT 'nothing'`, and a logical slot can only decode
115
+ forward from its creation point. The handshake is gap-free — `Resync` is
116
+ emitted only after the stream is live, so the refetch it triggers observes
117
+ a state at or after the slot's start point, and every later commit produces
118
+ a nudge; anything landing in between is simply covered twice, which
119
+ at-least-once absorbs. On reconnect `WalFeed` resyncs rather than resumes.
120
+ No replay, no exactly-once, no row images, *on purpose*: refetching is
121
+ idempotent and you have a database right there. (One nuance: slot creation
122
+ waits for write transactions in flight at connect time, so a long-running
123
+ write delays connect — it never causes history to be delivered.)
124
+
125
+ **Coalescing:** per-row changes within the debounce window collapse
126
+ client-side into one `Event` with a `count` — a 500-row transaction on one
127
+ table is one `Event`, `count=500`, one wakeup, one refetch.
128
+
129
+ `INSERT`, `UPDATE`, `DELETE`, and `TRUNCATE` all nudge. Logical decoding
130
+ does not carry other DDL, so schema changes don't — pair migrations with a
131
+ refetch if your view depends on them.
132
+
133
+ ## Why not LISTEN/NOTIFY?
134
+
135
+ `NOTIFY` doesn't fire itself: making it track data changes means triggers,
136
+ and triggers are persistent catalog objects — schema footprint, migration
137
+ reviews, cleanup jobs, drift. pgnudge's whole premise is refusing that
138
+ trade. Logical decoding gets the same wakeups straight from the WAL with
139
+ zero objects. (LISTEN is still great on the *consuming* side — see Fan-out.)
140
+
141
+ ## Fan-out
142
+
143
+ One `WalFeed` per process is the normal shape. For many consumers, run one
144
+ `WalFeed` in a small bridge daemon that republishes to a NOTIFY channel via
145
+ `pg_notify`, and let consumers attach with plain LISTEN (any driver —
146
+ LISTEN is session state, zero objects). One REPLICATION grant total, one
147
+ decoding pass total, and still zero persistent server objects: the bridge's
148
+ temp slot dies with the bridge.
149
+
150
+ ## Ops notes
151
+
152
+ - `status_interval` (default 10 s) must stay under the server's
153
+ `wal_sender_timeout` (default 60 s); the feed also answers
154
+ reply-requested keepalives immediately.
155
+ - `liveness_timeout` (default 30 s, must exceed `status_interval`,
156
+ `None` disables): each status report asks the server to answer with a
157
+ keepalive, so a healthy connection always has inbound traffic — silence
158
+ longer than the timeout means a dead link (NAT drop, yanked VPN, hung
159
+ walsender) and the feed aborts and reconnects instead of blocking
160
+ forever.
161
+ - While connected, each `WalFeed` holds one replication slot and one WAL
162
+ sender against `max_replication_slots` / `max_wal_senders`. Disconnected
163
+ feeds hold nothing — that's the point — which also means an idle feed
164
+ never retains WAL.
165
+ - Managed platforms: enabling `wal_level=logical` typically requires a
166
+ restart (once); grant `REPLICATION` to a dedicated role rather than
167
+ widening an app role — logical decoding sees the whole database's stream.
168
+ - Thundering herd: a database restart reconnects every feed at once, and
169
+ every consumer's `Resync` handler refetches at once. Reconnect timing is
170
+ already jittered, but the refetch is your code — add jitter there when
171
+ many consumers share a database, or fan out through the bridge daemon so
172
+ a single process refetches per change.
173
+ - TLS: `ssl=True` uses platform CA verification; pass an `ssl.SSLContext`
174
+ for custom trust. SCRAM-SHA-256 is supported everywhere; cleartext auth is allowed
175
+ only over TLS — pgnudge refuses to send a password on an unencrypted
176
+ connection.
177
+ - Logging: the `pgnudge.wal` logger (stdlib `logging`, no handlers
178
+ configured by the library) reports connect failures and stream errors at
179
+ WARNING, successful (re)connects at INFO, and backoff timing at DEBUG —
180
+ a feed that reconnects in a loop is visible, not silent.
181
+
182
+ ## Tested how
183
+
184
+ The suite spins up real PostgreSQL via testcontainers (nothing to install
185
+ beyond Docker) and proves the claims live: no backfill of pre-connect
186
+ writes, client-side coalescing (50-row txn → one `Event`, `count=50`),
187
+ reconnect gets a fresh slot with the old one auto-dropped, TLS + SCRAM over
188
+ an encrypted stream, and the flagship — hard socket abort with no protocol
189
+ goodbye leaves `pg_replication_slots` empty.
190
+
191
+ ```bash
192
+ uv sync && uv run pytest
193
+ ```
194
+
195
+ ## Non-goals
196
+
197
+ - **Not a queue.** No durability, no competing consumers, no retries. If a
198
+ message must be processed, use a job queue
199
+ (e.g. [pgqueuer](https://github.com/janbjorge/pgqueuer)) — pgnudge is its
200
+ broadcast-shaped sibling: pgqueuer moves *work*, pgnudge moves
201
+ *wakefulness*.
202
+ - **Not CDC.** No row images, no before/after, no replay. Refetch.
203
+ - **Not a driver.** The protocol client implements exactly what a
204
+ logical-decoding consumer needs: startup, auth, simple query, CopyBoth.
205
+
206
+ ## Roadmap
207
+
208
+ - Native `pgoutput` parsing would drop the wal2json server-plugin
209
+ requirement — but pgoutput only decodes through a *publication*, and a
210
+ publication is a persistent catalog object, in direct tension with the
211
+ nothing-outlives-the-connection guarantee. Conditional at best: viable
212
+ only if a pre-existing, application-owned publication counts as
213
+ configuration rather than footprint.
214
+ - Opt-in `schema.table:pk` payloads for sharper client-side routing.
215
+ - The bridge daemon as a first-class artifact — same feed contract, one
216
+ slot fanned out over NOTIFY; a native (Zig) implementation is the
217
+ intended long-term core.
218
+
219
+ MIT licensed.
@@ -0,0 +1,10 @@
1
+ pgnudge/__init__.py,sha256=zeH6zY3i-CC0TNDQ2C502xmZRvcut3xQqXBwUs5i1Dc,372
2
+ pgnudge/core.py,sha256=Fh2AaK2s8ghnV-CV_XoKRCrFAtw9AOq9NO-LhnVBHyI,1012
3
+ pgnudge/engine.py,sha256=t8k2hooTtzVMc-lIKs9e00h88m5Fr9oiiG2azJDYiPI,9011
4
+ pgnudge/proto.py,sha256=rybHPeccoF8O2JR8mju09xrx-DH3hmxlg3M_Wu1bhhU,10212
5
+ pgnudge/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ pgnudge/wal.py,sha256=tokjQe6S4SYJkdmHxnKV8ja11gIQbNyE357QMLAojE4,9341
7
+ pgnudge-1.0.0.dist-info/METADATA,sha256=83ULx4vjYMYgGYFPBOG_16hJvetrrljqiOxCS1YB5Cw,10252
8
+ pgnudge-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
9
+ pgnudge-1.0.0.dist-info/licenses/LICENSE,sha256=0PnG3kXuspi8WEguytXeC0Tnn9YdpV8vJlzTIeSZhC4,1077
10
+ pgnudge-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 pgnudge contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.