onestep-postgres 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.metadata import PackageNotFoundError, version as _package_version
4
+
5
+ from .connector import (
6
+ IncrementalDelivery,
7
+ PostgresConnector,
8
+ PostgresIncrementalSource,
9
+ PostgresTableQueueDelivery,
10
+ PostgresTableQueueSource,
11
+ PostgresTableSink,
12
+ )
13
+ from .resources import register_resources
14
+ from .resilience import classify_sqlalchemy_error
15
+ from .state_sqlalchemy import SQLAlchemyCursorStore, SQLAlchemyStateStore
16
+
17
# Report the version recorded in the installed distribution metadata; when the
# distribution is not installed there is no metadata to read, so fall back to
# a placeholder.
try:
    __version__ = _package_version("onestep-postgres")
except PackageNotFoundError:  # pragma: no cover - local source tree before install
    __version__ = "dev"

# Alias of ``register_resources`` exported under the shorter name ``register``.
register = register_resources

# Public API of the package.
__all__ = [
    "IncrementalDelivery",
    "PostgresConnector",
    "PostgresIncrementalSource",
    "PostgresTableQueueDelivery",
    "PostgresTableQueueSource",
    "PostgresTableSink",
    "SQLAlchemyCursorStore",
    "SQLAlchemyStateStore",
    "__version__",
    "classify_sqlalchemy_error",
    "register",
    "register_resources",
]
@@ -0,0 +1,465 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import hashlib
5
+ from collections import deque
6
+ from collections.abc import Mapping, Sequence
7
+ from dataclasses import dataclass
8
+ from typing import Any
9
+
10
+ from onestep.connectors.base import Delivery, Sink, Source
11
+ from onestep.envelope import Envelope
12
+ from onestep.resilience import ConnectorOperation
13
+ from onestep.state import CursorStore, InMemoryCursorStore
14
+
15
+ from .resilience import as_postgres_connector_operation_error
16
+ from .state_sqlalchemy import SQLAlchemyCursorStore, SQLAlchemyStateStore
17
+
18
+ try:
19
+ import sqlalchemy as sa
20
+ from sqlalchemy import create_engine
21
+ except ImportError: # pragma: no cover - exercised when optional deps are missing
22
+ sa = None
23
+ create_engine = None
24
+
25
+
26
class PostgresConnector:
    """Owns one SQLAlchemy engine and builds sources, sinks and stores bound to it."""

    def __init__(self, dsn: str, **engine_options: Any) -> None:
        """Create the engine for ``dsn``.

        Args:
            dsn: Database URL handed to ``create_engine``.
            **engine_options: Extra keyword arguments forwarded to ``create_engine``.

        Raises:
            RuntimeError: If SQLAlchemy could not be imported.
        """
        if create_engine is None:
            raise RuntimeError("PostgresConnector requires SQLAlchemy. Install onestep-postgres.")
        self.dsn = dsn
        self.engine = create_engine(dsn, future=True, pool_pre_ping=True, **engine_options)
        # Reflected tables keyed by table name; filled lazily by ``_table``.
        self._tables: dict[str, Any] = {}

    async def close(self) -> None:
        """Dispose of the engine in a worker thread."""
        await asyncio.to_thread(self.engine.dispose)

    def state_store(
        self,
        *,
        table: str = "onestep_state",
        key_column: str = "state_key",
        value_column: str = "state_value",
        updated_at_column: str = "updated_at",
        auto_create: bool = True,
    ) -> SQLAlchemyStateStore:
        """Return a ``SQLAlchemyStateStore`` that shares this connector's engine."""
        return SQLAlchemyStateStore(
            engine=self.engine,
            table=table,
            key_column=key_column,
            value_column=value_column,
            updated_at_column=updated_at_column,
            auto_create=auto_create,
        )

    def cursor_store(
        self,
        *,
        table: str = "onestep_cursor",
        key_column: str = "cursor_key",
        value_column: str = "cursor_value",
        updated_at_column: str = "updated_at",
        auto_create: bool = True,
    ) -> SQLAlchemyCursorStore:
        """Return a ``SQLAlchemyCursorStore`` that shares this connector's engine."""
        return SQLAlchemyCursorStore(
            engine=self.engine,
            table=table,
            key_column=key_column,
            value_column=value_column,
            updated_at_column=updated_at_column,
            auto_create=auto_create,
        )

    def table_queue(
        self,
        *,
        table: str,
        key: str,
        where: str,
        claim: Mapping[str, Any],
        ack: Mapping[str, Any],
        nack: Mapping[str, Any] | None = None,
        batch_size: int = 100,
        poll_interval_s: float = 1.0,
    ) -> "PostgresTableQueueSource":
        """Build a table-queue source over ``table``.

        ``claim``, ``ack`` and ``nack`` are column/value mappings; they are
        copied so later changes to the caller's mappings do not affect the
        source. A missing ``nack`` becomes an empty mapping.
        """
        return PostgresTableQueueSource(
            connector=self,
            table=table,
            key=key,
            where=where,
            claim=dict(claim),
            ack=dict(ack),
            nack=dict(nack or {}),
            batch_size=batch_size,
            poll_interval_s=poll_interval_s,
        )

    def incremental(
        self,
        *,
        table: str,
        key: str,
        cursor: Sequence[str],
        where: str | None = None,
        batch_size: int = 1000,
        poll_interval_s: float = 1.0,
        state: CursorStore | None = None,
        state_key: str | None = None,
    ) -> "PostgresIncrementalSource":
        """Build an incremental polling source over ``table``.

        Args:
            table: Table to read.
            key: Key column; appended to the cursor columns when not already listed.
            cursor: Cursor column names. A single column may be given as a plain string.
            where: Optional raw SQL predicate.
            batch_size: Upper bound on rows per fetch.
            poll_interval_s: Passed to the source as its polling interval.
            state: Cursor store; an ``InMemoryCursorStore`` is used when omitted.
            state_key: Key under which the cursor is stored; derived from the
                other arguments when omitted.

        Raises:
            ValueError: If ``cursor`` is empty.
        """
        # A bare string is itself a Sequence[str]; without this normalisation it
        # would be split into single-character "columns" and ``key in cursor``
        # would become a substring test.
        columns = (cursor,) if isinstance(cursor, str) else tuple(cursor)
        if not columns:
            raise ValueError("cursor must contain at least one column")
        effective_cursor = columns if key in columns else (*columns, key)
        return PostgresIncrementalSource(
            connector=self,
            table=table,
            key=key,
            cursor=effective_cursor,
            where=where,
            batch_size=batch_size,
            poll_interval_s=poll_interval_s,
            state=state or InMemoryCursorStore(),
            state_key=state_key or _default_incremental_state_key(
                table=table,
                cursor=effective_cursor,
                key=key,
                where=where,
            ),
        )

    def table_sink(
        self,
        *,
        table: str,
        mode: str = "insert",
        keys: Sequence[str] = (),
    ) -> "PostgresTableSink":
        """Build a sink that writes mapping payloads into ``table``."""
        return PostgresTableSink(connector=self, table=table, mode=mode, keys=tuple(keys))

    def _table(self, table_name: str):
        """Return the reflected ``sa.Table`` for ``table_name``, reflecting it on first use."""
        table = self._tables.get(table_name)
        if table is None:
            metadata = sa.MetaData()
            table = sa.Table(table_name, metadata, autoload_with=self.engine)
            self._tables[table_name] = table
        return table
145
+
146
+
147
def _default_incremental_state_key(
    *,
    table: str,
    cursor: Sequence[str],
    key: str,
    where: str | None,
) -> str:
    """Derive the state key used when the caller does not supply one.

    The key combines the table, the comma-joined cursor columns, the key
    column and a fragment describing ``where``: ``-`` when there is no
    predicate, the whitespace-collapsed predicate when it is at most 64
    characters, and otherwise its SHA-1 hex digest prefixed with ``sha1:``.
    """
    condensed = " ".join(where.split()) if where else ""
    if not condensed:
        fragment = "-"
    elif len(condensed) <= 64:
        fragment = condensed
    else:
        digest = hashlib.sha1(condensed.encode("utf-8")).hexdigest()
        fragment = f"sha1:{digest}"
    columns = ",".join(cursor)
    return f"{table}:{columns}:key={key}:where={fragment}"
162
+
163
+
164
@dataclass
class _TableRowRef:
    """Identifies one table row by the value of its key column."""

    table: str  # name of the table holding the row
    key: str  # name of the key column
    key_value: Any  # value of the key column for this row
169
+
170
+
171
class PostgresTableQueueDelivery(Delivery):
    """Delivery for one claimed queue row; every outcome is delegated to the owning source."""

    def __init__(self, source: "PostgresTableQueueSource", envelope: Envelope, row_ref: _TableRowRef) -> None:
        """Remember the source that produced the row and the reference to that row."""
        super().__init__(envelope)
        self._source = source
        self._row_ref = row_ref

    async def update_current_row(self, values: Mapping[str, Any]) -> None:
        """Write ``values`` to the row, then mirror them into the envelope body when it is a dict."""
        changes = dict(values)
        await self._source.update_row(self._row_ref, changes)
        body = self.envelope.body
        if isinstance(body, dict):
            body.update(changes)

    async def ack(self) -> None:
        """Acknowledge the row through the source."""
        await self._source.ack_row(self._row_ref)

    async def retry(self, *, delay_s: float | None = None) -> None:
        """Ask the source to retry the row, forwarding the optional delay."""
        await self._source.retry_row(self._row_ref, delay_s=delay_s)

    async def fail(self, exc: Exception | None = None) -> None:
        """Report the row as failed, forwarding the optional exception."""
        await self._source.fail_row(self._row_ref, exc=exc)

    async def release_unstarted(self) -> None:
        """Release the row through the source."""
        await self._source.release_row(self._row_ref)
194
+
195
+
196
class PostgresTableQueueSource(Source):
    """Source that treats a table as a work queue.

    Rows matching ``where`` are selected and updated with the ``claim`` values
    in one transaction. Acknowledging applies ``ack``; retry, fail and release
    all apply ``nack``.
    """

    fetch_is_cancel_safe = False

    def __init__(
        self,
        *,
        connector: PostgresConnector,
        table: str,
        key: str,
        where: str,
        claim: dict[str, Any],
        ack: dict[str, Any],
        nack: dict[str, Any],
        batch_size: int,
        poll_interval_s: float,
    ) -> None:
        """Store the configuration; the source is named ``postgres.table_queue:<table>``."""
        super().__init__(f"postgres.table_queue:{table}")
        self.connector = connector
        self.table_name = table
        self.key = key
        self.where = where
        self.claim = claim
        self.ack = ack
        self.nack = nack
        self.batch_size = batch_size
        self.poll_interval_s = poll_interval_s

    async def fetch(self, limit: int) -> list[Delivery]:
        """Claim up to ``limit`` rows (at least 1, at most ``batch_size``) and wrap them as deliveries.

        Exceptions that ``as_postgres_connector_operation_error`` can classify
        are re-raised as that connector error, chained to the original; any
        other exception propagates unchanged.
        """
        try:
            claimed = await asyncio.to_thread(self._fetch_sync, max(1, min(limit, self.batch_size)))
        except Exception as exc:
            wrapped = as_postgres_connector_operation_error(
                operation=ConnectorOperation.FETCH,
                exc=exc,
                source_name=self.name,
                retry_delay_s=self.poll_interval_s,
            )
            if wrapped is not None:
                raise wrapped from exc
            raise
        return [
            PostgresTableQueueDelivery(
                self,
                Envelope(body=row, meta={"table": self.table_name}),
                _TableRowRef(self.table_name, self.key, row[self.key]),
            )
            for row in claimed
        ]

    def _fetch_sync(self, limit: int) -> list[dict[str, Any]]:
        """Select, claim and re-read up to ``limit`` rows inside a single transaction."""
        table = self.connector._table(self.table_name)
        with self.connector.engine.begin() as conn:
            key_column = table.c[self.key]
            candidates = sa.select(table).where(sa.text(self.where)).order_by(key_column).limit(limit)
            try:
                candidates = candidates.with_for_update(skip_locked=True)
            except TypeError:
                # Fall back to a plain FOR UPDATE when ``skip_locked`` is not accepted.
                candidates = candidates.with_for_update()
            claimed_ids = [row[self.key] for row in conn.execute(candidates).mappings().all()]
            if not claimed_ids:
                return []
            conn.execute(sa.update(table).where(key_column.in_(claimed_ids)).values(**self.claim))
            # Re-read so the returned rows carry the values written by the claim.
            reread = conn.execute(
                sa.select(table).where(key_column.in_(claimed_ids)).order_by(key_column)
            )
            return [dict(row) for row in reread.mappings().all()]

    async def ack_row(self, row_ref: _TableRowRef) -> None:
        """Apply the ``ack`` values to the row."""
        await self.update_row(row_ref, self.ack)

    async def retry_row(self, row_ref: _TableRowRef, *, delay_s: float | None = None) -> None:
        """Optionally sleep ``delay_s`` seconds, then apply the ``nack`` values to the row."""
        if delay_s:
            await asyncio.sleep(delay_s)
        await self.update_row(row_ref, self.nack)

    async def fail_row(self, row_ref: _TableRowRef, exc: Exception | None = None) -> None:
        """Apply the ``nack`` values to the row; ``exc`` is not used."""
        await self.update_row(row_ref, self.nack)

    async def release_row(self, row_ref: _TableRowRef) -> None:
        """Apply the ``nack`` values to the row."""
        await self.update_row(row_ref, self.nack)

    async def update_row(self, row_ref: _TableRowRef, values: Mapping[str, Any]) -> None:
        """Update the referenced row with a copy of ``values`` in a worker thread."""
        await asyncio.to_thread(self._update_row_sync, row_ref, dict(values))

    def _update_row_sync(self, row_ref: _TableRowRef, values: Mapping[str, Any]) -> None:
        """Issue the UPDATE for one row; an empty ``values`` mapping is a no-op."""
        if not values:
            return
        table = self.connector._table(row_ref.table)
        statement = (
            sa.update(table)
            .where(table.c[row_ref.key] == row_ref.key_value)
            .values(**dict(values))
        )
        with self.connector.engine.begin() as conn:
            conn.execute(statement)
289
+
290
+
291
@dataclass
class _CursorToken:
    """Cursor position of one fetched row."""

    value: tuple[Any, ...]  # the row's values for the cursor columns, in cursor order
294
+
295
+
296
class IncrementalDelivery(Delivery):
    """Delivery for one incrementally fetched row.

    Only ``ack`` reports back to the source; ``retry`` and ``fail`` leave the
    source's cursor bookkeeping untouched.
    """

    def __init__(self, source: "PostgresIncrementalSource", envelope: Envelope, token: _CursorToken) -> None:
        """Remember the producing source and the cursor token of the row."""
        super().__init__(envelope)
        self._source = source
        self._token = token

    async def ack(self) -> None:
        """Report this row's cursor token to the source as acknowledged."""
        await self._source.ack_token(self._token)

    async def retry(self, *, delay_s: float | None = None) -> None:
        """Sleep for ``delay_s`` seconds when a non-zero delay is given; nothing else happens."""
        if not delay_s:
            return
        await asyncio.sleep(delay_s)

    async def fail(self, exc: Exception | None = None) -> None:
        """Do nothing; ``exc`` is ignored."""
        return None
311
+
312
+
313
class PostgresIncrementalSource(Source):
    """Source that polls a table in cursor order and persists how far it has been acknowledged.

    Fetched cursor values wait in ``_pending`` in fetch order. The committed
    cursor only moves forward over a contiguous run of acknowledged values at
    the front of that queue.
    """

    def __init__(
        self,
        *,
        connector: PostgresConnector,
        table: str,
        key: str,
        cursor: tuple[str, ...],
        where: str | None,
        batch_size: int,
        poll_interval_s: float,
        state: CursorStore,
        state_key: str,
    ) -> None:
        """Store the configuration; the source is named ``postgres.incremental:<table>``."""
        super().__init__(f"postgres.incremental:{table}")
        self.connector = connector
        self.table_name = table
        self.key = key
        self.configured_cursor = cursor
        # The key column always takes part in the cursor.
        self.cursor = cursor if key in cursor else (*cursor, key)
        self.where = where
        self.batch_size = batch_size
        self.poll_interval_s = poll_interval_s
        self.state = state
        self.state_key = state_key
        # Cursor values handed out, in fetch order, that are not yet committed.
        self._pending: deque[tuple[Any, ...]] = deque()
        # Acknowledged cursor values that have not yet been popped from ``_pending``.
        self._acked: set[tuple[Any, ...]] = set()
        self._commit_lock: asyncio.Lock | None = None
        self._commit_loop: asyncio.AbstractEventLoop | None = None
        self._loaded = False
        self._committed_cursor: tuple[Any, ...] | None = None
        self._fetched_cursor: tuple[Any, ...] | None = None

    async def open(self) -> None:
        """Load the stored cursor once; a stored value of the wrong length is ignored."""
        if self._loaded:
            return
        stored = await self.state.load(self.state_key)
        if stored is not None and len(stored) == len(self.cursor):
            self._committed_cursor = tuple(stored)
            self._fetched_cursor = self._committed_cursor
        self._loaded = True

    async def fetch(self, limit: int) -> list[Delivery]:
        """Read up to ``limit`` rows (at least 1, at most ``batch_size``) past the last fetched cursor.

        Exceptions that ``as_postgres_connector_operation_error`` can classify
        are re-raised as that connector error, chained to the original; any
        other exception propagates unchanged.
        """
        await self.open()
        try:
            rows = await asyncio.to_thread(self._fetch_sync, max(1, min(limit, self.batch_size)))
        except Exception as exc:
            wrapped = as_postgres_connector_operation_error(
                operation=ConnectorOperation.FETCH,
                exc=exc,
                source_name=self.name,
                retry_delay_s=self.poll_interval_s,
            )
            if wrapped is not None:
                raise wrapped from exc
            raise
        batch: list[Delivery] = []
        for row in rows:
            position = tuple(row[column] for column in self.cursor)
            token = _CursorToken(position)
            self._pending.append(position)
            self._fetched_cursor = position
            batch.append(
                IncrementalDelivery(self, Envelope(body=row, meta={"table": self.table_name}), token)
            )
        return batch

    def _fetch_sync(self, limit: int) -> list[dict[str, Any]]:
        """Run the ordered, limited SELECT and return the rows as plain dicts."""
        table = self.connector._table(self.table_name)
        conditions = []
        if self.where:
            conditions.append(sa.text(self.where))
        resume_from = self._fetched_cursor or self._committed_cursor
        if resume_from is not None:
            # Row-value comparison: strictly after the last position already read.
            compared = [table.c[name] for name in self.cursor]
            conditions.append(sa.tuple_(*compared) > tuple(resume_from))
        query = sa.select(table)
        if conditions:
            query = query.where(*conditions)
        ordering = [table.c[name] for name in self.cursor]
        query = query.order_by(*ordering).limit(limit)
        with self.connector.engine.begin() as conn:
            fetched = conn.execute(query).mappings().all()
        return [dict(row) for row in fetched]

    async def ack_token(self, token: _CursorToken) -> None:
        """Record ``token`` as acknowledged and persist the cursor if it can advance."""
        async with self._runtime_commit_lock():
            self._acked.add(token.value)
            newest: tuple[Any, ...] | None = None
            # Only a contiguous acknowledged prefix of the pending queue can be committed.
            while self._pending and self._pending[0] in self._acked:
                newest = self._pending.popleft()
                self._acked.remove(newest)
            if newest is None:
                return
            self._committed_cursor = newest
            if not self._pending:
                self._fetched_cursor = newest
            await self.state.save(self.state_key, list(newest))

    def _runtime_commit_lock(self) -> asyncio.Lock:
        """Return the commit lock, creating a new one when the running event loop has changed."""
        running = asyncio.get_running_loop()
        needs_new_lock = self._commit_lock is None or self._commit_loop is not running
        if needs_new_lock:
            self._commit_lock = asyncio.Lock()
            self._commit_loop = running
        return self._commit_lock
415
+
416
+
417
class PostgresTableSink(Sink):
    """Sink that writes each mapping payload into a table as an insert or an upsert."""

    def __init__(self, *, connector: PostgresConnector, table: str, mode: str, keys: tuple[str, ...]) -> None:
        """Store the configuration; the sink is named ``postgres.table_sink:<table>``.

        Raises:
            ValueError: If ``mode`` is neither ``"insert"`` nor ``"upsert"``.
        """
        super().__init__(f"postgres.table_sink:{table}")
        if mode not in {"insert", "upsert"}:
            raise ValueError("mode must be either 'insert' or 'upsert'")
        self.connector = connector
        self.table_name = table
        self.mode = mode
        self.keys = keys

    async def send(self, envelope: Envelope) -> None:
        """Write ``envelope.body`` to the table in a worker thread.

        Raises:
            TypeError: If the body is not a mapping.

        Exceptions that ``as_postgres_connector_operation_error`` can classify
        are re-raised as that connector error, chained to the original; any
        other exception propagates unchanged.
        """
        if not isinstance(envelope.body, Mapping):
            raise TypeError("PostgresTableSink only accepts mapping payloads")
        try:
            await asyncio.to_thread(self._send_sync, dict(envelope.body))
        except Exception as exc:
            connector_error = as_postgres_connector_operation_error(
                operation=ConnectorOperation.SEND,
                exc=exc,
                source_name=self.name,
                retry_delay_s=1.0,
            )
            if connector_error is None:
                raise
            raise connector_error from exc

    def _send_sync(self, payload: dict[str, Any]) -> None:
        """Insert or upsert one row inside its own transaction.

        Upserts use ``INSERT ... ON CONFLICT`` on the ``postgresql`` and
        ``sqlite`` dialects; on any other dialect an upsert falls back to a
        plain insert.

        Raises:
            ValueError: In upsert mode when no key columns were configured.
        """
        table = self.connector._table(self.table_name)
        dialect = self.connector.engine.dialect.name
        with self.connector.engine.begin() as conn:
            if self.mode == "insert":
                conn.execute(sa.insert(table).values(**payload))
                return
            if not self.keys:
                raise ValueError("upsert mode requires keys")
            if dialect == "postgresql":
                from sqlalchemy.dialects.postgresql import insert as dialect_insert
            elif dialect == "sqlite":
                from sqlalchemy.dialects.sqlite import insert as dialect_insert
            else:
                conn.execute(sa.insert(table).values(**payload))
                return
            update_payload = {key: value for key, value in payload.items() if key not in self.keys}
            conflict_target = list(self.keys)
            stmt = dialect_insert(table).values(**payload)
            if update_payload:
                stmt = stmt.on_conflict_do_update(index_elements=conflict_target, set_=update_payload)
            else:
                # The payload holds only key columns, so there is nothing to
                # update; ``on_conflict_do_update`` rejects an empty ``set_``.
                stmt = stmt.on_conflict_do_nothing(index_elements=conflict_target)
            conn.execute(stmt)
@@ -0,0 +1,64 @@
1
+ from __future__ import annotations
2
+
3
+ from onestep.resilience import ConnectorErrorKind, ConnectorOperation, ConnectorOperationError
4
+
5
+ try: # pragma: no cover - optional dependency
6
+ import sqlalchemy as sa
7
+ except ImportError: # pragma: no cover - optional dependency
8
+ sa = None
9
+
10
+
11
def classify_sqlalchemy_error(exc: BaseException) -> ConnectorErrorKind | None:
    """Map a SQLAlchemy exception to a ``ConnectorErrorKind``.

    Returns ``None`` when SQLAlchemy is unavailable or the exception matches
    none of the rules. Checks run in order: exception class first, then the
    ``connection_invalidated`` flag, then substrings of the lower-cased
    messages of ``exc.orig`` and ``exc``; an unmatched ``OperationalError`` is
    treated as transient.
    """
    if sa is None:
        return None
    errors = sa.exc

    def is_a(name: str) -> bool:
        # A missing class resolves to ``()``, for which isinstance() is always False.
        return isinstance(exc, getattr(errors, name, ()))

    if is_a("TimeoutError"):
        return ConnectorErrorKind.TRANSIENT
    if is_a("InterfaceError"):
        return ConnectorErrorKind.DISCONNECTED
    if is_a("ProgrammingError"):
        return ConnectorErrorKind.PERMANENT
    if not is_a("DBAPIError"):
        return None
    if getattr(exc, "connection_invalidated", False):
        return ConnectorErrorKind.DISCONNECTED

    candidates = (getattr(exc, "orig", None), exc)
    message = " ".join(str(part).lower() for part in candidates if part is not None)

    def mentions(*needles: str) -> bool:
        return any(needle in message for needle in needles)

    if mentions("server closed the connection", "connection refused", "connection reset"):
        return ConnectorErrorKind.DISCONNECTED
    if mentions("deadlock detected", "lock timeout", "could not serialize access"):
        return ConnectorErrorKind.TRANSIENT
    if mentions("password authentication failed", "permission denied"):
        return ConnectorErrorKind.MISCONFIGURED
    if mentions("database", "role") and "does not exist" in message:
        return ConnectorErrorKind.MISCONFIGURED
    if mentions("undefined table", "undefined column", "syntax error", "does not exist"):
        return ConnectorErrorKind.PERMANENT
    if is_a("OperationalError"):
        return ConnectorErrorKind.TRANSIENT
    return None
45
+
46
+
47
def as_postgres_connector_operation_error(
    *,
    operation: ConnectorOperation,
    exc: BaseException,
    source_name: str | None = None,
    retry_delay_s: float | None = None,
) -> ConnectorOperationError | None:
    """Wrap ``exc`` in a ``ConnectorOperationError`` for the ``postgres`` backend.

    Returns ``None`` when ``classify_sqlalchemy_error`` cannot classify
    ``exc``; otherwise the returned error carries the classified kind and
    keeps ``exc`` as its cause.
    """
    classification = classify_sqlalchemy_error(exc)
    if classification is not None:
        return ConnectorOperationError(
            backend="postgres",
            operation=operation,
            kind=classification,
            source_name=source_name,
            retry_delay_s=retry_delay_s,
            cause=exc,
        )
    return None
@@ -0,0 +1,152 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Mapping
4
+ from typing import Any
5
+
6
+ from onestep.resource_registry import ResourceBuildContext, ResourceRegistry, ResourceSpecHandler
7
+
8
+ from .connector import PostgresConnector
9
+
10
# Spec keys accepted for each resource type. ``register_resources`` passes each
# set to ``ResourceSpecHandler`` as ``allowed_fields``.
_POSTGRES_FIELDS = frozenset({"type", "dsn", "engine_options"})
_POSTGRES_STATE_STORE_FIELDS = frozenset(
    {"type", "connector", "table", "key_column", "value_column", "updated_at_column", "auto_create"}
)
# Same keys as the state store.
_POSTGRES_CURSOR_STORE_FIELDS = frozenset(
    {"type", "connector", "table", "key_column", "value_column", "updated_at_column", "auto_create"}
)
_POSTGRES_TABLE_QUEUE_FIELDS = frozenset(
    {"type", "connector", "table", "key", "where", "claim", "ack", "nack", "batch_size", "poll_interval_s"}
)
_POSTGRES_INCREMENTAL_FIELDS = frozenset(
    {"type", "connector", "table", "key", "cursor", "where", "batch_size", "poll_interval_s", "state", "state_key"}
)
_POSTGRES_TABLE_SINK_FIELDS = frozenset({"type", "connector", "table", "mode", "keys"})
24
+
25
+
26
def register_resources(registry: ResourceRegistry) -> None:
    """Register every postgres resource type with ``registry``.

    Each type is registered as a ``ResourceSpecHandler`` carrying its type
    name, its allowed spec fields and its build function.
    """
    handlers = (
        ("postgres", _POSTGRES_FIELDS, _build_postgres),
        ("postgres_state_store", _POSTGRES_STATE_STORE_FIELDS, _build_postgres_state_store),
        ("postgres_cursor_store", _POSTGRES_CURSOR_STORE_FIELDS, _build_postgres_cursor_store),
        ("postgres_table_queue", _POSTGRES_TABLE_QUEUE_FIELDS, _build_postgres_table_queue),
        ("postgres_incremental", _POSTGRES_INCREMENTAL_FIELDS, _build_postgres_incremental),
        ("postgres_table_sink", _POSTGRES_TABLE_SINK_FIELDS, _build_postgres_table_sink),
    )
    for type_name, allowed_fields, builder in handlers:
        registry.register_resource_type(
            ResourceSpecHandler(
                type=type_name,
                allowed_fields=allowed_fields,
                build=builder,
            )
        )
69
+
70
+
71
def _build_postgres(ctx: ResourceBuildContext, spec: Mapping[str, Any]) -> PostgresConnector:
    """Build a ``PostgresConnector`` from the spec's ``dsn`` and ``engine_options``."""
    dsn = ctx.require_string(spec, "dsn")
    engine_options = ctx.mapping_value(
        spec.get("engine_options"),
        field=f"{ctx.field}.engine_options",
    )
    return PostgresConnector(dsn, **engine_options)
76
+
77
+
78
def _build_postgres_state_store(ctx: ResourceBuildContext, spec: Mapping[str, Any]) -> Any:
    """Build a state store from the resolved ``connector`` resource.

    Raises:
        TypeError: If the resolved connector has no ``state_store`` attribute.
    """
    connector = ctx.resolve_dependency(spec, "connector")
    if not hasattr(connector, "state_store"):
        raise TypeError(f"resource {spec['connector']!r} cannot build postgres_state_store")
    defaults = {
        "table": "onestep_state",
        "key_column": "state_key",
        "value_column": "state_value",
        "updated_at_column": "updated_at",
        "auto_create": True,
    }
    # Each option comes from the spec when present, otherwise from its default.
    options = {name: spec.get(name, fallback) for name, fallback in defaults.items()}
    return connector.state_store(**options)
89
+
90
+
91
def _build_postgres_cursor_store(ctx: ResourceBuildContext, spec: Mapping[str, Any]) -> Any:
    """Build a cursor store from the resolved ``connector`` resource.

    Raises:
        TypeError: If the resolved connector has no ``cursor_store`` attribute.
    """
    connector = ctx.resolve_dependency(spec, "connector")
    if not hasattr(connector, "cursor_store"):
        raise TypeError(f"resource {spec['connector']!r} cannot build postgres_cursor_store")
    defaults = {
        "table": "onestep_cursor",
        "key_column": "cursor_key",
        "value_column": "cursor_value",
        "updated_at_column": "updated_at",
        "auto_create": True,
    }
    # Each option comes from the spec when present, otherwise from its default.
    options = {name: spec.get(name, fallback) for name, fallback in defaults.items()}
    return connector.cursor_store(**options)
102
+
103
+
104
def _build_postgres_table_queue(ctx: ResourceBuildContext, spec: Mapping[str, Any]) -> Any:
    """Build a table-queue source from the resolved ``connector`` resource.

    Raises:
        TypeError: If the resolved connector has no ``table_queue`` attribute.
    """
    connector = ctx.resolve_dependency(spec, "connector")
    if not hasattr(connector, "table_queue"):
        raise TypeError(f"resource {spec['connector']!r} cannot build postgres_table_queue")
    table = ctx.require_string(spec, "table")
    key = ctx.require_string(spec, "key")
    where = ctx.require_string(spec, "where")
    claim = ctx.require_mapping(spec, "claim")
    ack = ctx.require_mapping(spec, "ack")
    # A falsy nack (absent or empty) is passed on as None.
    nack = ctx.optional_mapping(spec.get("nack"), field=f"{ctx.field}.nack") or None
    return connector.table_queue(
        table=table,
        key=key,
        where=where,
        claim=claim,
        ack=ack,
        nack=nack,
        batch_size=spec.get("batch_size", 100),
        poll_interval_s=spec.get("poll_interval_s", 1.0),
    )
118
+
119
+
120
def _build_postgres_incremental(ctx: ResourceBuildContext, spec: Mapping[str, Any]) -> Any:
    """Build an incremental source from the resolved ``connector`` resource.

    When the spec names a ``state`` resource, it is resolved and must pass
    ``ctx.is_cursor_store``; otherwise ``state`` is passed on as ``None``.

    Raises:
        TypeError: If the resolved connector has no ``incremental`` attribute,
            or the named state resource is not a cursor store.
    """
    connector = ctx.resolve_dependency(spec, "connector")
    if not hasattr(connector, "incremental"):
        raise TypeError(f"resource {spec['connector']!r} cannot build postgres_incremental")
    state = None
    configured_state = spec.get("state")
    if configured_state is not None:
        state_name = ctx.string_value(configured_state, field=f"{ctx.field}.state")
        state = ctx.resolve(state_name)
        if not ctx.is_cursor_store(state):
            raise TypeError(f"resource {state_name!r} cannot be used as incremental state")
    table = ctx.require_string(spec, "table")
    key = ctx.require_string(spec, "key")
    cursor_columns = tuple(ctx.string_list(spec.get("cursor"), field=f"{ctx.field}.cursor"))
    return connector.incremental(
        table=table,
        key=key,
        cursor=cursor_columns,
        where=spec.get("where"),
        batch_size=spec.get("batch_size", 1000),
        poll_interval_s=spec.get("poll_interval_s", 1.0),
        state=state,
        state_key=spec.get("state_key"),
    )
141
+
142
+
143
def _build_postgres_table_sink(ctx: ResourceBuildContext, spec: Mapping[str, Any]) -> Any:
    """Build a table sink from the resolved ``connector`` resource.

    ``mode`` defaults to ``"insert"`` and ``keys`` to an empty tuple.

    Raises:
        TypeError: If the resolved connector has no ``table_sink`` attribute.
    """
    connector = ctx.resolve_dependency(spec, "connector")
    if not hasattr(connector, "table_sink"):
        raise TypeError(f"resource {spec['connector']!r} cannot build postgres_table_sink")
    table = ctx.require_string(spec, "table")
    raw_keys = spec.get("keys")
    if raw_keys is None:
        key_columns = ()
    else:
        key_columns = tuple(ctx.string_list(raw_keys, field=f"{ctx.field}.keys"))
    return connector.table_sink(
        table=table,
        mode=spec.get("mode", "insert"),
        keys=key_columns,
    )
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import json
5
+ import threading
6
+ from datetime import datetime, timezone
7
+ from typing import Any
8
+
9
+ try:
10
+ import sqlalchemy as sa
11
+ from sqlalchemy import create_engine
12
+ except ImportError: # pragma: no cover - exercised when optional deps are missing
13
+ sa = None
14
+ create_engine = None
15
+
16
+
17
class SQLAlchemyStateStore:
    """Key/value store that keeps JSON-encoded values in a SQL table.

    The async methods run the blocking SQLAlchemy calls in a worker thread.
    """

    def __init__(
        self,
        *,
        dsn: str | None = None,
        engine: Any | None = None,
        table: str = "onestep_state",
        key_column: str = "state_key",
        value_column: str = "state_value",
        updated_at_column: str = "updated_at",
        auto_create: bool = True,
        **engine_options: Any,
    ) -> None:
        """Bind the store to an existing ``engine`` or create one from ``dsn``.

        Args:
            dsn: Database URL; an engine is created from it and owned by the store.
            engine: Existing engine to share; it is not disposed by ``close``.
            table: Name of the backing table.
            key_column: Name of the primary-key column.
            value_column: Name of the text column holding the JSON value.
            updated_at_column: Name of the timestamp column.
            auto_create: Create the table on first use when it does not exist.
            **engine_options: Forwarded to ``create_engine`` when ``dsn`` is used.

        Raises:
            RuntimeError: If SQLAlchemy could not be imported.
            ValueError: If neither or both of ``dsn`` and ``engine`` are given.
        """
        if create_engine is None or sa is None:
            raise RuntimeError("SQLAlchemyStateStore requires SQLAlchemy. Install onestep-postgres.")
        if engine is None and dsn is None:
            raise ValueError("dsn or engine is required")
        if engine is not None and dsn is not None:
            raise ValueError("pass either dsn or engine, not both")
        self.engine = engine or create_engine(dsn, future=True, pool_pre_ping=True, **engine_options)
        self._owns_engine = engine is None
        self.table_name = table
        self.key_column_name = key_column
        self.value_column_name = value_column
        self.updated_at_column_name = updated_at_column
        self.auto_create = auto_create
        self._metadata = sa.MetaData()
        self._table = sa.Table(
            table,
            self._metadata,
            sa.Column(key_column, sa.String(255), primary_key=True),
            sa.Column(value_column, sa.Text(), nullable=False),
            sa.Column(updated_at_column, sa.DateTime(timezone=True), nullable=False),
        )
        # Set once the table has been created; guarded by ``_ready_lock``.
        self._ready = False
        self._ready_lock = threading.Lock()

    async def load(self, key: str) -> Any | None:
        """Return the decoded value stored under ``key``, or ``None`` when absent."""
        return await asyncio.to_thread(self._load_sync, key)

    async def save(self, key: str, value: Any) -> None:
        """Store ``value`` (JSON-encoded) under ``key``, replacing any previous value."""
        await asyncio.to_thread(self._save_sync, key, value)

    async def delete(self, key: str) -> None:
        """Delete the row stored under ``key``, if any."""
        await asyncio.to_thread(self._delete_sync, key)

    async def close(self) -> None:
        """Dispose of the engine, but only when this store created it."""
        if self._owns_engine:
            await asyncio.to_thread(self.engine.dispose)

    def _load_sync(self, key: str) -> Any | None:
        """Blocking implementation of ``load``."""
        self._ensure_ready_sync()
        key_column = self._table.c[self.key_column_name]
        value_column = self._table.c[self.value_column_name]
        with self.engine.begin() as conn:
            row = conn.execute(sa.select(value_column).where(key_column == key)).scalar_one_or_none()
        if row is None:
            return None
        return json.loads(row)

    def _save_sync(self, key: str, value: Any) -> None:
        """Blocking implementation of ``save``.

        The row is inserted when the key is new and updated otherwise. If a
        concurrent writer inserts the same key between the existence check and
        the insert, the resulting ``IntegrityError`` is handled by retrying as
        an update in a fresh transaction.
        """
        self._ensure_ready_sync()
        key_column = self._table.c[self.key_column_name]
        changes = {
            self.value_column_name: json.dumps(value, ensure_ascii=False),
            self.updated_at_column_name: datetime.now(timezone.utc),
        }
        insert_stmt = sa.insert(self._table).values(**{self.key_column_name: key, **changes})
        update_stmt = sa.update(self._table).where(key_column == key).values(**changes)
        try:
            with self.engine.begin() as conn:
                exists = conn.execute(sa.select(key_column).where(key_column == key)).scalar_one_or_none()
                conn.execute(insert_stmt if exists is None else update_stmt)
        except sa.exc.IntegrityError:
            # The failed transaction has been rolled back; try again as an update.
            with self.engine.begin() as conn:
                if conn.execute(update_stmt).rowcount == 0:
                    # No row to update, so the failure was not a lost insert
                    # race; surface the original error.
                    raise

    def _delete_sync(self, key: str) -> None:
        """Blocking implementation of ``delete``."""
        self._ensure_ready_sync()
        key_column = self._table.c[self.key_column_name]
        with self.engine.begin() as conn:
            conn.execute(sa.delete(self._table).where(key_column == key))

    def _ensure_ready_sync(self) -> None:
        """Create the table once when ``auto_create`` is enabled."""
        if self._ready or not self.auto_create:
            return
        with self._ready_lock:
            # Re-check under the lock: another thread may have finished creation.
            if self._ready:
                return
            self._metadata.create_all(self.engine, tables=[self._table], checkfirst=True)
            self._ready = True
115
+
116
+
117
class SQLAlchemyCursorStore(SQLAlchemyStateStore):
    """Cursor store with exactly the behaviour of ``SQLAlchemyStateStore``; it adds nothing of its own."""

    pass
@@ -0,0 +1,56 @@
1
+ Metadata-Version: 2.4
2
+ Name: onestep-postgres
3
+ Version: 0.1.0
4
+ Summary: PostgreSQL connector plugin for onestep.
5
+ License: MIT
6
+ Requires-Python: >=3.9
7
+ Requires-Dist: onestep>=1.4.2
8
+ Requires-Dist: psycopg[binary]>=3.2.0
9
+ Requires-Dist: sqlalchemy>=2.0.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
12
+ Provides-Extra: test
13
+ Requires-Dist: pytest>=8.0.0; extra == 'test'
14
+ Description-Content-Type: text/markdown
15
+
16
+ # onestep-postgres
17
+
18
+ PostgreSQL connector plugin for onestep.
19
+
20
+ Install it with:
21
+
22
+ ```bash
23
+ pip install onestep-postgres
24
+ ```
25
+
26
+ YAML resources are available after the plugin is installed:
27
+
28
+ ```yaml
29
+ resources:
30
+ pg:
31
+ type: postgres
32
+ dsn: "${POSTGRES_DSN}"
33
+
34
+ cursor:
35
+ type: postgres_cursor_store
36
+ connector: pg
37
+
38
+ users:
39
+ type: postgres_incremental
40
+ connector: pg
41
+ table: users
42
+ key: id
43
+ cursor: [updated_at, id]
44
+ state: cursor
45
+
46
+ processed:
47
+ type: postgres_table_sink
48
+ connector: pg
49
+ table: processed_users
50
+ mode: upsert
51
+ keys: [id]
52
+ ```
53
+
54
+ The first version supports table queues, incremental polling, table sinks, and
55
+ SQLAlchemy-backed state/cursor stores. It does not support PostgreSQL logical
56
+ replication or CDC.
@@ -0,0 +1,9 @@
1
+ onestep_postgres/__init__.py,sha256=BQfpRdzWJGyJx0Tc2jnI3BA2Mm0u61ZVFuUVue1Wrg0,1017
2
+ onestep_postgres/connector.py,sha256=aj1A9XXRITDgkB9mYfJs-54THqf50gb1uf0U7HUyD-w,16987
3
+ onestep_postgres/resilience.py,sha256=tr0Jhgv13xJrvVjuoaSI2_tsYhrCrOOUz2K8IMYwBSY,2586
4
+ onestep_postgres/resources.py,sha256=qu3pWzeAv42UeMOW6MYKM7rmdcX-UZe0gwx2OaYW7is,6266
5
+ onestep_postgres/state_sqlalchemy.py,sha256=g2yFDgygvnHFMutLzEnMaZNmDs5Pb-m9N1EReO4wERk,4400
6
+ onestep_postgres-0.1.0.dist-info/METADATA,sha256=0i05tUfWh2_f7RPQiW-TOCbDOr87kZrgbV-t_6K2e9A,1173
7
+ onestep_postgres-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
8
+ onestep_postgres-0.1.0.dist-info/entry_points.txt,sha256=XuAxF8oKTQStyyD4DJ2phsFC1J5N2EKD-3uXvP-hh9I,57
9
+ onestep_postgres-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [onestep.resources]
2
+ postgres = onestep_postgres:register