dagster-postgres 0.19.2__py3-none-any.whl → 0.28.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_postgres/__init__.py +6 -6
- dagster_postgres/alembic/alembic.ini +5 -5
- dagster_postgres/event_log/__init__.py +1 -2
- dagster_postgres/event_log/event_log.py +121 -53
- dagster_postgres/run_storage/__init__.py +1 -1
- dagster_postgres/run_storage/run_storage.py +46 -26
- dagster_postgres/schedule_storage/__init__.py +3 -1
- dagster_postgres/schedule_storage/schedule_storage.py +66 -20
- dagster_postgres/storage.py +6 -6
- dagster_postgres/test_fixtures/__init__.py +75 -0
- dagster_postgres/test_fixtures/docker-compose.yml +10 -0
- dagster_postgres/utils.py +6 -3
- dagster_postgres/version.py +1 -1
- dagster_postgres-0.28.11.dist-info/METADATA +28 -0
- dagster_postgres-0.28.11.dist-info/RECORD +19 -0
- {dagster_postgres-0.19.2.dist-info → dagster_postgres-0.28.11.dist-info}/WHEEL +1 -1
- {dagster_postgres-0.19.2.dist-info → dagster_postgres-0.28.11.dist-info/licenses}/LICENSE +1 -1
- dagster_postgres/event_log/event_watcher.py +0 -137
- dagster_postgres/pynotify.py +0 -138
- dagster_postgres-0.19.2.dist-info/METADATA +0 -19
- dagster_postgres-0.19.2.dist-info/RECORD +0 -19
- {dagster_postgres-0.19.2.dist-info → dagster_postgres-0.28.11.dist-info}/top_level.txt +0 -0
dagster_postgres/__init__.py
CHANGED
@@ -1,10 +1,10 @@
-from …
+from dagster_shared.libraries import DagsterLibraryRegistry
 
-from .event_log import PostgresEventLogStorage
-from .run_storage import PostgresRunStorage
-from .schedule_storage import PostgresScheduleStorage
-from .storage import DagsterPostgresStorage
-from .version import __version__
+from dagster_postgres.event_log import PostgresEventLogStorage
+from dagster_postgres.run_storage import PostgresRunStorage
+from dagster_postgres.schedule_storage import PostgresScheduleStorage
+from dagster_postgres.storage import DagsterPostgresStorage
+from dagster_postgres.version import __version__
 
 DagsterLibraryRegistry.register("dagster-postgres", __version__)
 __all__ = [

dagster_postgres/alembic/alembic.ini
CHANGED
@@ -8,11 +8,11 @@ script_location = dagster:_core/storage/alembic
 # on newly generated revision scripts. See the documentation for further
 # detail and examples
 
-# format using "…
-hooks = …
-…
-…
-…
+# format using "ruff" - use the console_scripts runner, against the "ruff" entrypoint
+hooks = ruff
+ruff.type = console_scripts
+ruff.entrypoint = ruff
+ruff.options = format --target-version=py38 --line-length=100
 
 # Logging configuration
 [loggers]

dagster_postgres/event_log/__init__.py
CHANGED
@@ -1,2 +1 @@
-from .event_log import PostgresEventLogStorage as PostgresEventLogStorage
-from .event_watcher import PostgresEventWatcher as PostgresEventWatcher
+from dagster_postgres.event_log.event_log import PostgresEventLogStorage as PostgresEventLogStorage

dagster_postgres/event_log/event_log.py
CHANGED
@@ -1,4 +1,6 @@
-from …
+from collections.abc import Iterator, Mapping, Sequence
+from contextlib import contextmanager
+from typing import Any, ContextManager, Optional, cast  # noqa: UP035
 
 import dagster._check as check
 import sqlalchemy as db
@@ -7,17 +9,19 @@ import sqlalchemy.pool as db_pool
 from dagster._config.config_schema import UserConfigSchema
 from dagster._core.errors import DagsterInvariantViolationError
 from dagster._core.event_api import EventHandlerFn
-from dagster._core.events import ASSET_EVENTS
+from dagster._core.events import ASSET_CHECK_EVENTS, ASSET_EVENTS, BATCH_WRITABLE_EVENTS
 from dagster._core.events.log import EventLogEntry
 from dagster._core.storage.config import pg_config
 from dagster._core.storage.event_log import (
     AssetKeyTable,
+    DynamicPartitionsTable,
     SqlEventLogStorage,
     SqlEventLogStorageMetadata,
     SqlEventLogStorageTable,
 )
 from dagster._core.storage.event_log.base import EventLogCursor
 from dagster._core.storage.event_log.migration import ASSET_KEY_INDEX_COLS
+from dagster._core.storage.event_log.polling_event_watcher import SqlPollingEventWatcher
 from dagster._core.storage.sql import (
     AlembicVersion,
     check_alembic_revision,
@@ -25,18 +29,19 @@ from dagster._core.storage.sql import (
     run_alembic_upgrade,
     stamp_alembic_rev,
 )
+from dagster._core.storage.sqlalchemy_compat import db_select
 from dagster._serdes import ConfigurableClass, ConfigurableClassData, deserialize_value
+from sqlalchemy import event
 from sqlalchemy.engine import Connection
 
-from …
+from dagster_postgres.utils import (
     create_pg_connection,
     pg_alembic_config,
-    pg_statement_timeout,
     pg_url_from_config,
     retry_pg_connection_fn,
     retry_pg_creation_fn,
+    set_pg_statement_timeout,
 )
-from .event_watcher import PostgresEventWatcher
 
 CHANNEL_NAME = "run_events"
 
@@ -45,7 +50,7 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
     """Postgres-backed event log storage.
 
     Users should not directly instantiate this class; it is instantiated by internal machinery when
-    ``…
+    ``dagster-webserver`` and ``dagster-graphql`` load, based on the values in the ``dagster.yaml`` file in
     ``$DAGSTER_HOME``. Configuration of this class should be done by setting values in that file.
 
     To use Postgres for all of the components of your instance storage, you can add the following
@@ -82,15 +87,11 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
             should_autocreate_tables, "should_autocreate_tables"
         )
 
-        self._disposed = False
-
        # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
        self._engine = create_engine(
            self.postgres_url, isolation_level="AUTOCOMMIT", poolclass=db_pool.NullPool
        )
-
-        # lazy init
-        self._event_watcher: Optional[PostgresEventWatcher] = None
+        self._event_watcher: Optional[SqlPollingEventWatcher] = None
 
         self._secondary_index_cache = {}
 
@@ -111,20 +112,24 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
             SqlEventLogStorageMetadata.create_all(conn)
             stamp_alembic_rev(pg_alembic_config(__file__), conn)
 
-    def …
-    …
+    def optimize_for_webserver(
+        self, statement_timeout: int, pool_recycle: int, max_overflow: int
+    ) -> None:
+        # When running in dagster-webserver, hold an open connection and set statement_timeout
+        kwargs = {
+            "isolation_level": "AUTOCOMMIT",
+            "pool_size": 1,
+            "pool_recycle": pool_recycle,
+            "max_overflow": max_overflow,
+        }
         existing_options = self._engine.url.query.get("options")
-        timeout_option = pg_statement_timeout(statement_timeout)
         if existing_options:
-            …
-            …
-            …
-            …
-            …
-            …
-            pool_size=1,
-            connect_args={"options": options},
-            pool_recycle=pool_recycle,
+            kwargs["connect_args"] = {"options": existing_options}
+        self._engine = create_engine(self.postgres_url, **kwargs)
+        event.listen(
+            self._engine,
+            "connect",
+            lambda connection, _: set_pg_statement_timeout(connection, statement_timeout),
         )
 
     def upgrade(self) -> None:
@@ -165,7 +170,7 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
         return PostgresEventLogStorage(conn_string, should_autocreate_tables)
 
     def store_event(self, event: EventLogEntry) -> None:
-        """Store an event corresponding to a …
+        """Store an event corresponding to a run.
 
         Args:
             event (EventLogEntry): The event to store.
@@ -180,11 +185,13 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
             )
             res = result.fetchone()
             result.close()
+
+            # LISTEN/NOTIFY no longer used for pg event watch - preserved here to support version skew
             conn.execute(
-                f"""NOTIFY {CHANNEL_NAME}, …
-                …
+                db.text(f"""NOTIFY {CHANNEL_NAME}, :notify_id; """),
+                {"notify_id": res[0] + "_" + str(res[1])},  # type: ignore
             )
-            event_id = res[1]  # type: ignore
+            event_id = int(res[1])  # type: ignore
 
         if (
             event.is_dagster_event
@@ -198,7 +205,51 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
                     "Cannot store asset event tags for null event id."
                 )
 
-            self.store_asset_event_tags(event, event_id)
+            self.store_asset_event_tags([event], [event_id])
+
+        if event.is_dagster_event and event.dagster_event_type in ASSET_CHECK_EVENTS:
+            self.store_asset_check_event(event, event_id)
+
+    def store_event_batch(self, events: Sequence[EventLogEntry]) -> None:
+        from dagster import DagsterEventType
+
+        check.sequence_param(events, "event", of_type=EventLogEntry)
+
+        event_types = {event.get_dagster_event().event_type for event in events}
+
+        check.invariant(
+            all(event_type in BATCH_WRITABLE_EVENTS for event_type in event_types),
+            f"{BATCH_WRITABLE_EVENTS} are the only currently supported events for batch writes.",
+        )
+        events = [
+            event
+            for event in events
+            if not event.get_dagster_event().is_asset_failed_to_materialize
+        ]
+        if len(events) == 0:
+            return
+
+        if event_types == {DagsterEventType.ASSET_MATERIALIZATION} or event_types == {
+            DagsterEventType.ASSET_OBSERVATION
+        }:
+            insert_event_statement = self.prepare_insert_event_batch(events)
+            with self._connect() as conn:
+                result = conn.execute(
+                    insert_event_statement.returning(SqlEventLogStorageTable.c.id)
+                )
+                event_ids = [cast("int", row[0]) for row in result.fetchall()]
+
+            # We only update the asset table with the last event
+            self.store_asset_event(events[-1], event_ids[-1])
+
+            if any(event_id is None for event_id in event_ids):
+                raise DagsterInvariantViolationError(
+                    "Cannot store asset event tags for null event id."
+                )
+
+            self.store_asset_event_tags(events, event_ids)
+        else:
+            return super().store_event_batch(events)
 
     def store_asset_event(self, event: EventLogEntry, event_id: int) -> None:
         check.inst_param(event, "event", EventLogEntry)
@@ -207,7 +258,7 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
 
         # We switched to storing the entire event record of the last materialization instead of just
         # the AssetMaterialization object, so that we have access to metadata like timestamp,
-        # …
+        # job, run_id, etc.
         #
         # This should make certain asset queries way more performant, without having to do extra
         # queries against the event log.
@@ -222,7 +273,7 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
         # This column is used nowhere else, and as of AssetObservation/AssetMaterializationPlanned
         # event creation, we want to extend this functionality to ensure that assets with any event
         # (observation, materialization, or materialization planned) yielded with timestamp
-        # > wipe timestamp display in …
+        # > wipe timestamp display in the Dagster UI.
 
         # As of the following PRs, we update last_materialization_timestamp to store the timestamp
         # of the latest asset observation, materialization, or materialization_planned that has occurred.
@@ -252,6 +303,26 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
                 query = query.on_conflict_do_nothing()
             conn.execute(query)
 
+    def add_dynamic_partitions(
+        self, partitions_def_name: str, partition_keys: Sequence[str]
+    ) -> None:
+        if not partition_keys:
+            return
+
+        # Overload base implementation to push upsert logic down into the db layer
+        self._check_partitions_table()
+        with self.index_connection() as conn:
+            conn.execute(
+                db_dialects.postgresql.insert(DynamicPartitionsTable)
+                .values(
+                    [
+                        dict(partitions_def_name=partitions_def_name, partition=partition_key)
+                        for partition_key in partition_keys
+                    ]
+                )
+                .on_conflict_do_nothing(),
+            )
+
     def _connect(self) -> ContextManager[Connection]:
         return create_pg_connection(self._engine)
 
@@ -261,18 +332,27 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
     def index_connection(self) -> ContextManager[Connection]:
         return self._connect()
 
+    @contextmanager
+    def index_transaction(self) -> Iterator[Connection]:
+        """Context manager yielding a connection to the index shard that has begun a transaction."""
+        with self.index_connection() as conn:
+            if conn.in_transaction():
+                yield conn
+            else:
+                conn = conn.execution_options(isolation_level="READ COMMITTED")  # noqa: PLW2901
+                with conn.begin():
+                    yield conn
+
     def has_table(self, table_name: str) -> bool:
         return bool(self._engine.dialect.has_table(self._engine.connect(), table_name))
 
     def has_secondary_index(self, name: str) -> bool:
         if name not in self._secondary_index_cache:
-            self._secondary_index_cache[name] = super(
-                PostgresEventLogStorage, self
-            ).has_secondary_index(name)
+            self._secondary_index_cache[name] = super().has_secondary_index(name)
         return self._secondary_index_cache[name]
 
     def enable_secondary_index(self, name: str) -> None:
-        super(…
+        super().enable_secondary_index(name)
         if name in self._secondary_index_cache:
             del self._secondary_index_cache[name]
 
@@ -284,40 +364,28 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
     ) -> None:
         if cursor and EventLogCursor.parse(cursor).is_offset_cursor():
             check.failed("Cannot call `watch` with an offset cursor")
-
         if self._event_watcher is None:
-            self._event_watcher = …
-                self.postgres_url,
-                [CHANNEL_NAME],
-                self._gen_event_log_entry_from_cursor,
-            )
+            self._event_watcher = SqlPollingEventWatcher(self)
 
         self._event_watcher.watch_run(run_id, cursor, callback)
 
     def _gen_event_log_entry_from_cursor(self, cursor) -> EventLogEntry:
         with self._engine.connect() as conn:
             cursor_res = conn.execute(
-                …
+                db_select([SqlEventLogStorageTable.c.event]).where(
                     SqlEventLogStorageTable.c.id == cursor
                 ),
             )
             return deserialize_value(cursor_res.scalar(), EventLogEntry)  # type: ignore
 
     def end_watch(self, run_id: str, handler: EventHandlerFn) -> None:
-        if self._event_watcher …
-        …
-        …
-            self._event_watcher.unwatch_run(run_id, handler)
-
-    def __del__(self) -> None:
-        # Keep the inherent limitations of __del__ in Python in mind!
-        self.dispose()
+        if self._event_watcher:
+            self._event_watcher.unwatch_run(run_id, handler)
 
     def dispose(self) -> None:
-        if …
-        self.…
-        …
-            self._event_watcher.close()
+        if self._event_watcher:
+            self._event_watcher.close()
+            self._event_watcher = None
 
     def alembic_version(self) -> AlembicVersion:
         alembic_config = pg_alembic_config(__file__)
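
The new `add_dynamic_partitions` override pushes deduplication down into the database with PostgreSQL's `INSERT ... ON CONFLICT DO NOTHING`, rather than checking for existing keys in Python first. A minimal standalone sketch of the same pattern; the table definition below is an illustrative stand-in, not the real `DynamicPartitionsTable` schema from dagster core:

import sqlalchemy as db
from sqlalchemy.dialects.postgresql import insert as pg_insert

metadata = db.MetaData()

# Illustrative stand-in for DynamicPartitionsTable (the real table lives in dagster core)
partitions = db.Table(
    "dynamic_partitions",
    metadata,
    db.Column("partitions_def_name", db.Text, nullable=False),
    db.Column("partition", db.Text, nullable=False),
    db.UniqueConstraint("partitions_def_name", "partition"),
)


def add_partitions(engine, name: str, keys: list[str]) -> None:
    # One round trip: rows violating the unique constraint are silently skipped,
    # so re-adding an existing partition key is a no-op rather than an error.
    stmt = (
        pg_insert(partitions)
        .values([{"partitions_def_name": name, "partition": k} for k in keys])
        .on_conflict_do_nothing()
    )
    with engine.begin() as conn:
        conn.execute(stmt)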

dagster_postgres/run_storage/__init__.py
CHANGED
@@ -1 +1 @@
-from .run_storage import PostgresRunStorage as PostgresRunStorage
+from dagster_postgres.run_storage.run_storage import PostgresRunStorage as PostgresRunStorage

dagster_postgres/run_storage/run_storage.py
CHANGED
@@ -1,4 +1,6 @@
-…
+import zlib
+from collections.abc import Mapping
+from typing import ContextManager, Optional  # noqa: UP035
 
 import dagster._check as check
 import sqlalchemy as db
@@ -12,7 +14,8 @@ from dagster._core.storage.runs import (
     RunStorageSqlMetadata,
     SqlRunStorage,
 )
-from dagster._core.storage.runs.schema import KeyValueStoreTable
+from dagster._core.storage.runs.schema import KeyValueStoreTable, SnapshotsTable
+from dagster._core.storage.runs.sql_run_storage import SnapshotType
 from dagster._core.storage.sql import (
     AlembicVersion,
     check_alembic_revision,
@@ -22,16 +25,17 @@ from dagster._core.storage.sql import (
 )
 from dagster._daemon.types import DaemonHeartbeat
 from dagster._serdes import ConfigurableClass, ConfigurableClassData, serialize_value
-from dagster.…
+from dagster._time import datetime_from_timestamp
+from sqlalchemy import event
 from sqlalchemy.engine import Connection
 
-from …
+from dagster_postgres.utils import (
     create_pg_connection,
     pg_alembic_config,
-    pg_statement_timeout,
     pg_url_from_config,
     retry_pg_connection_fn,
     retry_pg_creation_fn,
+    set_pg_statement_timeout,
 )
 
 
@@ -39,7 +43,7 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
     """Postgres-backed run storage.
 
     Users should not directly instantiate this class; it is instantiated by internal machinery when
-    ``…
+    ``dagster-webserver`` and ``dagster-graphql`` load, based on the values in the ``dagster.yaml`` file in
     ``$DAGSTER_HOME``. Configuration of this class should be done by setting values in that file.
 
     To use Postgres for all of the components of your instance storage, you can add the following
@@ -104,20 +108,24 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
             # This revision may be shared by any other dagster storage classes using the same DB
             stamp_alembic_rev(pg_alembic_config(__file__), conn)
 
-    def …
-    …
+    def optimize_for_webserver(
+        self, statement_timeout: int, pool_recycle: int, max_overflow: int
+    ) -> None:
+        # When running in dagster-webserver, hold an open connection and set statement_timeout
+        kwargs = {
+            "isolation_level": "AUTOCOMMIT",
+            "pool_size": 1,
+            "pool_recycle": pool_recycle,
+            "max_overflow": max_overflow,
+        }
         existing_options = self._engine.url.query.get("options")
-        timeout_option = pg_statement_timeout(statement_timeout)
         if existing_options:
-            …
-            …
-            …
-            …
-            …
-            …
-            pool_size=1,
-            connect_args={"options": options},
-            pool_recycle=pool_recycle,
+            kwargs["connect_args"] = {"options": existing_options}
+        self._engine = create_engine(self.postgres_url, **kwargs)
+        event.listen(
+            self._engine,
+            "connect",
+            lambda connection, _: set_pg_statement_timeout(connection, statement_timeout),
         )
 
     @property
@@ -129,7 +137,7 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
         return pg_config()
 
     @classmethod
-    def from_config_value(
+    def from_config_value(  # pyright: ignore[reportIncompatibleMethodOverride]
         cls, inst_data: Optional[ConfigurableClassData], config_value: PostgresStorageConfig
     ):
         return PostgresRunStorage(
@@ -160,13 +168,11 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
 
     def has_built_index(self, migration_name: str) -> bool:
         if migration_name not in self._index_migration_cache:
-            self._index_migration_cache[migration_name] = super(
-                PostgresRunStorage, self
-            ).has_built_index(migration_name)
+            self._index_migration_cache[migration_name] = super().has_built_index(migration_name)
         return self._index_migration_cache[migration_name]
 
     def mark_index_built(self, migration_name: str) -> None:
-        super(…
+        super().mark_index_built(migration_name)
         if migration_name in self._index_migration_cache:
             del self._index_migration_cache[migration_name]
 
@@ -176,7 +182,7 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
             conn.execute(
                 db_dialects.postgresql.insert(DaemonHeartbeatsTable)
                 .values(
-                    timestamp=…
+                    timestamp=datetime_from_timestamp(daemon_heartbeat.timestamp),
                     daemon_type=daemon_heartbeat.daemon_type,
                     daemon_id=daemon_heartbeat.daemon_id,
                     body=serialize_value(daemon_heartbeat),
@@ -184,7 +190,7 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
                 .on_conflict_do_update(
                     index_elements=[DaemonHeartbeatsTable.c.daemon_type],
                     set_={
-                        "timestamp": …
+                        "timestamp": datetime_from_timestamp(daemon_heartbeat.timestamp),
                         "daemon_id": daemon_heartbeat.daemon_id,
                         "body": serialize_value(daemon_heartbeat),
                     },
@@ -199,7 +205,7 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
     def set_cursor_values(self, pairs: Mapping[str, str]) -> None:
         check.mapping_param(pairs, "pairs", key_type=str, value_type=str)
 
-        # pg …
+        # pg specific on_conflict_do_update
         insert_stmt = db_dialects.postgresql.insert(KeyValueStoreTable).values(
             [{"key": k, "value": v} for k, v in pairs.items()]
         )
@@ -217,6 +223,20 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
         with self.connect() as conn:
             conn.execute(upsert_stmt)
 
+    def _add_snapshot(self, snapshot_id: str, snapshot_obj, snapshot_type: SnapshotType) -> str:
+        with self.connect() as conn:
+            snapshot_insert = (
+                db_dialects.postgresql.insert(SnapshotsTable)
+                .values(
+                    snapshot_id=snapshot_id,
+                    snapshot_body=zlib.compress(serialize_value(snapshot_obj).encode("utf-8")),
+                    snapshot_type=snapshot_type.value,
+                )
+                .on_conflict_do_nothing()
+            )
+            conn.execute(snapshot_insert)
+            return snapshot_id
+
     def alembic_version(self) -> AlembicVersion:
         alembic_config = pg_alembic_config(__file__)
         with self.connect() as conn:
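
The new `_add_snapshot` writes snapshot bodies zlib-compressed and relies on `ON CONFLICT DO NOTHING`, so two writers racing to insert the same snapshot id both succeed. A quick sketch of the compression round trip; the decompression half is an assumption for illustration, since the read path is not part of this diff:

import zlib


def compress_snapshot(serialized: str) -> bytes:
    # What the storage layer writes into the snapshot_body column
    return zlib.compress(serialized.encode("utf-8"))


def decompress_snapshot(body: bytes) -> str:
    # Hypothetical read path: inverts the write above
    return zlib.decompress(body).decode("utf-8")


assert decompress_snapshot(compress_snapshot('{"pipeline": "demo"}')) == '{"pipeline": "demo"}'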

dagster_postgres/schedule_storage/schedule_storage.py
CHANGED
@@ -1,15 +1,22 @@
-from …
+from collections.abc import Sequence
+from typing import ContextManager, Optional  # noqa: UP035
 
 import dagster._check as check
-import pendulum
 import sqlalchemy as db
 import sqlalchemy.dialects as db_dialects
 import sqlalchemy.pool as db_pool
 from dagster._config.config_schema import UserConfigSchema
+from dagster._core.definitions.asset_key import EntityKey
+from dagster._core.definitions.declarative_automation.serialized_objects import (
+    AutomationConditionEvaluationWithRunIds,
+)
 from dagster._core.scheduler.instigation import InstigatorState
 from dagster._core.storage.config import PostgresStorageConfig, pg_config
 from dagster._core.storage.schedules import ScheduleStorageSqlMetadata, SqlScheduleStorage
-from dagster._core.storage.schedules.schema import …
+from dagster._core.storage.schedules.schema import (
+    AssetDaemonAssetEvaluationsTable,
+    InstigatorsTable,
+)
 from dagster._core.storage.sql import (
     AlembicVersion,
     check_alembic_revision,
@@ -18,15 +25,17 @@ from dagster._core.storage.sql import (
     stamp_alembic_rev,
 )
 from dagster._serdes import ConfigurableClass, ConfigurableClassData, serialize_value
+from dagster._time import get_current_datetime
+from sqlalchemy import event
 from sqlalchemy.engine import Connection
 
-from …
+from dagster_postgres.utils import (
     create_pg_connection,
     pg_alembic_config,
-    pg_statement_timeout,
     pg_url_from_config,
     retry_pg_connection_fn,
     retry_pg_creation_fn,
+    set_pg_statement_timeout,
 )
 
 
@@ -34,7 +43,7 @@ class PostgresScheduleStorage(SqlScheduleStorage, ConfigurableClass):
     """Postgres-backed run storage.
 
     Users should not directly instantiate this class; it is instantiated by internal machinery when
-    ``…
+    ``dagster-webserver`` and ``dagster-graphql`` load, based on the values in the ``dagster.yaml`` file in
     ``$DAGSTER_HOME``. Configuration of this class should be done by setting values in that file.
 
     To use Postgres for all of the components of your instance storage, you can add the following
@@ -95,20 +104,24 @@ class PostgresScheduleStorage(SqlScheduleStorage, ConfigurableClass):
             self.migrate()
             self.optimize()
 
-    def …
-    …
+    def optimize_for_webserver(
+        self, statement_timeout: int, pool_recycle: int, max_overflow: int
+    ) -> None:
+        # When running in dagster-webserver, hold an open connection and set statement_timeout
+        kwargs = {
+            "isolation_level": "AUTOCOMMIT",
+            "pool_size": 1,
+            "pool_recycle": pool_recycle,
+            "max_overflow": max_overflow,
+        }
         existing_options = self._engine.url.query.get("options")
-        timeout_option = pg_statement_timeout(statement_timeout)
         if existing_options:
-            …
-            …
-            …
-            …
-            …
-            …
-            pool_size=1,
-            connect_args={"options": options},
-            pool_recycle=pool_recycle,
+            kwargs["connect_args"] = {"options": existing_options}
+        self._engine = create_engine(self.postgres_url, **kwargs)
+        event.listen(
+            self._engine,
+            "connect",
+            lambda connection, _: set_pg_statement_timeout(connection, statement_timeout),
         )
 
     @property
@@ -120,7 +133,7 @@ class PostgresScheduleStorage(SqlScheduleStorage, ConfigurableClass):
         return pg_config()
 
     @classmethod
-    def from_config_value(
+    def from_config_value(  # pyright: ignore[reportIncompatibleMethodOverride]
         cls, inst_data: Optional[ConfigurableClassData], config_value: PostgresStorageConfig
     ) -> "PostgresScheduleStorage":
         return PostgresScheduleStorage(
@@ -167,11 +180,44 @@ class PostgresScheduleStorage(SqlScheduleStorage, ConfigurableClass):
                     "status": state.status.value,
                     "instigator_type": state.instigator_type.value,
                     "instigator_body": serialize_value(state),
-                    "update_timestamp": …
+                    "update_timestamp": get_current_datetime(),
                 },
             )
         )
 
+    def add_auto_materialize_asset_evaluations(
+        self,
+        evaluation_id: int,
+        asset_evaluations: Sequence[AutomationConditionEvaluationWithRunIds[EntityKey]],
+    ):
+        if not asset_evaluations:
+            return
+
+        insert_stmt = db_dialects.postgresql.insert(AssetDaemonAssetEvaluationsTable).values(
+            [
+                {
+                    "evaluation_id": evaluation_id,
+                    "asset_key": evaluation.key.to_db_string(),
+                    "asset_evaluation_body": serialize_value(evaluation),
+                    "num_requested": evaluation.num_requested,
+                }
+                for evaluation in asset_evaluations
+            ]
+        )
+        upsert_stmt = insert_stmt.on_conflict_do_update(
+            index_elements=[
+                AssetDaemonAssetEvaluationsTable.c.evaluation_id,
+                AssetDaemonAssetEvaluationsTable.c.asset_key,
+            ],
+            set_={
+                "asset_evaluation_body": insert_stmt.excluded.asset_evaluation_body,
+                "num_requested": insert_stmt.excluded.num_requested,
+            },
+        )
+
+        with self.connect() as conn:
+            conn.execute(upsert_stmt)
+
     def alembic_version(self) -> AlembicVersion:
         alembic_config = pg_alembic_config(__file__)
         with self.connect() as conn:
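
The new `add_auto_materialize_asset_evaluations` uses the other half of PostgreSQL's upsert vocabulary, `ON CONFLICT ... DO UPDATE`, where the `excluded` pseudo-row carries the values from the conflicting insert. A minimal sketch of the pattern with a hypothetical table keyed the same way as the `(evaluation_id, asset_key)` index above:

from sqlalchemy import Column, Integer, MetaData, Table, Text, UniqueConstraint
from sqlalchemy.dialects.postgresql import insert as pg_insert

metadata = MetaData()

# Hypothetical stand-in for AssetDaemonAssetEvaluationsTable
evaluations = Table(
    "evaluations",
    metadata,
    Column("evaluation_id", Integer, nullable=False),
    Column("asset_key", Text, nullable=False),
    Column("body", Text),
    UniqueConstraint("evaluation_id", "asset_key"),
)


def upsert_evaluation(conn, evaluation_id: int, asset_key: str, body: str) -> None:
    stmt = pg_insert(evaluations).values(
        evaluation_id=evaluation_id, asset_key=asset_key, body=body
    )
    # excluded.* refers to the row that failed to insert, so a retry with a newer
    # body overwrites the stored row instead of raising an IntegrityError.
    conn.execute(
        stmt.on_conflict_do_update(
            index_elements=[evaluations.c.evaluation_id, evaluations.c.asset_key],
            set_={"body": stmt.excluded.body},
        )
    )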
dagster_postgres/storage.py
CHANGED
@@ -9,17 +9,17 @@ from dagster._core.storage.runs import RunStorage
 from dagster._core.storage.schedules import ScheduleStorage
 from dagster._serdes import ConfigurableClass, ConfigurableClassData
 
-from .event_log import PostgresEventLogStorage
-from .run_storage import PostgresRunStorage
-from .schedule_storage import PostgresScheduleStorage
-from .utils import pg_url_from_config
+from dagster_postgres.event_log import PostgresEventLogStorage
+from dagster_postgres.run_storage import PostgresRunStorage
+from dagster_postgres.schedule_storage import PostgresScheduleStorage
+from dagster_postgres.utils import pg_url_from_config
 
 
 class DagsterPostgresStorage(DagsterStorage, ConfigurableClass):
     """Postgres-backed dagster storage.
 
     Users should not directly instantiate this class; it is instantiated by internal machinery when
-    ``…
+    ``dagster-webserver`` and ``dagster-daemon`` load, based on the values in the ``dagster.yaml`` file in
     ``$DAGSTER_HOME``. Configuration of this class should be done by setting values in that file.
 
     To use Postgres for storage, you can add a block such as the following to your
@@ -59,7 +59,7 @@ class DagsterPostgresStorage(DagsterStorage, ConfigurableClass):
         return pg_config()
 
     @classmethod
-    def from_config_value(
+    def from_config_value(  # pyright: ignore[reportIncompatibleMethodOverride]
         cls, inst_data: Optional[ConfigurableClassData], config_value: PostgresStorageConfig
     ) -> "DagsterPostgresStorage":
         return DagsterPostgresStorage(

dagster_postgres/test_fixtures/__init__.py
ADDED
@@ -0,0 +1,75 @@
+import tempfile
+from contextlib import contextmanager
+from pathlib import Path
+
+import pytest
+from dagster._core.test_utils import instance_for_test
+from dagster._utils.merger import merge_dicts
+from dagster_test.fixtures import docker_compose_cm, network_name_from_yml
+
+from dagster_postgres.utils import get_conn_string, wait_for_connection
+
+compose_file = Path(__file__).parent / "docker-compose.yml"
+
+
+@pytest.fixture(scope="session")
+def postgres_network():
+    yield network_name_from_yml(compose_file)
+
+
+@pytest.fixture(scope="session")
+def postgres_hostname():
+    with docker_compose_cm(docker_compose_yml=compose_file) as hostnames:
+        yield hostnames["postgres"]
+
+
+@pytest.fixture(scope="session")
+def postgres_conn_str(postgres_hostname):
+    conn_str = get_conn_string(
+        username="test",
+        password="test",
+        hostname=postgres_hostname,
+        db_name="test",
+        params=dict(connect_timeout=5),
+    )
+    wait_for_connection(
+        conn_str,
+        retry_limit=10,
+        retry_wait=3,
+    )
+
+    yield conn_str
+
+
+@pytest.fixture
+def postgres_instance(postgres_conn_str):
+    @contextmanager
+    def _instance(overrides=None):
+        with tempfile.TemporaryDirectory() as temp_dir:
+            with instance_for_test(
+                temp_dir=temp_dir,
+                overrides=merge_dicts(
+                    {
+                        "run_storage": {
+                            "module": "dagster_postgres.run_storage.run_storage",
+                            "class": "PostgresRunStorage",
+                            "config": {"postgres_url": postgres_conn_str},
+                        },
+                        "event_log_storage": {
+                            "module": "dagster_postgres.event_log.event_log",
+                            "class": "PostgresEventLogStorage",
+                            "config": {"postgres_url": postgres_conn_str},
+                        },
+                        "schedule_storage": {
+                            "module": "dagster_postgres.schedule_storage.schedule_storage",
+                            "class": "PostgresScheduleStorage",
+                            "config": {"postgres_url": postgres_conn_str},
+                        },
+                    },
+                    overrides if overrides else {},
+                ),
+            ) as instance:
+                instance.wipe()
+                yield instance
+
+    return _instance
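
The new `test_fixtures` package gives downstream projects a Dockerized Postgres plus a fully wired `DagsterInstance` for tests. A consumer test might look roughly like the sketch below; the job and test are hypothetical, and the fixtures are assumed to be made visible to pytest, e.g. re-exported from a conftest.py:

from dagster import job, op


@op
def emit_one():
    return 1


@job
def emit_one_job():
    emit_one()


def test_run_against_postgres(postgres_instance):
    # postgres_instance is a factory returning a context manager, not an instance itself
    with postgres_instance() as instance:
        result = emit_one_job.execute_in_process(instance=instance)
        assert result.success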
dagster_postgres/utils.py
CHANGED
@@ -1,7 +1,8 @@
 import logging
 import time
+from collections.abc import Callable, Iterator, Mapping
 from contextlib import contextmanager
-from typing import Any, …
+from typing import Any, Optional, TypeVar
 from urllib.parse import quote, urlencode
 
 import alembic.config
@@ -169,6 +170,8 @@ def create_pg_connection(
         conn.close()
 
 
-def …
+def set_pg_statement_timeout(conn: psycopg2.extensions.connection, millis: int):
     check.int_param(millis, "millis")
-    …
+    with conn:
+        with conn.cursor() as curs:
+            curs.execute(f"SET statement_timeout = {millis};")
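
`set_pg_statement_timeout` replaces the old `pg_statement_timeout` helper that spliced `-c statement_timeout=...` into the connection options string. As the `optimize_for_webserver` hunks above show, the storage classes now register it as a SQLAlchemy `connect` listener, so every new DBAPI connection, including recycled and overflow ones, picks up the timeout. A condensed sketch of that wiring, with a placeholder URL and timeout value:

import psycopg2.extensions
from sqlalchemy import create_engine, event


def set_pg_statement_timeout(conn: psycopg2.extensions.connection, millis: int) -> None:
    with conn:
        with conn.cursor() as curs:
            curs.execute(f"SET statement_timeout = {millis};")


engine = create_engine(
    "postgresql://user:pass@localhost:5432/dagster",  # placeholder URL
    isolation_level="AUTOCOMMIT",
    pool_size=1,
)
# Fires once per new DBAPI connection, so pool churn cannot drop the setting
event.listen(
    engine,
    "connect",
    lambda dbapi_conn, _record: set_pg_statement_timeout(dbapi_conn, 5000),
)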
dagster_postgres/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.19.2"
+__version__ = "0.28.11"

dagster_postgres-0.28.11.dist-info/METADATA
ADDED
@@ -0,0 +1,28 @@
+Metadata-Version: 2.4
+Name: dagster-postgres
+Version: 0.28.11
+Summary: A Dagster integration for postgres
+Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-postgres
+Author: Dagster Labs
+Author-email: hello@dagsterlabs.com
+License: Apache-2.0
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10,<3.15
+License-File: LICENSE
+Requires-Dist: dagster==1.12.11
+Requires-Dist: psycopg2-binary
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary

dagster_postgres-0.28.11.dist-info/RECORD
ADDED
@@ -0,0 +1,19 @@
+dagster_postgres/__init__.py,sha256=rZfjhBDGC_SIGnKuiSCzi7D0xD36mAnchDLRL-n1Tmk,561
+dagster_postgres/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
+dagster_postgres/storage.py,sha256=CiPA773j0BQvPtCEz5vVqKjap9h0ODAxR_R_CvbI-bI,4310
+dagster_postgres/utils.py,sha256=BzkzfQ9t9U9FMCs_3NazAKW7stK0Wrz_nH0KK4NQfi8,5930
+dagster_postgres/version.py,sha256=MlSqB7SmZ8R1F0K_PtG4rMWRGGllC7hEsX-reHAF_8A,24
+dagster_postgres/alembic/alembic.ini,sha256=GovyDEhu_6HvkWV6txqjdDBOe4BseSM0YDWGxXM5_cA,986
+dagster_postgres/event_log/__init__.py,sha256=wRcUR-StRNrPCqpEzi0MRY8b-r_TEWV17OsEynFqlLs,100
+dagster_postgres/event_log/event_log.py,sha256=w8_ZsvDb0vt7qDVSS912jmNRpLDI6F0KQrwribyAmwM,16145
+dagster_postgres/run_storage/__init__.py,sha256=oW_546mJ5K-e-RF0Ou7r-4fHWxFthHgPPhWxklsVK1g,94
+dagster_postgres/run_storage/run_storage.py,sha256=VMXT5sb5n04ViWOYj2eOoZqOk78hqL38J6CuikM79No,9757
+dagster_postgres/schedule_storage/__init__.py,sha256=-jW-1S4Xf5Ew-cz-DjKjU5sVs9EEly_2ELMLOXTewv0,123
+dagster_postgres/schedule_storage/schedule_storage.py,sha256=iPKcvnj1eEeG5LH73GoFvoS3hmE8MFddiFyf7QoKZ3g,8826
+dagster_postgres/test_fixtures/__init__.py,sha256=ZIcbGYa_CMmVF0YkuiX4uI3wRjBke_2VE8QXKcji0Y4,2447
+dagster_postgres/test_fixtures/docker-compose.yml,sha256=hp2VTnENYK6CL2Yae3IsktssahPFQiHyhpNcG36CivM,208
+dagster_postgres-0.28.11.dist-info/licenses/LICENSE,sha256=4lsMW-RCvfVD4_F57wrmpe3vX1xwUk_OAKKmV_XT7Z0,11348
+dagster_postgres-0.28.11.dist-info/METADATA,sha256=971I43x8Ltg7CRFX9AILtVaxOlw1PK7fsQ0jwuGdEYo,947
+dagster_postgres-0.28.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dagster_postgres-0.28.11.dist-info/top_level.txt,sha256=lScMtAEKDX1yIv2tGa1nzntBa0HEStfWPfCwD8FWlHk,17
+dagster_postgres-0.28.11.dist-info/RECORD,,

{dagster_postgres-0.19.2.dist-info → dagster_postgres-0.28.11.dist-info/licenses}/LICENSE
RENAMED
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright …
+   Copyright 2025 Dagster Labs, Inc.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

dagster_postgres/event_log/event_watcher.py
DELETED
@@ -1,137 +0,0 @@
-import logging
-import threading
-from collections import defaultdict
-from typing import Callable, List, MutableMapping, Optional, Sequence
-
-import dagster._check as check
-from dagster._core.event_api import EventHandlerFn
-from dagster._core.events.log import EventLogEntry
-from dagster._core.storage.event_log.base import EventLogCursor
-from dagster._core.storage.event_log.polling_event_watcher import CallbackAfterCursor
-
-from ..pynotify import await_pg_notifications
-
-POLLING_CADENCE = 0.25
-
-
-def watcher_thread(
-    conn_string: str,
-    handlers_dict: MutableMapping[str, Sequence[CallbackAfterCursor]],
-    dict_lock: threading.Lock,
-    watcher_thread_exit: threading.Event,
-    watcher_thread_started: threading.Event,
-    channels: Sequence[str],
-    gen_event_log_entry_from_cursor: Callable[[int], EventLogEntry],
-) -> None:
-    for notif in await_pg_notifications(
-        conn_string,
-        channels=channels,
-        timeout=POLLING_CADENCE,
-        yield_on_timeout=True,
-        exit_event=watcher_thread_exit,
-        started_event=watcher_thread_started,
-    ):
-        if notif is None:
-            if watcher_thread_exit.is_set():
-                break
-        else:
-            run_id, index_str = notif.payload.split("_")
-            with dict_lock:
-                if run_id not in handlers_dict:
-                    continue
-
-            index = int(index_str)
-            with dict_lock:
-                handlers = handlers_dict.get(run_id, [])
-
-            dagster_event = gen_event_log_entry_from_cursor(index)
-
-            for callback_with_cursor in handlers:
-                try:
-                    if (
-                        callback_with_cursor.cursor is None
-                        or EventLogCursor.parse(callback_with_cursor.cursor).storage_id() < index
-                    ):
-                        callback_with_cursor.callback(
-                            dagster_event, str(EventLogCursor.from_storage_id(index))
-                        )
-                except:
-                    logging.exception("Exception in callback for event watch on run %s.", run_id)
-
-
-class PostgresEventWatcher:
-    def __init__(
-        self,
-        conn_string: str,
-        channels: Sequence[str],
-        gen_event_log_entry_from_cursor: Callable[[int], EventLogEntry],
-    ):
-        self._conn_string: str = check.str_param(conn_string, "conn_string")
-        self._handlers_dict: MutableMapping[str, List[CallbackAfterCursor]] = defaultdict(list)
-        self._dict_lock: threading.Lock = threading.Lock()
-        self._watcher_thread_exit: Optional[threading.Event] = None
-        self._watcher_thread_started: Optional[threading.Event] = None
-        self._watcher_thread: Optional[threading.Thread] = None
-        self._channels: Sequence[str] = check.sequence_param(channels, "channels")
-        self._gen_event_log_entry_from_cursor: Callable[
-            [int], EventLogEntry
-        ] = check.callable_param(gen_event_log_entry_from_cursor, "gen_event_log_entry_from_cursor")
-
-    def watch_run(
-        self,
-        run_id: str,
-        cursor: Optional[str],
-        callback: EventHandlerFn,
-        start_timeout: int = 15,
-    ) -> None:
-        check.str_param(run_id, "run_id")
-        check.opt_str_param(cursor, "cursor")
-        check.callable_param(callback, "callback")
-        if not self._watcher_thread:
-            self._watcher_thread_exit = threading.Event()
-            self._watcher_thread_started = threading.Event()
-
-            self._watcher_thread = threading.Thread(
-                target=watcher_thread,
-                args=(
-                    self._conn_string,
-                    self._handlers_dict,
-                    self._dict_lock,
-                    self._watcher_thread_exit,
-                    self._watcher_thread_started,
-                    self._channels,
-                    self._gen_event_log_entry_from_cursor,
-                ),
-                name="postgres-event-watch",
-            )
-            self._watcher_thread.daemon = True
-            self._watcher_thread.start()
-
-            # Wait until the watcher thread is actually listening before returning
-            self._watcher_thread_started.wait(start_timeout)
-            if not self._watcher_thread_started.is_set():
-                raise Exception("Watcher thread never started")
-
-        with self._dict_lock:
-            self._handlers_dict[run_id].append(CallbackAfterCursor(cursor, callback))
-
-    def unwatch_run(self, run_id: str, handler: EventHandlerFn) -> None:
-        check.str_param(run_id, "run_id")
-        check.callable_param(handler, "handler")
-        with self._dict_lock:
-            if run_id in self._handlers_dict:
-                self._handlers_dict[run_id] = [
-                    callback_with_cursor
-                    for callback_with_cursor in self._handlers_dict[run_id]
-                    if callback_with_cursor.callback != handler
-                ]
-                if not self._handlers_dict[run_id]:
-                    del self._handlers_dict[run_id]
-
-    def close(self) -> None:
-        if self._watcher_thread:
-            self._watcher_thread_exit.set()  # type: ignore
-            if self._watcher_thread.is_alive():
-                self._watcher_thread.join()
-            self._watcher_thread_exit = None
-            self._watcher_thread = None
dagster_postgres/pynotify.py
DELETED
@@ -1,138 +0,0 @@
-# copied from https://github.com/djrobstep/pgnotify/blob/43bbe7bd3cedfb99700e4ab370cb6f5d7426bea3/pgnotify/notify.py
-
-# This is free and unencumbered software released into the public domain.
-
-# Anyone is free to copy, modify, publish, use, compile, sell, or
-# distribute this software, either in source code form or as a compiled
-# binary, for any purpose, commercial or non-commercial, and by any
-# means.
-
-# In jurisdictions that recognize copyright laws, the author or authors
-# of this software dedicate any and all copyright interest in the
-# software to the public domain. We make this dedication for the benefit
-# of the public at large and to the detriment of our heirs and
-# successors. We intend this dedication to be an overt act of
-# relinquishment in perpetuity of all present and future rights to this
-# software under copyright law.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-# For more information, please refer to <http://unlicense.org>
-
-import errno
-import os
-import select
-import signal
-import sys
-from threading import Event
-from typing import Iterator, Optional, Sequence
-
-import dagster._check as check
-from dagster._core.storage.sql import create_engine
-from psycopg2.extensions import Notify
-
-from .utils import create_pg_connection
-
-
-def get_wakeup_fd() -> int:
-    pipe_r, pipe_w = os.pipe()
-    if "win" not in sys.platform:
-        import fcntl
-
-        flags = fcntl.fcntl(pipe_w, fcntl.F_GETFL, 0)
-        flags = os.O_NONBLOCK
-        flags = fcntl.fcntl(pipe_w, fcntl.F_SETFL, flags)
-    signal.set_wakeup_fd(pipe_w)
-    return pipe_r
-
-
-def _empty_handler(_signal, _frame):
-    pass
-
-
-def quote_table_name(name: str) -> str:
-    return f'"{name}"'
-
-
-def start_listening(connection, channels):
-    names = (quote_table_name(each) for each in channels)
-    listens = "; ".join([f"LISTEN {n}" for n in names])
-
-    with connection.cursor() as curs:
-        curs.execute(listens)
-
-
-def construct_signals(arg) -> signal.Signals:
-    # function exists to consolidate and scope pylint directive
-    return signal.Signals(arg)
-
-
-def await_pg_notifications(
-    conn_string: str,
-    channels: Optional[Sequence[str]] = None,
-    timeout: float = 5.0,
-    yield_on_timeout: bool = False,
-    exit_event: Optional[Event] = None,
-    started_event: Optional[Event] = None,
-) -> Iterator[Optional[Notify]]:
-    """Subscribe to PostgreSQL notifications, and handle them
-    in infinite-loop style.
-
-    Args:
-        conn_string (str): connection string to PG DB
-        channels (Optional[List[str]], optional): List of channel names to listen to. Defaults to None.
-        timeout (float, optional): Timeout interval. Defaults to 5.0.
-        yield_on_timeout (bool, optional): Should the function yield on timeout. Defaults to False.
-        exit_event (Optional[Event], optional): Event that indicates that polling for new notifications should stop. Defaults to None.
-        started_event (Optional[Event], optional): Event that this function can set to notify that the subscription has been established. Defaults to None.
-
-    Yields:
-        Iterator[Optional[Notify]]: Can yield one of two types:
-            1: None, in case of timeout
-            2: Notify, in case of successful notification reception
-    """
-    check.str_param(conn_string, "conn_string")
-    channels = None if channels is None else check.sequence_param(channels, "channels", of_type=str)
-    check.float_param(timeout, "timeout")
-    check.bool_param(yield_on_timeout, "yield_on_timeout")
-
-    engine = create_engine(
-        conn_string,
-        isolation_level="AUTOCOMMIT",
-    )
-
-    with create_pg_connection(engine) as conn:
-        connection = conn.connection.connection  # DBAPI connection  # type: ignore
-
-        if channels:
-            start_listening(connection, channels)
-
-        if started_event:
-            started_event.set()
-
-        while True and not (exit_event and exit_event.is_set()):
-            try:
-                r, w, x = select.select([connection], [], [], max(0, timeout))
-                if (r, w, x) == ([], [], []):
-                    if yield_on_timeout:
-                        yield None
-
-                if connection in r:
-                    connection.poll()
-
-                    # copy the conn.notifies list/queue & empty it
-                    notify_list, connection.notifies = connection.notifies, []
-                    for notif in notify_list:
-                        yield notif
-
-            except select.error as e:
-                if e.errno == errno.EINTR:
-                    pass
-                else:
-                    raise
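
With `event_watcher.py` and its `pynotify` helper both deleted, run watching no longer holds a dedicated LISTEN connection open; the `SqlPollingEventWatcher` imported in `event_log.py` re-queries the event log past each watcher's cursor on an interval instead (the `NOTIFY` in `store_event` survives only for version skew, as its comment notes). A deliberately simplified illustration of the polling idea, a toy rather than dagster's actual implementation:

import threading


class PollingWatcher:
    """Toy illustration of cursor-based polling in place of LISTEN/NOTIFY."""

    def __init__(self, fetch_after, interval: float = 0.5):
        # fetch_after(cursor) -> iterable of (new_cursor, event) pairs, in cursor order
        self._fetch_after = fetch_after
        self._interval = interval
        self._stop = threading.Event()

    def watch(self, cursor, callback) -> threading.Thread:
        def loop():
            cur = cursor
            while not self._stop.is_set():
                for cur, evt in self._fetch_after(cur):
                    callback(evt, cur)  # deliver each event past the cursor
                self._stop.wait(self._interval)  # sleep, but wake promptly on close()

        thread = threading.Thread(target=loop, daemon=True)
        thread.start()
        return thread

    def close(self) -> None:
        self._stop.set()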

dagster_postgres-0.19.2.dist-info/METADATA
DELETED
@@ -1,19 +0,0 @@
-Metadata-Version: 2.1
-Name: dagster-postgres
-Version: 0.19.2
-Summary: A Dagster integration for postgres
-Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-postgres
-Author: Elementl
-Author-email: hello@elementl.com
-License: Apache-2.0
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Operating System :: OS Independent
-License-File: LICENSE
-Requires-Dist: dagster (==1.3.2)
-Requires-Dist: psycopg2-binary
-

dagster_postgres-0.19.2.dist-info/RECORD
DELETED
@@ -1,19 +0,0 @@
-dagster_postgres/__init__.py,sha256=LEcXwiGeuZgIvfDg0PVcRuKjUmg0t9Uslx5nLZ8Yg08,480
-dagster_postgres/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
-dagster_postgres/pynotify.py,sha256=kpAZ24THE1qYQIxScUk21Mp3QV9qd92P3CMXYGEgRIU,5004
-dagster_postgres/storage.py,sha256=cEr8Hkab9jgLzimC-efkSwwQXWpVKUvN-RHhf4IpuB8,4181
-dagster_postgres/utils.py,sha256=b_KukmD2puOJdvTFy0OUZtM5sGWyE1Fq6nP9hmUmRCc,5798
-dagster_postgres/version.py,sha256=qZNYBPCf5y9eOSOCnc71mXgsDo0Xwb9v1HTFq7X3sdU,23
-dagster_postgres/alembic/alembic.ini,sha256=S0-B6cHIaS_CJaJc2dd0NiQfDLIWfvaZJGZYQsH7tSY,1040
-dagster_postgres/event_log/__init__.py,sha256=T95tRLYxnC-g69K_5G9i1NLTXi3pGexLVKeGug8niMA,146
-dagster_postgres/event_log/event_log.py,sha256=tSBNxccx8bXFMDcWYPNqBpjG-EbF86zgFNZ_Ix3-ttc,13001
-dagster_postgres/event_log/event_watcher.py,sha256=59TuxkNX56Va54GJFqdQSzSCmA14onS_4l5IFYbRxb8,5346
-dagster_postgres/run_storage/__init__.py,sha256=4bI2C2kkZO9glXH8jIUViYBRSeMjdDHiVQVi_k1HkYw,66
-dagster_postgres/run_storage/run_storage.py,sha256=x35qMKJVhOtEw-CiYiHBw8K4WcdCd8AUYYao7mYMQiQ,8867
-dagster_postgres/schedule_storage/__init__.py,sha256=GKJlFZYh27fUxJDMQwNJUwibbTnip1ledbcokczutmk,81
-dagster_postgres/schedule_storage/schedule_storage.py,sha256=UDKJ1ZEWYtNWHACI8ap1mZlfB8WXSwGtNgqgXrleSLU,7045
-dagster_postgres-0.19.2.dist-info/LICENSE,sha256=-gtoVIAZYUHYmNHISZg982FI4Oh19mV1nxgTVW8eCB8,11344
-dagster_postgres-0.19.2.dist-info/METADATA,sha256=whK5RB_yBbNjR367aSYLqTOh80XZ9mEzhdXg7pFo5aI,727
-dagster_postgres-0.19.2.dist-info/WHEEL,sha256=p46_5Uhzqz6AzeSosiOnxK-zmFja1i22CrQCjmYe8ec,92
-dagster_postgres-0.19.2.dist-info/top_level.txt,sha256=lScMtAEKDX1yIv2tGa1nzntBa0HEStfWPfCwD8FWlHk,17
-dagster_postgres-0.19.2.dist-info/RECORD,,

{dagster_postgres-0.19.2.dist-info → dagster_postgres-0.28.11.dist-info}/top_level.txt
RENAMED
File without changes