dagster-postgres 0.19.2__py3-none-any.whl → 0.28.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dagster_postgres/__init__.py CHANGED
@@ -1,10 +1,10 @@
-from dagster._core.libraries import DagsterLibraryRegistry
+from dagster_shared.libraries import DagsterLibraryRegistry
 
-from .event_log import PostgresEventLogStorage
-from .run_storage import PostgresRunStorage
-from .schedule_storage import PostgresScheduleStorage
-from .storage import DagsterPostgresStorage
-from .version import __version__
+from dagster_postgres.event_log import PostgresEventLogStorage
+from dagster_postgres.run_storage import PostgresRunStorage
+from dagster_postgres.schedule_storage import PostgresScheduleStorage
+from dagster_postgres.storage import DagsterPostgresStorage
+from dagster_postgres.version import __version__
 
 DagsterLibraryRegistry.register("dagster-postgres", __version__)
 __all__ = [
dagster_postgres/alembic/alembic.ini CHANGED
@@ -8,11 +8,11 @@ script_location = dagster:_core/storage/alembic
 # on newly generated revision scripts. See the documentation for further
 # detail and examples
 
-# format using "black" - use the console_scripts runner, against the "black" entrypoint
-hooks = black
-black.type = console_scripts
-black.entrypoint = black
-black.options = --line-length 100 --target-version py36 --target-version py37 --target-version py38 -S --fast
+# format using "ruff" - use the console_scripts runner, against the "ruff" entrypoint
+hooks = ruff
+ruff.type = console_scripts
+ruff.entrypoint = ruff
+ruff.options = format --target-version=py38 --line-length=100
 
 # Logging configuration
 [loggers]
dagster_postgres/event_log/__init__.py CHANGED
@@ -1,2 +1 @@
-from .event_log import PostgresEventLogStorage as PostgresEventLogStorage
-from .event_watcher import PostgresEventWatcher as PostgresEventWatcher
+from dagster_postgres.event_log.event_log import PostgresEventLogStorage as PostgresEventLogStorage
dagster_postgres/event_log/event_log.py CHANGED
@@ -1,4 +1,6 @@
-from typing import Any, ContextManager, Mapping, Optional
+from collections.abc import Iterator, Mapping, Sequence
+from contextlib import contextmanager
+from typing import Any, ContextManager, Optional, cast  # noqa: UP035
 
 import dagster._check as check
 import sqlalchemy as db
@@ -7,17 +9,19 @@ import sqlalchemy.pool as db_pool
 from dagster._config.config_schema import UserConfigSchema
 from dagster._core.errors import DagsterInvariantViolationError
 from dagster._core.event_api import EventHandlerFn
-from dagster._core.events import ASSET_EVENTS
+from dagster._core.events import ASSET_CHECK_EVENTS, ASSET_EVENTS, BATCH_WRITABLE_EVENTS
 from dagster._core.events.log import EventLogEntry
 from dagster._core.storage.config import pg_config
 from dagster._core.storage.event_log import (
     AssetKeyTable,
+    DynamicPartitionsTable,
     SqlEventLogStorage,
     SqlEventLogStorageMetadata,
     SqlEventLogStorageTable,
 )
 from dagster._core.storage.event_log.base import EventLogCursor
 from dagster._core.storage.event_log.migration import ASSET_KEY_INDEX_COLS
+from dagster._core.storage.event_log.polling_event_watcher import SqlPollingEventWatcher
 from dagster._core.storage.sql import (
     AlembicVersion,
     check_alembic_revision,
@@ -25,18 +29,19 @@ from dagster._core.storage.sql import (
     run_alembic_upgrade,
     stamp_alembic_rev,
 )
+from dagster._core.storage.sqlalchemy_compat import db_select
 from dagster._serdes import ConfigurableClass, ConfigurableClassData, deserialize_value
+from sqlalchemy import event
 from sqlalchemy.engine import Connection
 
-from ..utils import (
+from dagster_postgres.utils import (
     create_pg_connection,
     pg_alembic_config,
-    pg_statement_timeout,
     pg_url_from_config,
     retry_pg_connection_fn,
     retry_pg_creation_fn,
+    set_pg_statement_timeout,
 )
-from .event_watcher import PostgresEventWatcher
 
 CHANNEL_NAME = "run_events"
 
@@ -45,7 +50,7 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
     """Postgres-backed event log storage.
 
     Users should not directly instantiate this class; it is instantiated by internal machinery when
-    ``dagit`` and ``dagster-graphql`` load, based on the values in the ``dagster.yaml`` file in
+    ``dagster-webserver`` and ``dagster-graphql`` load, based on the values in the ``dagster.yaml`` file in
     ``$DAGSTER_HOME``. Configuration of this class should be done by setting values in that file.
 
     To use Postgres for all of the components of your instance storage, you can add the following
@@ -82,15 +87,11 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
             should_autocreate_tables, "should_autocreate_tables"
         )
 
-        self._disposed = False
-
         # Default to not holding any connections open to prevent accumulating connections per DagsterInstance
         self._engine = create_engine(
             self.postgres_url, isolation_level="AUTOCOMMIT", poolclass=db_pool.NullPool
         )
-
-        # lazy init
-        self._event_watcher: Optional[PostgresEventWatcher] = None
+        self._event_watcher: Optional[SqlPollingEventWatcher] = None
 
         self._secondary_index_cache = {}
 
@@ -111,20 +112,24 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
             SqlEventLogStorageMetadata.create_all(conn)
             stamp_alembic_rev(pg_alembic_config(__file__), conn)
 
-    def optimize_for_dagit(self, statement_timeout: int, pool_recycle: int) -> None:
-        # When running in dagit, hold an open connection and set statement_timeout
+    def optimize_for_webserver(
+        self, statement_timeout: int, pool_recycle: int, max_overflow: int
+    ) -> None:
+        # When running in dagster-webserver, hold an open connection and set statement_timeout
+        kwargs = {
+            "isolation_level": "AUTOCOMMIT",
+            "pool_size": 1,
+            "pool_recycle": pool_recycle,
+            "max_overflow": max_overflow,
+        }
         existing_options = self._engine.url.query.get("options")
-        timeout_option = pg_statement_timeout(statement_timeout)
         if existing_options:
-            options = f"{timeout_option} {existing_options}"
-        else:
-            options = timeout_option
-        self._engine = create_engine(
-            self.postgres_url,
-            isolation_level="AUTOCOMMIT",
-            pool_size=1,
-            connect_args={"options": options},
-            pool_recycle=pool_recycle,
+            kwargs["connect_args"] = {"options": existing_options}
+        self._engine = create_engine(self.postgres_url, **kwargs)
+        event.listen(
+            self._engine,
+            "connect",
+            lambda connection, _: set_pg_statement_timeout(connection, statement_timeout),
         )
 
     def upgrade(self) -> None:
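
Editor's note: the rewritten method above stops passing `-c statement_timeout=...` through libpq's `options` and instead registers a SQLAlchemy `connect` hook that runs `set_pg_statement_timeout` on every new pooled connection. A minimal, self-contained sketch of that pattern follows; the URL, timeout, and pool settings are placeholder values, not Dagster's defaults:

import sqlalchemy as db
from sqlalchemy import event


def set_statement_timeout(dbapi_conn, millis):
    # Runs against the raw psycopg2 connection each time the pool opens one.
    with dbapi_conn.cursor() as curs:
        curs.execute(f"SET statement_timeout = {millis};")


engine = db.create_engine(
    "postgresql://test:test@localhost:5432/test",  # placeholder URL
    isolation_level="AUTOCOMMIT",
    pool_size=1,
    pool_recycle=3600,
    max_overflow=20,
)
# "connect" fires once per new DBAPI connection, so the setting survives pool recycling.
event.listen(engine, "connect", lambda conn, _record: set_statement_timeout(conn, 15000))
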
@@ -165,7 +170,7 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
         return PostgresEventLogStorage(conn_string, should_autocreate_tables)
 
     def store_event(self, event: EventLogEntry) -> None:
-        """Store an event corresponding to a pipeline run.
+        """Store an event corresponding to a run.
 
         Args:
             event (EventLogEntry): The event to store.
@@ -180,11 +185,13 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
             )
             res = result.fetchone()
             result.close()
+
+            # LISTEN/NOTIFY no longer used for pg event watch - preserved here to support version skew
             conn.execute(
-                f"""NOTIFY {CHANNEL_NAME}, %s; """,
-                (res[0] + "_" + str(res[1]),),  # type: ignore
+                db.text(f"""NOTIFY {CHANNEL_NAME}, :notify_id; """),
+                {"notify_id": res[0] + "_" + str(res[1])},  # type: ignore
             )
-            event_id = res[1]  # type: ignore
+            event_id = int(res[1])  # type: ignore
 
         if (
             event.is_dagster_event
@@ -198,7 +205,51 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
                     "Cannot store asset event tags for null event id."
                 )
 
-            self.store_asset_event_tags(event, event_id)
+            self.store_asset_event_tags([event], [event_id])
+
+        if event.is_dagster_event and event.dagster_event_type in ASSET_CHECK_EVENTS:
+            self.store_asset_check_event(event, event_id)
+
+    def store_event_batch(self, events: Sequence[EventLogEntry]) -> None:
+        from dagster import DagsterEventType
+
+        check.sequence_param(events, "event", of_type=EventLogEntry)
+
+        event_types = {event.get_dagster_event().event_type for event in events}
+
+        check.invariant(
+            all(event_type in BATCH_WRITABLE_EVENTS for event_type in event_types),
+            f"{BATCH_WRITABLE_EVENTS} are the only currently supported events for batch writes.",
+        )
+        events = [
+            event
+            for event in events
+            if not event.get_dagster_event().is_asset_failed_to_materialize
+        ]
+        if len(events) == 0:
+            return
+
+        if event_types == {DagsterEventType.ASSET_MATERIALIZATION} or event_types == {
+            DagsterEventType.ASSET_OBSERVATION
+        }:
+            insert_event_statement = self.prepare_insert_event_batch(events)
+            with self._connect() as conn:
+                result = conn.execute(
+                    insert_event_statement.returning(SqlEventLogStorageTable.c.id)
+                )
+                event_ids = [cast("int", row[0]) for row in result.fetchall()]
+
+            # We only update the asset table with the last event
+            self.store_asset_event(events[-1], event_ids[-1])
+
+            if any(event_id is None for event_id in event_ids):
+                raise DagsterInvariantViolationError(
+                    "Cannot store asset event tags for null event id."
+                )
+
+            self.store_asset_event_tags(events, event_ids)
+        else:
+            return super().store_event_batch(events)
 
     def store_asset_event(self, event: EventLogEntry, event_id: int) -> None:
         check.inst_param(event, "event", EventLogEntry)
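
Editor's note: the batch path above hinges on PostgreSQL returning generated ids from a multi-row insert in one round trip. A minimal sketch of that `INSERT ... RETURNING` pattern in SQLAlchemy Core; the toy table and URL are assumptions, not Dagster's actual schema:

import sqlalchemy as db

metadata = db.MetaData()
demo_events = db.Table(
    "demo_events",
    metadata,
    db.Column("id", db.Integer, primary_key=True),
    db.Column("body", db.Text),
)

engine = db.create_engine("postgresql://test:test@localhost:5432/test")  # placeholder URL
metadata.create_all(engine)

with engine.begin() as conn:
    result = conn.execute(
        demo_events.insert()
        .values([{"body": "materialization-1"}, {"body": "materialization-2"}])
        .returning(demo_events.c.id)
    )
    event_ids = [row[0] for row in result.fetchall()]  # one id per row, in insert order
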
@@ -207,7 +258,7 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
 
         # We switched to storing the entire event record of the last materialization instead of just
         # the AssetMaterialization object, so that we have access to metadata like timestamp,
-        # pipeline, run_id, etc.
+        # job, run_id, etc.
         #
         # This should make certain asset queries way more performant, without having to do extra
         # queries against the event log.
@@ -222,7 +273,7 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
         # This column is used nowhere else, and as of AssetObservation/AssetMaterializationPlanned
         # event creation, we want to extend this functionality to ensure that assets with any event
         # (observation, materialization, or materialization planned) yielded with timestamp
-        # > wipe timestamp display in Dagit.
+        # > wipe timestamp display in the Dagster UI.
 
         # As of the following PRs, we update last_materialization_timestamp to store the timestamp
         # of the latest asset observation, materialization, or materialization_planned that has occurred.
@@ -252,6 +303,26 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
                 query = query.on_conflict_do_nothing()
             conn.execute(query)
 
+    def add_dynamic_partitions(
+        self, partitions_def_name: str, partition_keys: Sequence[str]
+    ) -> None:
+        if not partition_keys:
+            return
+
+        # Overload base implementation to push upsert logic down into the db layer
+        self._check_partitions_table()
+        with self.index_connection() as conn:
+            conn.execute(
+                db_dialects.postgresql.insert(DynamicPartitionsTable)
+                .values(
+                    [
+                        dict(partitions_def_name=partitions_def_name, partition=partition_key)
+                        for partition_key in partition_keys
+                    ]
+                )
+                .on_conflict_do_nothing(),
+            )
+
     def _connect(self) -> ContextManager[Connection]:
         return create_pg_connection(self._engine)
 
@@ -261,18 +332,27 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
     def index_connection(self) -> ContextManager[Connection]:
         return self._connect()
 
+    @contextmanager
+    def index_transaction(self) -> Iterator[Connection]:
+        """Context manager yielding a connection to the index shard that has begun a transaction."""
+        with self.index_connection() as conn:
+            if conn.in_transaction():
+                yield conn
+            else:
+                conn = conn.execution_options(isolation_level="READ COMMITTED")  # noqa: PLW2901
+                with conn.begin():
+                    yield conn
+
     def has_table(self, table_name: str) -> bool:
         return bool(self._engine.dialect.has_table(self._engine.connect(), table_name))
 
     def has_secondary_index(self, name: str) -> bool:
         if name not in self._secondary_index_cache:
-            self._secondary_index_cache[name] = super(
-                PostgresEventLogStorage, self
-            ).has_secondary_index(name)
+            self._secondary_index_cache[name] = super().has_secondary_index(name)
         return self._secondary_index_cache[name]
 
     def enable_secondary_index(self, name: str) -> None:
-        super(PostgresEventLogStorage, self).enable_secondary_index(name)
+        super().enable_secondary_index(name)
         if name in self._secondary_index_cache:
             del self._secondary_index_cache[name]
 
@@ -284,40 +364,28 @@ class PostgresEventLogStorage(SqlEventLogStorage, ConfigurableClass):
     ) -> None:
         if cursor and EventLogCursor.parse(cursor).is_offset_cursor():
             check.failed("Cannot call `watch` with an offset cursor")
-
         if self._event_watcher is None:
-            self._event_watcher = PostgresEventWatcher(
-                self.postgres_url,
-                [CHANNEL_NAME],
-                self._gen_event_log_entry_from_cursor,
-            )
+            self._event_watcher = SqlPollingEventWatcher(self)
 
         self._event_watcher.watch_run(run_id, cursor, callback)
 
     def _gen_event_log_entry_from_cursor(self, cursor) -> EventLogEntry:
         with self._engine.connect() as conn:
             cursor_res = conn.execute(
-                db.select([SqlEventLogStorageTable.c.event]).where(
+                db_select([SqlEventLogStorageTable.c.event]).where(
                     SqlEventLogStorageTable.c.id == cursor
                 ),
             )
         return deserialize_value(cursor_res.scalar(), EventLogEntry)  # type: ignore
 
     def end_watch(self, run_id: str, handler: EventHandlerFn) -> None:
-        if self._event_watcher is None:
-            return
-
-        self._event_watcher.unwatch_run(run_id, handler)
-
-    def __del__(self) -> None:
-        # Keep the inherent limitations of __del__ in Python in mind!
-        self.dispose()
+        if self._event_watcher:
+            self._event_watcher.unwatch_run(run_id, handler)
 
     def dispose(self) -> None:
-        if not self._disposed:
-            self._disposed = True
-            if self._event_watcher:
-                self._event_watcher.close()
+        if self._event_watcher:
+            self._event_watcher.close()
+            self._event_watcher = None
 
     def alembic_version(self) -> AlembicVersion:
         alembic_config = pg_alembic_config(__file__)
dagster_postgres/run_storage/__init__.py CHANGED
@@ -1 +1 @@
-from .run_storage import PostgresRunStorage as PostgresRunStorage
+from dagster_postgres.run_storage.run_storage import PostgresRunStorage as PostgresRunStorage
dagster_postgres/run_storage/run_storage.py CHANGED
@@ -1,4 +1,6 @@
-from typing import ContextManager, Mapping, Optional
+import zlib
+from collections.abc import Mapping
+from typing import ContextManager, Optional  # noqa: UP035
 
 import dagster._check as check
 import sqlalchemy as db
@@ -12,7 +14,8 @@ from dagster._core.storage.runs import (
     RunStorageSqlMetadata,
     SqlRunStorage,
 )
-from dagster._core.storage.runs.schema import KeyValueStoreTable
+from dagster._core.storage.runs.schema import KeyValueStoreTable, SnapshotsTable
+from dagster._core.storage.runs.sql_run_storage import SnapshotType
 from dagster._core.storage.sql import (
     AlembicVersion,
     check_alembic_revision,
@@ -22,16 +25,17 @@ from dagster._core.storage.sql import (
 )
 from dagster._daemon.types import DaemonHeartbeat
 from dagster._serdes import ConfigurableClass, ConfigurableClassData, serialize_value
-from dagster._utils import utc_datetime_from_timestamp
+from dagster._time import datetime_from_timestamp
+from sqlalchemy import event
 from sqlalchemy.engine import Connection
 
-from ..utils import (
+from dagster_postgres.utils import (
     create_pg_connection,
     pg_alembic_config,
-    pg_statement_timeout,
     pg_url_from_config,
     retry_pg_connection_fn,
     retry_pg_creation_fn,
+    set_pg_statement_timeout,
 )
 
 
@@ -39,7 +43,7 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
     """Postgres-backed run storage.
 
     Users should not directly instantiate this class; it is instantiated by internal machinery when
-    ``dagit`` and ``dagster-graphql`` load, based on the values in the ``dagster.yaml`` file in
+    ``dagster-webserver`` and ``dagster-graphql`` load, based on the values in the ``dagster.yaml`` file in
     ``$DAGSTER_HOME``. Configuration of this class should be done by setting values in that file.
 
     To use Postgres for all of the components of your instance storage, you can add the following
@@ -104,20 +108,24 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
             # This revision may be shared by any other dagster storage classes using the same DB
             stamp_alembic_rev(pg_alembic_config(__file__), conn)
 
-    def optimize_for_dagit(self, statement_timeout: int, pool_recycle: int) -> None:
-        # When running in dagit, hold 1 open connection and set statement_timeout
+    def optimize_for_webserver(
+        self, statement_timeout: int, pool_recycle: int, max_overflow: int
+    ) -> None:
+        # When running in dagster-webserver, hold an open connection and set statement_timeout
+        kwargs = {
+            "isolation_level": "AUTOCOMMIT",
+            "pool_size": 1,
+            "pool_recycle": pool_recycle,
+            "max_overflow": max_overflow,
+        }
         existing_options = self._engine.url.query.get("options")
-        timeout_option = pg_statement_timeout(statement_timeout)
         if existing_options:
-            options = f"{timeout_option} {existing_options}"
-        else:
-            options = timeout_option
-        self._engine = create_engine(
-            self.postgres_url,
-            isolation_level="AUTOCOMMIT",
-            pool_size=1,
-            connect_args={"options": options},
-            pool_recycle=pool_recycle,
+            kwargs["connect_args"] = {"options": existing_options}
+        self._engine = create_engine(self.postgres_url, **kwargs)
+        event.listen(
+            self._engine,
+            "connect",
+            lambda connection, _: set_pg_statement_timeout(connection, statement_timeout),
         )
 
     @property
@@ -129,7 +137,7 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
         return pg_config()
 
     @classmethod
-    def from_config_value(
+    def from_config_value(  # pyright: ignore[reportIncompatibleMethodOverride]
        cls, inst_data: Optional[ConfigurableClassData], config_value: PostgresStorageConfig
     ):
         return PostgresRunStorage(
@@ -160,13 +168,11 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
 
     def has_built_index(self, migration_name: str) -> bool:
         if migration_name not in self._index_migration_cache:
-            self._index_migration_cache[migration_name] = super(
-                PostgresRunStorage, self
-            ).has_built_index(migration_name)
+            self._index_migration_cache[migration_name] = super().has_built_index(migration_name)
         return self._index_migration_cache[migration_name]
 
     def mark_index_built(self, migration_name: str) -> None:
-        super(PostgresRunStorage, self).mark_index_built(migration_name)
+        super().mark_index_built(migration_name)
         if migration_name in self._index_migration_cache:
             del self._index_migration_cache[migration_name]
 
@@ -176,7 +182,7 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
             conn.execute(
                 db_dialects.postgresql.insert(DaemonHeartbeatsTable)
                 .values(
-                    timestamp=utc_datetime_from_timestamp(daemon_heartbeat.timestamp),
+                    timestamp=datetime_from_timestamp(daemon_heartbeat.timestamp),
                     daemon_type=daemon_heartbeat.daemon_type,
                     daemon_id=daemon_heartbeat.daemon_id,
                     body=serialize_value(daemon_heartbeat),
@@ -184,7 +190,7 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
                 .on_conflict_do_update(
                     index_elements=[DaemonHeartbeatsTable.c.daemon_type],
                     set_={
-                        "timestamp": utc_datetime_from_timestamp(daemon_heartbeat.timestamp),
+                        "timestamp": datetime_from_timestamp(daemon_heartbeat.timestamp),
                         "daemon_id": daemon_heartbeat.daemon_id,
                         "body": serialize_value(daemon_heartbeat),
                     },
@@ -199,7 +205,7 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
     def set_cursor_values(self, pairs: Mapping[str, str]) -> None:
         check.mapping_param(pairs, "pairs", key_type=str, value_type=str)
 
-        # pg speciic on_conflict_do_update
+        # pg specific on_conflict_do_update
         insert_stmt = db_dialects.postgresql.insert(KeyValueStoreTable).values(
             [{"key": k, "value": v} for k, v in pairs.items()]
         )
@@ -217,6 +223,20 @@ class PostgresRunStorage(SqlRunStorage, ConfigurableClass):
         with self.connect() as conn:
             conn.execute(upsert_stmt)
 
+    def _add_snapshot(self, snapshot_id: str, snapshot_obj, snapshot_type: SnapshotType) -> str:
+        with self.connect() as conn:
+            snapshot_insert = (
+                db_dialects.postgresql.insert(SnapshotsTable)
+                .values(
+                    snapshot_id=snapshot_id,
+                    snapshot_body=zlib.compress(serialize_value(snapshot_obj).encode("utf-8")),
+                    snapshot_type=snapshot_type.value,
+                )
+                .on_conflict_do_nothing()
+            )
+            conn.execute(snapshot_insert)
+            return snapshot_id
+
     def alembic_version(self) -> AlembicVersion:
         alembic_config = pg_alembic_config(__file__)
         with self.connect() as conn:
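
Editor's note: the new `_add_snapshot` writes the serialized snapshot zlib-compressed and lets Postgres dedupe via `ON CONFLICT DO NOTHING`. A tiny round-trip sketch of the compression step; the payload is an arbitrary placeholder:

import zlib

payload = '{"snapshot": "execution-plan"}'  # placeholder serialized snapshot
snapshot_body = zlib.compress(payload.encode("utf-8"))  # bytes written to the snapshot_body column
assert zlib.decompress(snapshot_body).decode("utf-8") == payload
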
dagster_postgres/schedule_storage/__init__.py CHANGED
@@ -1 +1,3 @@
-from .schedule_storage import PostgresScheduleStorage as PostgresScheduleStorage
+from dagster_postgres.schedule_storage.schedule_storage import (
+    PostgresScheduleStorage as PostgresScheduleStorage,
+)
dagster_postgres/schedule_storage/schedule_storage.py CHANGED
@@ -1,15 +1,22 @@
-from typing import ContextManager, Optional
+from collections.abc import Sequence
+from typing import ContextManager, Optional  # noqa: UP035
 
 import dagster._check as check
-import pendulum
 import sqlalchemy as db
 import sqlalchemy.dialects as db_dialects
 import sqlalchemy.pool as db_pool
 from dagster._config.config_schema import UserConfigSchema
+from dagster._core.definitions.asset_key import EntityKey
+from dagster._core.definitions.declarative_automation.serialized_objects import (
+    AutomationConditionEvaluationWithRunIds,
+)
 from dagster._core.scheduler.instigation import InstigatorState
 from dagster._core.storage.config import PostgresStorageConfig, pg_config
 from dagster._core.storage.schedules import ScheduleStorageSqlMetadata, SqlScheduleStorage
-from dagster._core.storage.schedules.schema import InstigatorsTable
+from dagster._core.storage.schedules.schema import (
+    AssetDaemonAssetEvaluationsTable,
+    InstigatorsTable,
+)
 from dagster._core.storage.sql import (
     AlembicVersion,
     check_alembic_revision,
@@ -18,15 +25,17 @@ from dagster._core.storage.sql import (
     stamp_alembic_rev,
 )
 from dagster._serdes import ConfigurableClass, ConfigurableClassData, serialize_value
+from dagster._time import get_current_datetime
+from sqlalchemy import event
 from sqlalchemy.engine import Connection
 
-from ..utils import (
+from dagster_postgres.utils import (
     create_pg_connection,
     pg_alembic_config,
-    pg_statement_timeout,
     pg_url_from_config,
     retry_pg_connection_fn,
     retry_pg_creation_fn,
+    set_pg_statement_timeout,
 )
 
 
@@ -34,7 +43,7 @@ class PostgresScheduleStorage(SqlScheduleStorage, ConfigurableClass):
     """Postgres-backed run storage.
 
     Users should not directly instantiate this class; it is instantiated by internal machinery when
-    ``dagit`` and ``dagster-graphql`` load, based on the values in the ``dagster.yaml`` file in
+    ``dagster-webserver`` and ``dagster-graphql`` load, based on the values in the ``dagster.yaml`` file in
     ``$DAGSTER_HOME``. Configuration of this class should be done by setting values in that file.
 
     To use Postgres for all of the components of your instance storage, you can add the following
@@ -95,20 +104,24 @@ class PostgresScheduleStorage(SqlScheduleStorage, ConfigurableClass):
             self.migrate()
             self.optimize()
 
-    def optimize_for_dagit(self, statement_timeout: int, pool_recycle: int) -> None:
-        # When running in dagit, hold an open connection and set statement_timeout
+    def optimize_for_webserver(
+        self, statement_timeout: int, pool_recycle: int, max_overflow: int
+    ) -> None:
+        # When running in dagster-webserver, hold an open connection and set statement_timeout
+        kwargs = {
+            "isolation_level": "AUTOCOMMIT",
+            "pool_size": 1,
+            "pool_recycle": pool_recycle,
+            "max_overflow": max_overflow,
+        }
         existing_options = self._engine.url.query.get("options")
-        timeout_option = pg_statement_timeout(statement_timeout)
         if existing_options:
-            options = f"{timeout_option} {existing_options}"
-        else:
-            options = timeout_option
-        self._engine = create_engine(
-            self.postgres_url,
-            isolation_level="AUTOCOMMIT",
-            pool_size=1,
-            connect_args={"options": options},
-            pool_recycle=pool_recycle,
+            kwargs["connect_args"] = {"options": existing_options}
+        self._engine = create_engine(self.postgres_url, **kwargs)
+        event.listen(
+            self._engine,
+            "connect",
+            lambda connection, _: set_pg_statement_timeout(connection, statement_timeout),
         )
 
     @property
@@ -120,7 +133,7 @@ class PostgresScheduleStorage(SqlScheduleStorage, ConfigurableClass):
         return pg_config()
 
     @classmethod
-    def from_config_value(
+    def from_config_value(  # pyright: ignore[reportIncompatibleMethodOverride]
        cls, inst_data: Optional[ConfigurableClassData], config_value: PostgresStorageConfig
     ) -> "PostgresScheduleStorage":
         return PostgresScheduleStorage(
@@ -167,11 +180,44 @@ class PostgresScheduleStorage(SqlScheduleStorage, ConfigurableClass):
                     "status": state.status.value,
                     "instigator_type": state.instigator_type.value,
                     "instigator_body": serialize_value(state),
-                    "update_timestamp": pendulum.now("UTC"),
+                    "update_timestamp": get_current_datetime(),
                 },
             )
         )
 
+    def add_auto_materialize_asset_evaluations(
+        self,
+        evaluation_id: int,
+        asset_evaluations: Sequence[AutomationConditionEvaluationWithRunIds[EntityKey]],
+    ):
+        if not asset_evaluations:
+            return
+
+        insert_stmt = db_dialects.postgresql.insert(AssetDaemonAssetEvaluationsTable).values(
+            [
+                {
+                    "evaluation_id": evaluation_id,
+                    "asset_key": evaluation.key.to_db_string(),
+                    "asset_evaluation_body": serialize_value(evaluation),
+                    "num_requested": evaluation.num_requested,
+                }
+                for evaluation in asset_evaluations
+            ]
+        )
+        upsert_stmt = insert_stmt.on_conflict_do_update(
+            index_elements=[
+                AssetDaemonAssetEvaluationsTable.c.evaluation_id,
+                AssetDaemonAssetEvaluationsTable.c.asset_key,
+            ],
+            set_={
+                "asset_evaluation_body": insert_stmt.excluded.asset_evaluation_body,
+                "num_requested": insert_stmt.excluded.num_requested,
+            },
+        )
+
+        with self.connect() as conn:
+            conn.execute(upsert_stmt)
+
     def alembic_version(self) -> AlembicVersion:
         alembic_config = pg_alembic_config(__file__)
         with self.connect() as conn:
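
Editor's note: the upsert above uses the PostgreSQL dialect's `insert(...).on_conflict_do_update(...)`, where `insert_stmt.excluded` refers to the conflicting row's incoming values. A condensed sketch of the same pattern against a toy table; the table, columns, and URL are assumptions:

import sqlalchemy as db
from sqlalchemy.dialects.postgresql import insert as pg_insert

metadata = db.MetaData()
evals = db.Table(
    "demo_evaluations",
    metadata,
    db.Column("evaluation_id", db.Integer, primary_key=True),
    db.Column("asset_key", db.Text, primary_key=True),
    db.Column("body", db.Text),
)

engine = db.create_engine("postgresql://test:test@localhost:5432/test")  # placeholder URL
metadata.create_all(engine)

insert_stmt = pg_insert(evals).values(
    [{"evaluation_id": 1, "asset_key": "my_asset", "body": "v2"}]
)
upsert_stmt = insert_stmt.on_conflict_do_update(
    index_elements=[evals.c.evaluation_id, evals.c.asset_key],
    set_={"body": insert_stmt.excluded.body},  # EXCLUDED: the row that hit the conflict
)
with engine.begin() as conn:
    conn.execute(upsert_stmt)
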
dagster_postgres/storage.py CHANGED
@@ -9,17 +9,17 @@ from dagster._core.storage.runs import RunStorage
 from dagster._core.storage.schedules import ScheduleStorage
 from dagster._serdes import ConfigurableClass, ConfigurableClassData
 
-from .event_log import PostgresEventLogStorage
-from .run_storage import PostgresRunStorage
-from .schedule_storage import PostgresScheduleStorage
-from .utils import pg_url_from_config
+from dagster_postgres.event_log import PostgresEventLogStorage
+from dagster_postgres.run_storage import PostgresRunStorage
+from dagster_postgres.schedule_storage import PostgresScheduleStorage
+from dagster_postgres.utils import pg_url_from_config
 
 
 class DagsterPostgresStorage(DagsterStorage, ConfigurableClass):
     """Postgres-backed dagster storage.
 
     Users should not directly instantiate this class; it is instantiated by internal machinery when
-    ``dagit`` and ``dagster-daemon`` load, based on the values in the ``dagster.yaml`` file in
+    ``dagster-webserver`` and ``dagster-daemon`` load, based on the values in the ``dagster.yaml`` file in
     ``$DAGSTER_HOME``. Configuration of this class should be done by setting values in that file.
 
     To use Postgres for storage, you can add a block such as the following to your
@@ -59,7 +59,7 @@ class DagsterPostgresStorage(DagsterStorage, ConfigurableClass):
         return pg_config()
 
     @classmethod
-    def from_config_value(
+    def from_config_value(  # pyright: ignore[reportIncompatibleMethodOverride]
        cls, inst_data: Optional[ConfigurableClassData], config_value: PostgresStorageConfig
     ) -> "DagsterPostgresStorage":
         return DagsterPostgresStorage(
dagster_postgres/test_fixtures/__init__.py ADDED
@@ -0,0 +1,75 @@
+import tempfile
+from contextlib import contextmanager
+from pathlib import Path
+
+import pytest
+from dagster._core.test_utils import instance_for_test
+from dagster._utils.merger import merge_dicts
+from dagster_test.fixtures import docker_compose_cm, network_name_from_yml
+
+from dagster_postgres.utils import get_conn_string, wait_for_connection
+
+compose_file = Path(__file__).parent / "docker-compose.yml"
+
+
+@pytest.fixture(scope="session")
+def postgres_network():
+    yield network_name_from_yml(compose_file)
+
+
+@pytest.fixture(scope="session")
+def postgres_hostname():
+    with docker_compose_cm(docker_compose_yml=compose_file) as hostnames:
+        yield hostnames["postgres"]
+
+
+@pytest.fixture(scope="session")
+def postgres_conn_str(postgres_hostname):
+    conn_str = get_conn_string(
+        username="test",
+        password="test",
+        hostname=postgres_hostname,
+        db_name="test",
+        params=dict(connect_timeout=5),
+    )
+    wait_for_connection(
+        conn_str,
+        retry_limit=10,
+        retry_wait=3,
+    )
+
+    yield conn_str
+
+
+@pytest.fixture
+def postgres_instance(postgres_conn_str):
+    @contextmanager
+    def _instance(overrides=None):
+        with tempfile.TemporaryDirectory() as temp_dir:
+            with instance_for_test(
+                temp_dir=temp_dir,
+                overrides=merge_dicts(
+                    {
+                        "run_storage": {
+                            "module": "dagster_postgres.run_storage.run_storage",
+                            "class": "PostgresRunStorage",
+                            "config": {"postgres_url": postgres_conn_str},
+                        },
+                        "event_log_storage": {
+                            "module": "dagster_postgres.event_log.event_log",
+                            "class": "PostgresEventLogStorage",
+                            "config": {"postgres_url": postgres_conn_str},
+                        },
+                        "schedule_storage": {
+                            "module": "dagster_postgres.schedule_storage.schedule_storage",
+                            "class": "PostgresScheduleStorage",
+                            "config": {"postgres_url": postgres_conn_str},
+                        },
+                    },
+                    overrides if overrides else {},
+                ),
+            ) as instance:
+                instance.wipe()
+                yield instance
+
+    return _instance
dagster_postgres/test_fixtures/docker-compose.yml ADDED
@@ -0,0 +1,10 @@
+services:
+  postgres:
+    image: postgres:16
+    container_name: postgres
+    ports:
+      - "5432:5432"
+    environment:
+      POSTGRES_PASSWORD: "test"
+      POSTGRES_USER: "test"
+      POSTGRES_DB: "test"
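
Editor's note: the wheel now ships reusable pytest fixtures alongside the compose file above. One hypothetical way a downstream suite might consume them; the `pytest_plugins` registration and the test body are assumptions, not part of the package:

# conftest.py - registers the shipped fixtures with pytest
pytest_plugins = ["dagster_postgres.test_fixtures"]

# test_storage.py
def test_instance_starts_empty(postgres_instance):
    with postgres_instance() as instance:
        # the fixture wipes the instance, so no runs should exist yet
        assert len(instance.get_runs()) == 0
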
dagster_postgres/utils.py CHANGED
@@ -1,7 +1,8 @@
 import logging
 import time
+from collections.abc import Callable, Iterator, Mapping
 from contextlib import contextmanager
-from typing import Any, Callable, Iterator, Mapping, Optional, TypeVar
+from typing import Any, Optional, TypeVar
 from urllib.parse import quote, urlencode
 
 import alembic.config
@@ -169,6 +170,8 @@ def create_pg_connection(
         conn.close()
 
 
-def pg_statement_timeout(millis: int) -> str:
+def set_pg_statement_timeout(conn: psycopg2.extensions.connection, millis: int):
    check.int_param(millis, "millis")
-    return f"-c statement_timeout={millis}"
+    with conn:
+        with conn.cursor() as curs:
+            curs.execute(f"SET statement_timeout = {millis};")
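
Editor's note: `pg_statement_timeout` built a libpq `options` string applied at connect time; its replacement `set_pg_statement_timeout` issues the equivalent `SET` on an already-open connection. What that amounts to in plain psycopg2 (the DSN and timeout are placeholders):

import psycopg2

conn = psycopg2.connect("dbname=test user=test password=test host=localhost")  # placeholder DSN
with conn:
    with conn.cursor() as curs:
        curs.execute("SET statement_timeout = 5000;")  # cancel statements running > 5s
        curs.execute("SHOW statement_timeout;")
        print(curs.fetchone()[0])  # prints '5s'
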
dagster_postgres/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.19.2"
+__version__ = "0.28.11"
dagster_postgres-0.28.11.dist-info/METADATA ADDED
@@ -0,0 +1,28 @@
+Metadata-Version: 2.4
+Name: dagster-postgres
+Version: 0.28.11
+Summary: A Dagster integration for postgres
+Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-postgres
+Author: Dagster Labs
+Author-email: hello@dagsterlabs.com
+License: Apache-2.0
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10,<3.15
+License-File: LICENSE
+Requires-Dist: dagster==1.12.11
+Requires-Dist: psycopg2-binary
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
dagster_postgres-0.28.11.dist-info/RECORD ADDED
@@ -0,0 +1,19 @@
+dagster_postgres/__init__.py,sha256=rZfjhBDGC_SIGnKuiSCzi7D0xD36mAnchDLRL-n1Tmk,561
+dagster_postgres/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
+dagster_postgres/storage.py,sha256=CiPA773j0BQvPtCEz5vVqKjap9h0ODAxR_R_CvbI-bI,4310
+dagster_postgres/utils.py,sha256=BzkzfQ9t9U9FMCs_3NazAKW7stK0Wrz_nH0KK4NQfi8,5930
+dagster_postgres/version.py,sha256=MlSqB7SmZ8R1F0K_PtG4rMWRGGllC7hEsX-reHAF_8A,24
+dagster_postgres/alembic/alembic.ini,sha256=GovyDEhu_6HvkWV6txqjdDBOe4BseSM0YDWGxXM5_cA,986
+dagster_postgres/event_log/__init__.py,sha256=wRcUR-StRNrPCqpEzi0MRY8b-r_TEWV17OsEynFqlLs,100
+dagster_postgres/event_log/event_log.py,sha256=w8_ZsvDb0vt7qDVSS912jmNRpLDI6F0KQrwribyAmwM,16145
+dagster_postgres/run_storage/__init__.py,sha256=oW_546mJ5K-e-RF0Ou7r-4fHWxFthHgPPhWxklsVK1g,94
+dagster_postgres/run_storage/run_storage.py,sha256=VMXT5sb5n04ViWOYj2eOoZqOk78hqL38J6CuikM79No,9757
+dagster_postgres/schedule_storage/__init__.py,sha256=-jW-1S4Xf5Ew-cz-DjKjU5sVs9EEly_2ELMLOXTewv0,123
+dagster_postgres/schedule_storage/schedule_storage.py,sha256=iPKcvnj1eEeG5LH73GoFvoS3hmE8MFddiFyf7QoKZ3g,8826
+dagster_postgres/test_fixtures/__init__.py,sha256=ZIcbGYa_CMmVF0YkuiX4uI3wRjBke_2VE8QXKcji0Y4,2447
+dagster_postgres/test_fixtures/docker-compose.yml,sha256=hp2VTnENYK6CL2Yae3IsktssahPFQiHyhpNcG36CivM,208
+dagster_postgres-0.28.11.dist-info/licenses/LICENSE,sha256=4lsMW-RCvfVD4_F57wrmpe3vX1xwUk_OAKKmV_XT7Z0,11348
+dagster_postgres-0.28.11.dist-info/METADATA,sha256=971I43x8Ltg7CRFX9AILtVaxOlw1PK7fsQ0jwuGdEYo,947
+dagster_postgres-0.28.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dagster_postgres-0.28.11.dist-info/top_level.txt,sha256=lScMtAEKDX1yIv2tGa1nzntBa0HEStfWPfCwD8FWlHk,17
+dagster_postgres-0.28.11.dist-info/RECORD,,
dagster_postgres-0.28.11.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.33.6)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
dagster_postgres-0.28.11.dist-info/licenses/LICENSE CHANGED
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright 2023 Elementl, Inc.
+   Copyright 2025 Dagster Labs, Inc.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
dagster_postgres/event_log/event_watcher.py DELETED
@@ -1,137 +0,0 @@
-import logging
-import threading
-from collections import defaultdict
-from typing import Callable, List, MutableMapping, Optional, Sequence
-
-import dagster._check as check
-from dagster._core.event_api import EventHandlerFn
-from dagster._core.events.log import EventLogEntry
-from dagster._core.storage.event_log.base import EventLogCursor
-from dagster._core.storage.event_log.polling_event_watcher import CallbackAfterCursor
-
-from ..pynotify import await_pg_notifications
-
-POLLING_CADENCE = 0.25
-
-
-def watcher_thread(
-    conn_string: str,
-    handlers_dict: MutableMapping[str, Sequence[CallbackAfterCursor]],
-    dict_lock: threading.Lock,
-    watcher_thread_exit: threading.Event,
-    watcher_thread_started: threading.Event,
-    channels: Sequence[str],
-    gen_event_log_entry_from_cursor: Callable[[int], EventLogEntry],
-) -> None:
-    for notif in await_pg_notifications(
-        conn_string,
-        channels=channels,
-        timeout=POLLING_CADENCE,
-        yield_on_timeout=True,
-        exit_event=watcher_thread_exit,
-        started_event=watcher_thread_started,
-    ):
-        if notif is None:
-            if watcher_thread_exit.is_set():
-                break
-        else:
-            run_id, index_str = notif.payload.split("_")
-            with dict_lock:
-                if run_id not in handlers_dict:
-                    continue
-
-            index = int(index_str)
-            with dict_lock:
-                handlers = handlers_dict.get(run_id, [])
-
-            dagster_event = gen_event_log_entry_from_cursor(index)
-
-            for callback_with_cursor in handlers:
-                try:
-                    if (
-                        callback_with_cursor.cursor is None
-                        or EventLogCursor.parse(callback_with_cursor.cursor).storage_id() < index
-                    ):
-                        callback_with_cursor.callback(
-                            dagster_event, str(EventLogCursor.from_storage_id(index))
-                        )
-                except:
-                    logging.exception("Exception in callback for event watch on run %s.", run_id)
-
-
-class PostgresEventWatcher:
-    def __init__(
-        self,
-        conn_string: str,
-        channels: Sequence[str],
-        gen_event_log_entry_from_cursor: Callable[[int], EventLogEntry],
-    ):
-        self._conn_string: str = check.str_param(conn_string, "conn_string")
-        self._handlers_dict: MutableMapping[str, List[CallbackAfterCursor]] = defaultdict(list)
-        self._dict_lock: threading.Lock = threading.Lock()
-        self._watcher_thread_exit: Optional[threading.Event] = None
-        self._watcher_thread_started: Optional[threading.Event] = None
-        self._watcher_thread: Optional[threading.Thread] = None
-        self._channels: Sequence[str] = check.sequence_param(channels, "channels")
-        self._gen_event_log_entry_from_cursor: Callable[
-            [int], EventLogEntry
-        ] = check.callable_param(gen_event_log_entry_from_cursor, "gen_event_log_entry_from_cursor")
-
-    def watch_run(
-        self,
-        run_id: str,
-        cursor: Optional[str],
-        callback: EventHandlerFn,
-        start_timeout: int = 15,
-    ) -> None:
-        check.str_param(run_id, "run_id")
-        check.opt_str_param(cursor, "cursor")
-        check.callable_param(callback, "callback")
-        if not self._watcher_thread:
-            self._watcher_thread_exit = threading.Event()
-            self._watcher_thread_started = threading.Event()
-
-            self._watcher_thread = threading.Thread(
-                target=watcher_thread,
-                args=(
-                    self._conn_string,
-                    self._handlers_dict,
-                    self._dict_lock,
-                    self._watcher_thread_exit,
-                    self._watcher_thread_started,
-                    self._channels,
-                    self._gen_event_log_entry_from_cursor,
-                ),
-                name="postgres-event-watch",
-            )
-            self._watcher_thread.daemon = True
-            self._watcher_thread.start()
-
-            # Wait until the watcher thread is actually listening before returning
-            self._watcher_thread_started.wait(start_timeout)
-            if not self._watcher_thread_started.is_set():
-                raise Exception("Watcher thread never started")
-
-        with self._dict_lock:
-            self._handlers_dict[run_id].append(CallbackAfterCursor(cursor, callback))
-
-    def unwatch_run(self, run_id: str, handler: EventHandlerFn) -> None:
-        check.str_param(run_id, "run_id")
-        check.callable_param(handler, "handler")
-        with self._dict_lock:
-            if run_id in self._handlers_dict:
-                self._handlers_dict[run_id] = [
-                    callback_with_cursor
-                    for callback_with_cursor in self._handlers_dict[run_id]
-                    if callback_with_cursor.callback != handler
-                ]
-                if not self._handlers_dict[run_id]:
-                    del self._handlers_dict[run_id]
-
-    def close(self) -> None:
-        if self._watcher_thread:
-            self._watcher_thread_exit.set()  # type: ignore
-            if self._watcher_thread.is_alive():
-                self._watcher_thread.join()
-            self._watcher_thread_exit = None
-            self._watcher_thread = None
dagster_postgres/pynotify.py DELETED
@@ -1,138 +0,0 @@
-# copied from https://github.com/djrobstep/pgnotify/blob/43bbe7bd3cedfb99700e4ab370cb6f5d7426bea3/pgnotify/notify.py
-
-# This is free and unencumbered software released into the public domain.
-
-# Anyone is free to copy, modify, publish, use, compile, sell, or
-# distribute this software, either in source code form or as a compiled
-# binary, for any purpose, commercial or non-commercial, and by any
-# means.
-
-# In jurisdictions that recognize copyright laws, the author or authors
-# of this software dedicate any and all copyright interest in the
-# software to the public domain. We make this dedication for the benefit
-# of the public at large and to the detriment of our heirs and
-# successors. We intend this dedication to be an overt act of
-# relinquishment in perpetuity of all present and future rights to this
-# software under copyright law.
-
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-# For more information, please refer to <http://unlicense.org>
-
-import errno
-import os
-import select
-import signal
-import sys
-from threading import Event
-from typing import Iterator, Optional, Sequence
-
-import dagster._check as check
-from dagster._core.storage.sql import create_engine
-from psycopg2.extensions import Notify
-
-from .utils import create_pg_connection
-
-
-def get_wakeup_fd() -> int:
-    pipe_r, pipe_w = os.pipe()
-    if "win" not in sys.platform:
-        import fcntl
-
-        flags = fcntl.fcntl(pipe_w, fcntl.F_GETFL, 0)
-        flags = os.O_NONBLOCK
-        flags = fcntl.fcntl(pipe_w, fcntl.F_SETFL, flags)
-    signal.set_wakeup_fd(pipe_w)
-    return pipe_r
-
-
-def _empty_handler(_signal, _frame):
-    pass
-
-
-def quote_table_name(name: str) -> str:
-    return f'"{name}"'
-
-
-def start_listening(connection, channels):
-    names = (quote_table_name(each) for each in channels)
-    listens = "; ".join([f"LISTEN {n}" for n in names])
-
-    with connection.cursor() as curs:
-        curs.execute(listens)
-
-
-def construct_signals(arg) -> signal.Signals:
-    # function exists to consolidate and scope pylint directive
-    return signal.Signals(arg)
-
-
-def await_pg_notifications(
-    conn_string: str,
-    channels: Optional[Sequence[str]] = None,
-    timeout: float = 5.0,
-    yield_on_timeout: bool = False,
-    exit_event: Optional[Event] = None,
-    started_event: Optional[Event] = None,
-) -> Iterator[Optional[Notify]]:
-    """Subscribe to PostgreSQL notifications, and handle them
-    in infinite-loop style.
-
-    Args:
-        conn_string (str): connection string to PG DB
-        channels (Optional[List[str]], optional): List of channel names to listen to. Defaults to None.
-        timeout (float, optional): Timeout interval. Defaults to 5.0.
-        yield_on_timeout (bool, optional): Should the function yield on timeout. Defaults to False.
-        exit_event (Optional[Event], optional): Event that indicates that polling for new notifications should stop. Defaults to None.
-        started_event (Optional[Event], optional): Event that this function can set to notify that the subscription has been established. Defaults to None.
-
-    Yields:
-        Iterator[Optional[Notify]]: Can yield one of two types:
-            1: None, in case of timeout
-            2: Notify, in case of successful notification reception
-    """
-    check.str_param(conn_string, "conn_string")
-    channels = None if channels is None else check.sequence_param(channels, "channels", of_type=str)
-    check.float_param(timeout, "timeout")
-    check.bool_param(yield_on_timeout, "yield_on_timeout")
-
-    engine = create_engine(
-        conn_string,
-        isolation_level="AUTOCOMMIT",
-    )
-
-    with create_pg_connection(engine) as conn:
-        connection = conn.connection.connection  # DBAPI connection  # type: ignore
-
-        if channels:
-            start_listening(connection, channels)
-
-        if started_event:
-            started_event.set()
-
-        while True and not (exit_event and exit_event.is_set()):
-            try:
-                r, w, x = select.select([connection], [], [], max(0, timeout))
-                if (r, w, x) == ([], [], []):
-                    if yield_on_timeout:
-                        yield None
-
-                if connection in r:
-                    connection.poll()
-
-                    # copy the conn.notifies list/queue & empty it
-                    notify_list, connection.notifies = connection.notifies, []
-                    for notif in notify_list:
-                        yield notif
-
-            except select.error as e:
-                if e.errno == errno.EINTR:
-                    pass
-                else:
-                    raise
1
- Metadata-Version: 2.1
2
- Name: dagster-postgres
3
- Version: 0.19.2
4
- Summary: A Dagster integration for postgres
5
- Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-postgres
6
- Author: Elementl
7
- Author-email: hello@elementl.com
8
- License: Apache-2.0
9
- Classifier: Programming Language :: Python :: 3.7
10
- Classifier: Programming Language :: Python :: 3.8
11
- Classifier: Programming Language :: Python :: 3.9
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Classifier: License :: OSI Approved :: Apache Software License
15
- Classifier: Operating System :: OS Independent
16
- License-File: LICENSE
17
- Requires-Dist: dagster (==1.3.2)
18
- Requires-Dist: psycopg2-binary
19
-
dagster_postgres-0.19.2.dist-info/RECORD DELETED
@@ -1,19 +0,0 @@
-dagster_postgres/__init__.py,sha256=LEcXwiGeuZgIvfDg0PVcRuKjUmg0t9Uslx5nLZ8Yg08,480
-dagster_postgres/py.typed,sha256=la67KBlbjXN-_-DfGNcdOcjYumVpKG_Tkw-8n5dnGB4,8
-dagster_postgres/pynotify.py,sha256=kpAZ24THE1qYQIxScUk21Mp3QV9qd92P3CMXYGEgRIU,5004
-dagster_postgres/storage.py,sha256=cEr8Hkab9jgLzimC-efkSwwQXWpVKUvN-RHhf4IpuB8,4181
-dagster_postgres/utils.py,sha256=b_KukmD2puOJdvTFy0OUZtM5sGWyE1Fq6nP9hmUmRCc,5798
-dagster_postgres/version.py,sha256=qZNYBPCf5y9eOSOCnc71mXgsDo0Xwb9v1HTFq7X3sdU,23
-dagster_postgres/alembic/alembic.ini,sha256=S0-B6cHIaS_CJaJc2dd0NiQfDLIWfvaZJGZYQsH7tSY,1040
-dagster_postgres/event_log/__init__.py,sha256=T95tRLYxnC-g69K_5G9i1NLTXi3pGexLVKeGug8niMA,146
-dagster_postgres/event_log/event_log.py,sha256=tSBNxccx8bXFMDcWYPNqBpjG-EbF86zgFNZ_Ix3-ttc,13001
-dagster_postgres/event_log/event_watcher.py,sha256=59TuxkNX56Va54GJFqdQSzSCmA14onS_4l5IFYbRxb8,5346
-dagster_postgres/run_storage/__init__.py,sha256=4bI2C2kkZO9glXH8jIUViYBRSeMjdDHiVQVi_k1HkYw,66
-dagster_postgres/run_storage/run_storage.py,sha256=x35qMKJVhOtEw-CiYiHBw8K4WcdCd8AUYYao7mYMQiQ,8867
-dagster_postgres/schedule_storage/__init__.py,sha256=GKJlFZYh27fUxJDMQwNJUwibbTnip1ledbcokczutmk,81
-dagster_postgres/schedule_storage/schedule_storage.py,sha256=UDKJ1ZEWYtNWHACI8ap1mZlfB8WXSwGtNgqgXrleSLU,7045
-dagster_postgres-0.19.2.dist-info/LICENSE,sha256=-gtoVIAZYUHYmNHISZg982FI4Oh19mV1nxgTVW8eCB8,11344
-dagster_postgres-0.19.2.dist-info/METADATA,sha256=whK5RB_yBbNjR367aSYLqTOh80XZ9mEzhdXg7pFo5aI,727
-dagster_postgres-0.19.2.dist-info/WHEEL,sha256=p46_5Uhzqz6AzeSosiOnxK-zmFja1i22CrQCjmYe8ec,92
-dagster_postgres-0.19.2.dist-info/top_level.txt,sha256=lScMtAEKDX1yIv2tGa1nzntBa0HEStfWPfCwD8FWlHk,17
-dagster_postgres-0.19.2.dist-info/RECORD,,