dagster 1.12.12__py3-none-any.whl → 1.12.13__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.
- dagster/_core/asset_graph_view/asset_graph_view.py +83 -19
- dagster/_core/asset_graph_view/entity_subset.py +14 -9
- dagster/_core/asset_graph_view/serializable_entity_subset.py +6 -0
- dagster/_core/definitions/asset_checks/asset_check_evaluation.py +41 -68
- dagster/_core/definitions/asset_checks/asset_check_result.py +10 -0
- dagster/_core/definitions/asset_checks/asset_check_spec.py +11 -0
- dagster/_core/definitions/assets/graph/asset_graph.py +1 -0
- dagster/_core/definitions/assets/graph/base_asset_graph.py +29 -2
- dagster/_core/definitions/assets/graph/remote_asset_graph.py +9 -5
- dagster/_core/definitions/declarative_automation/legacy/valid_asset_subset.py +4 -4
- dagster/_core/definitions/declarative_automation/operands/operands.py +10 -4
- dagster/_core/definitions/decorators/asset_check_decorator.py +6 -0
- dagster/_core/event_api.py +10 -0
- dagster/_core/execution/context/asset_check_execution_context.py +39 -0
- dagster/_core/execution/plan/execute_step.py +4 -3
- dagster/_core/instance/runs/run_domain.py +73 -90
- dagster/_core/remote_representation/external_data.py +6 -0
- dagster/_core/storage/asset_check_execution_record.py +49 -5
- dagster/_core/storage/asset_check_state.py +263 -0
- dagster/_core/storage/dagster_run.py +77 -0
- dagster/_core/storage/event_log/base.py +59 -1
- dagster/_core/storage/event_log/sql_event_log.py +174 -7
- dagster/_core/storage/event_log/sqlite/sqlite_event_log.py +6 -1
- dagster/_core/storage/legacy_storage.py +26 -5
- dagster/_core/workspace/load_target.py +1 -1
- dagster/_daemon/monitoring/run_monitoring.py +5 -1
- dagster/_utils/__init__.py +11 -0
- dagster/version.py +1 -1
- {dagster-1.12.12.dist-info → dagster-1.12.13.dist-info}/METADATA +3 -3
- {dagster-1.12.12.dist-info → dagster-1.12.13.dist-info}/RECORD +34 -33
- {dagster-1.12.12.dist-info → dagster-1.12.13.dist-info}/WHEEL +1 -1
- {dagster-1.12.12.dist-info → dagster-1.12.13.dist-info}/entry_points.txt +0 -0
- {dagster-1.12.12.dist-info → dagster-1.12.13.dist-info}/licenses/LICENSE +0 -0
- {dagster-1.12.12.dist-info → dagster-1.12.13.dist-info}/top_level.txt +0 -0
dagster/_core/storage/asset_check_state.py (new file)

```diff
@@ -0,0 +1,263 @@
+from collections import defaultdict
+from collections.abc import Iterable, Mapping, Sequence
+from typing import TYPE_CHECKING, Optional, TypeAlias
+
+from dagster_shared.record import record
+from dagster_shared.serdes import whitelist_for_serdes
+
+from dagster._core.asset_graph_view.serializable_entity_subset import SerializableEntitySubset
+from dagster._core.definitions.asset_key import AssetCheckKey
+from dagster._core.definitions.partitions.definition.partitions_definition import (
+    PartitionsDefinition,
+)
+from dagster._core.loader import LoadableBy, LoadingContext
+from dagster._core.storage.asset_check_execution_record import (
+    AssetCheckExecutionRecordStatus,
+    AssetCheckExecutionResolvedStatus,
+    AssetCheckPartitionInfo,
+)
+from dagster._core.storage.dagster_run import FINISHED_STATUSES, DagsterRunStatus, RunsFilter
+
+if TYPE_CHECKING:
+    from dagster._core.instance import DagsterInstance
+
+StatusSubsets: TypeAlias = Mapping[
+    AssetCheckExecutionResolvedStatus, SerializableEntitySubset[AssetCheckKey]
+]
+InProgressRuns: TypeAlias = Mapping[str, SerializableEntitySubset[AssetCheckKey]]
+
+
+@whitelist_for_serdes
+@record
+class AssetCheckState(LoadableBy[tuple[AssetCheckKey, Optional[PartitionsDefinition]]]):
+    latest_storage_id: int
+    subsets: StatusSubsets
+    in_progress_runs: InProgressRuns
+
+    def compatible_with(self, partitions_def: Optional[PartitionsDefinition]) -> bool:
+        subset = next(iter(self.subsets.values()), None)
+        if subset is None:
+            return True
+        return subset.is_compatible_with_partitions_def(partitions_def)
+
+    @classmethod
+    def empty(cls) -> "AssetCheckState":
+        return cls(latest_storage_id=0, subsets={}, in_progress_runs={})
+
+    @classmethod
+    def _blocking_batch_load(
+        cls,
+        keys: Iterable[tuple[AssetCheckKey, Optional[PartitionsDefinition]]],
+        context: LoadingContext,
+    ) -> Iterable[Optional["AssetCheckState"]]:
+        keys = list(keys)
+        mapping = context.instance.event_log_storage.get_asset_check_state(keys)
+        return [mapping.get(check_key) for check_key, _ in keys]
+
+    def with_updates(
+        self,
+        key: AssetCheckKey,
+        partitions_def: Optional[PartitionsDefinition],
+        partition_records: Sequence[AssetCheckPartitionInfo],
+        run_statuses: Mapping[str, DagsterRunStatus],
+    ) -> "AssetCheckState":
+        latest_storage_id = max(
+            (
+                max(r.latest_check_event_storage_id, r.latest_materialization_storage_id or 0)
+                for r in partition_records
+            ),
+            default=self.latest_storage_id,
+        )
+
+        subsets = {
+            status: self.subsets.get(status, SerializableEntitySubset.empty(key, partitions_def))
+            for status in AssetCheckExecutionResolvedStatus
+        }
+        in_progress_runs = dict(self.in_progress_runs)
+
+        # update all subsets based on the new partition records
+        subsets, in_progress_runs = _process_partition_records(
+            key, partitions_def, subsets, in_progress_runs, partition_records
+        )
+        # then check the run statuses and resolve any previously in-progress runs that have completed
+        subsets, in_progress_runs = _process_run_statuses(
+            key, partitions_def, subsets, in_progress_runs, run_statuses
+        )
+        return AssetCheckState(
+            latest_storage_id=latest_storage_id,
+            subsets=subsets,
+            in_progress_runs=in_progress_runs,
+        )
+
+
```
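Taken together, the class above is a serializable fold over the event log: `latest_storage_id` is a high-water-mark cursor, and `with_updates` only needs the records that arrived after the state was last computed, so callers can refresh a state incrementally instead of rescanning history. A minimal sketch of that cursor discipline, using bare integers as stand-ins for event records (toy names, not Dagster APIs):

```python
# Toy sketch of incremental cursor folding, assuming events carry monotonically
# increasing storage ids. ToyState is hypothetical, not a Dagster type.
from dataclasses import dataclass


@dataclass(frozen=True)
class ToyState:
    latest_storage_id: int
    seen: frozenset

    def with_updates(self, events: list) -> "ToyState":
        # fold in only events past the cursor, then advance the high-water mark
        new = [e for e in events if e > self.latest_storage_id]
        return ToyState(
            latest_storage_id=max(new, default=self.latest_storage_id),
            seen=self.seen | frozenset(new),
        )


state = ToyState(0, frozenset())
state = state.with_updates([3, 7])      # cursor advances to 7
state = state.with_updates([3, 7, 12])  # 3 and 7 are already folded in; only 12 applies
print(state.latest_storage_id, sorted(state.seen))  # 12 [3, 7, 12]
```

The same shape explains why `empty()` starts the cursor at 0: the first update simply folds in everything.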
```diff
+def bulk_update_asset_check_state(
+    instance: "DagsterInstance",
+    keys: Sequence[tuple[AssetCheckKey, Optional[PartitionsDefinition]]],
+    initial_states: Mapping[AssetCheckKey, "AssetCheckState"],
+) -> Mapping[AssetCheckKey, "AssetCheckState"]:
+    check_keys = [key for key, _ in keys]
+    partitions_defs_by_key = {key: partitions_def for key, partitions_def in keys}
+
+    # we prefer to do a single fetch for all keys, so we use the minimum storage id of the initial states
+    storage_id = min((state.latest_storage_id for state in initial_states.values()), default=0)
+    infos = instance.event_log_storage.get_asset_check_partition_info(
+        check_keys, after_storage_id=storage_id
+    )
+
+    # find the set of run ids we need to fetch to resolve the in-progress runs, and
+    # group the partition infos by check key
+    run_ids_to_fetch: set[str] = set().union(
+        *(state.in_progress_runs.keys() for state in initial_states.values())
+    )
+    infos_by_key: dict[AssetCheckKey, list[AssetCheckPartitionInfo]] = defaultdict(list)
+    for info in infos:
+        infos_by_key[info.check_key].append(info)
+        if info.latest_execution_status == AssetCheckExecutionRecordStatus.PLANNED:
+            run_ids_to_fetch.add(info.latest_planned_run_id)
+
+    # do a bulk fetch for runs across all states
+    finished_runs = (
+        instance.get_runs(
+            filters=RunsFilter(run_ids=list(run_ids_to_fetch), statuses=FINISHED_STATUSES)
+        )
+        if len(run_ids_to_fetch) > 0
+        else []
+    )
+    finished_runs_status_by_id = {run.run_id: run.status for run in finished_runs}
+    return {
+        key: initial_states[key].with_updates(
+            key, partitions_defs_by_key[key], infos_by_key[key], finished_runs_status_by_id
+        )
+        for key in check_keys
+    }
+
+
```
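`bulk_update_asset_check_state` trades precision for batching: rather than one query per check key, it issues a single `get_asset_check_partition_info` call after the minimum cursor across all initial states, accepting that keys whose state is already ahead will see some records refetched. A self-contained sketch of that tradeoff (hypothetical data, not the real record types):

```python
# Toy sketch of min-cursor batching: one fetch for all keys, regrouped per key.
from collections import defaultdict

# latest_storage_id per check key (hypothetical values)
states = {"check_a": 10, "check_b": 25}
# (storage_id, check_key) pairs standing in for partition info records
events = [(12, "check_a"), (18, "check_a"), (20, "check_b"), (30, "check_b"), (31, "check_a")]

cursor = min(states.values(), default=0)  # 10: a single query covers every key
by_key = defaultdict(list)
for storage_id, key in events:
    if storage_id > cursor:
        by_key[key].append(storage_id)

# the (20, "check_b") event is refetched even though check_b's state already
# covers it; the tradeoff buys a single storage query for the whole batch
print(dict(by_key))  # {'check_a': [12, 18, 31], 'check_b': [20, 30]}
```

Reapplying an already-absorbed record appears safe here because the per-status updates below are set unions and differences.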
```diff
+def _valid_partition_key(
+    partition_key: Optional[str], partitions_def: Optional[PartitionsDefinition]
+) -> bool:
+    if partitions_def is None:
+        return partition_key is None
+    else:
+        return partition_key is not None and partitions_def.has_partition_key(partition_key)
+
+
+def _process_partition_records(
+    key: AssetCheckKey,
+    partitions_def: Optional[PartitionsDefinition],
+    subsets: StatusSubsets,
+    in_progress_runs: InProgressRuns,
+    partition_infos: Sequence[AssetCheckPartitionInfo],
+) -> tuple[StatusSubsets, InProgressRuns]:
+    """Returns a set of updated subsets based on new partition records and the latest materialization storage ids."""
+    new_subsets = dict(subsets)
+    new_in_progress_runs = dict(in_progress_runs)
+
+    for partition_record in partition_infos:
+        pk = partition_record.partition_key
+        if not _valid_partition_key(pk, partitions_def):
+            continue
+
+        partition_subset = SerializableEntitySubset.from_coercible_value(key, pk, partitions_def)
+
+        if partition_record.latest_execution_status == AssetCheckExecutionRecordStatus.PLANNED:
+            # Add to IN_PROGRESS and track run
+            new_subsets[AssetCheckExecutionResolvedStatus.IN_PROGRESS] = new_subsets[
+                AssetCheckExecutionResolvedStatus.IN_PROGRESS
+            ].compute_union(partition_subset)
+            run_id = partition_record.latest_planned_run_id
+            new_in_progress_runs[run_id] = new_in_progress_runs.get(
+                run_id, SerializableEntitySubset.empty(key, partitions_def)
+            ).compute_union(partition_subset)
+
+        elif partition_record.latest_execution_status in (
+            AssetCheckExecutionRecordStatus.SUCCEEDED,
+            AssetCheckExecutionRecordStatus.FAILED,
+        ):
+            if partition_record.is_current:
+                # Check is current, set appropriate status
+                status = (
+                    AssetCheckExecutionResolvedStatus.SUCCEEDED
+                    if partition_record.latest_execution_status
+                    == AssetCheckExecutionRecordStatus.SUCCEEDED
+                    else AssetCheckExecutionResolvedStatus.FAILED
+                )
+                new_subsets[status] = new_subsets[status].compute_union(partition_subset)
+            else:
+                # new materialization, clear the check status
+                for status in new_subsets:
+                    new_subsets[status] = new_subsets[status].compute_difference(partition_subset)
+
+    return new_subsets, new_in_progress_runs
+
+
```
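The transitions in `_process_partition_records` amount to a small per-partition state machine: a PLANNED record moves the partition into IN_PROGRESS and remembers the planning run; a SUCCEEDED or FAILED record lands the partition in the matching subset only while the evaluation is still current; otherwise a newer materialization has invalidated it and the partition is cleared from every subset. A toy rendering with plain sets standing in for entity subsets (all names hypothetical):

```python
# Toy model of the per-partition transitions; statuses are string stand-ins.
STATUSES = ("IN_PROGRESS", "SUCCEEDED", "FAILED", "SKIPPED", "EXECUTION_FAILED")


def apply_record(subsets, in_progress_runs, pk, exec_status, run_id, is_current):
    if exec_status == "PLANNED":
        subsets["IN_PROGRESS"].add(pk)
        in_progress_runs.setdefault(run_id, set()).add(pk)
    elif exec_status in ("SUCCEEDED", "FAILED"):
        if is_current:
            subsets[exec_status].add(pk)
        else:
            # a newer materialization invalidates the old evaluation entirely
            for subset in subsets.values():
                subset.discard(pk)


subsets = {s: set() for s in STATUSES}
runs: dict = {}
apply_record(subsets, runs, "2024-01-01", "PLANNED", "run_1", True)
apply_record(subsets, runs, "2024-01-02", "SUCCEEDED", "run_0", True)
apply_record(subsets, runs, "2024-01-02", "SUCCEEDED", "run_0", False)  # now stale
print(subsets["IN_PROGRESS"], subsets["SUCCEEDED"], runs)
# {'2024-01-01'} set() {'run_1': {'2024-01-01'}}
```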
```diff
+def _process_run_statuses(
+    key: AssetCheckKey,
+    partitions_def: Optional[PartitionsDefinition],
+    subsets: StatusSubsets,
+    in_progress_runs: InProgressRuns,
+    run_statuses: Mapping[str, DagsterRunStatus],
+) -> tuple[StatusSubsets, InProgressRuns]:
+    """Resolve in-progress runs that have completed.
+
+    This checks if any runs tracked in in_progress_runs have finished,
+    and moves their partitions to SKIPPED or EXECUTION_FAILED.
+    """
+    if not in_progress_runs:
+        return subsets, in_progress_runs
+
+    delta_skipped, delta_execution_failed, resolved_run_ids = _resolve_in_progress_subsets(
+        key, partitions_def, in_progress_runs, run_statuses
+    )
+
+    new_in_progress_runs = {
+        run_id: subset
+        for run_id, subset in in_progress_runs.items()
+        if run_id not in resolved_run_ids
+    }
+
+    new_subsets = dict(subsets)
+    new_subsets[AssetCheckExecutionResolvedStatus.IN_PROGRESS] = (
+        new_subsets[AssetCheckExecutionResolvedStatus.IN_PROGRESS]
+        .compute_difference(delta_skipped)
+        .compute_difference(delta_execution_failed)
+    )
+    new_subsets[AssetCheckExecutionResolvedStatus.SKIPPED] = new_subsets[
+        AssetCheckExecutionResolvedStatus.SKIPPED
+    ].compute_union(delta_skipped)
+    new_subsets[AssetCheckExecutionResolvedStatus.EXECUTION_FAILED] = new_subsets[
+        AssetCheckExecutionResolvedStatus.EXECUTION_FAILED
+    ].compute_union(delta_execution_failed)
+
+    return new_subsets, new_in_progress_runs
+
+
+def _resolve_in_progress_subsets(
+    key: AssetCheckKey,
+    partitions_def: Optional[PartitionsDefinition],
+    in_progress_runs: InProgressRuns,
+    run_statuses: Mapping[str, DagsterRunStatus],
+) -> tuple[
+    SerializableEntitySubset[AssetCheckKey],
+    SerializableEntitySubset[AssetCheckKey],
+    set[str],
+]:
+    """Resolve in-progress runs that have completed.
+
+    Returns:
+        Tuple of (delta_skipped, delta_execution_failed, resolved_run_ids)
+    """
+    empty_subset = SerializableEntitySubset.empty(key, partitions_def)
+    delta_skipped = empty_subset
+    delta_execution_failed = empty_subset
+    resolved_run_ids: set[str] = set()
+
+    for run_id, run_status in run_statuses.items():
+        if run_status in FINISHED_STATUSES and run_id in in_progress_runs:
+            resolved_run_ids.add(run_id)
+            run_subset = in_progress_runs[run_id]
+            if run_status == DagsterRunStatus.FAILURE:
+                delta_execution_failed = delta_execution_failed.compute_union(run_subset)
+            else:
+                delta_skipped = delta_skipped.compute_union(run_subset)
+
+    return delta_skipped, delta_execution_failed, resolved_run_ids
```
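These last two helpers close the loop on runs that finished without ever reporting a check result: every partition still tracked under a now-finished run resolves to EXECUTION_FAILED when the run failed, and to SKIPPED otherwise, and the run then drops out of the in-progress map. A compact model of that resolution (plain sets and strings, not the Dagster types):

```python
# Toy model of run-status resolution; FINISHED stands in for FINISHED_STATUSES.
FINISHED = {"SUCCESS", "FAILURE", "CANCELED"}

in_progress_runs = {"run_1": {"p1", "p2"}, "run_2": {"p3"}}
run_statuses = {"run_1": "FAILURE", "run_2": "SUCCESS", "run_3": "STARTED"}

skipped: set = set()
execution_failed: set = set()
for run_id, status in run_statuses.items():
    if status in FINISHED and run_id in in_progress_runs:
        # a failed run means the check could not execute; any other finished
        # run means the check was skipped without reporting a result
        target = execution_failed if status == "FAILURE" else skipped
        target |= in_progress_runs.pop(run_id)

print(sorted(execution_failed), sorted(skipped), in_progress_runs)
# ['p1', 'p2'] ['p3'] {}
```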
dagster/_core/storage/dagster_run.py

```diff
@@ -41,6 +41,9 @@ from dagster._utils.tags import get_boolean_tag_value
 
 if TYPE_CHECKING:
     from dagster._core.definitions.assets.graph.base_asset_graph import EntityKey
+    from dagster._core.definitions.partitions.definition.partitions_definition import (
+        PartitionsDefinition,
+    )
     from dagster._core.definitions.schedule_definition import ScheduleDefinition
     from dagster._core.definitions.sensor_definition import SensorDefinition
     from dagster._core.remote_representation.external import RemoteSchedule, RemoteSensor
@@ -372,6 +375,80 @@ class DagsterRun(
     def get_parent_run_id(self) -> Optional[str]:
         return self.tags.get(PARENT_RUN_ID_TAG)
 
+    @property
+    def is_partitioned(self) -> bool:
+        from dagster._core.storage.tags import (
+            ASSET_PARTITION_RANGE_END_TAG,
+            ASSET_PARTITION_RANGE_START_TAG,
+            PARTITION_NAME_TAG,
+        )
+
+        has_partition_tags = any(
+            self.tags.get(tag) is not None
+            for tag in [
+                PARTITION_NAME_TAG,
+                ASSET_PARTITION_RANGE_START_TAG,
+                ASSET_PARTITION_RANGE_END_TAG,
+            ]
+        )
+        return has_partition_tags or self.partitions_subset is not None
+
+    def get_resolved_partitions_subset_for_events(
+        self, partitions_def: Optional["PartitionsDefinition"]
+    ) -> Optional[PartitionsSubset]:
+        """Get the partitions subset targeted by a run based on its partition tags. Does not always use the
+        partition_subset that is directly stored on the run as this can contain a KeyRangesPartitionSubset
+        which can not be deserialized without additional information.
+        """
+        from dagster._core.definitions.partitions.definition import DynamicPartitionsDefinition
+        from dagster._core.definitions.partitions.partition_key_range import PartitionKeyRange
+        from dagster._core.errors import DagsterInvariantViolationError
+        from dagster._core.storage.tags import (
+            ASSET_PARTITION_RANGE_END_TAG,
+            ASSET_PARTITION_RANGE_START_TAG,
+            PARTITION_NAME_TAG,
+        )
+
+        # KeyRangesPartitionsSubset cannot be deserialized without access to the instance, so ignore it for now.
+        if self.partitions_subset is not None and not isinstance(
+            self.partitions_subset, KeyRangesPartitionsSubset
+        ):
+            return self.partitions_subset
+
+        # fetch information from the tags
+        partition_tag = self.tags.get(PARTITION_NAME_TAG)
+        partition_range_start = self.tags.get(ASSET_PARTITION_RANGE_START_TAG)
+        partition_range_end = self.tags.get(ASSET_PARTITION_RANGE_END_TAG)
+
+        if partition_range_start or partition_range_end:
+            if not partition_range_start or not partition_range_end:
+                raise DagsterInvariantViolationError(
+                    f"Cannot have {ASSET_PARTITION_RANGE_START_TAG} or"
+                    f" {ASSET_PARTITION_RANGE_END_TAG} set without the other"
+                )
+
+            if (
+                isinstance(partitions_def, DynamicPartitionsDefinition)
+                and partitions_def.name is None
+            ):
+                raise DagsterInvariantViolationError(
+                    "Creating a run targeting a partition range is not supported for assets "
+                    "partitioned with function-based dynamic partitions"
+                )
+
+            if partitions_def is not None:
+                return partitions_def.subset_with_partition_keys(
+                    partitions_def.get_partition_keys_in_range(
+                        PartitionKeyRange(partition_range_start, partition_range_end),
+                    )
+                ).to_serializable_subset()
+        elif partition_tag and partitions_def is not None:
+            return partitions_def.subset_with_partition_keys(
+                [partition_tag]
+            ).to_serializable_subset()
+
+        return None
+
     @cached_property
     def dagster_execution_info(self) -> Mapping[str, str]:
         """Key-value pairs encoding metadata about the current Dagster run, typically attached to external execution resources.
```
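`get_resolved_partitions_subset_for_events` prefers the stored subset when it is usable, then falls back to the run's partition tags, where a range (both endpoints required) beats a single partition key. A runnable sketch of that tag-resolution order, with a sorted key list standing in for the partitions definition; the tag strings shown are the standard Dagster tag names, but treat the helper itself as illustrative:

```python
# Toy sketch of partition-tag resolution; resolve_partition_keys is hypothetical.
def resolve_partition_keys(tags: dict, all_keys: list):
    start = tags.get("dagster/asset_partition_range_start")
    end = tags.get("dagster/asset_partition_range_end")
    single = tags.get("dagster/partition")

    if start or end:
        # mirrors the invariant in the diff: range endpoints must be set together
        if not (start and end):
            raise ValueError("range start and end tags must be set together")
        lo, hi = all_keys.index(start), all_keys.index(end)
        return all_keys[lo : hi + 1]
    if single:
        return [single]
    return None


keys = ["2024-01-01", "2024-01-02", "2024-01-03"]
tags = {
    "dagster/asset_partition_range_start": "2024-01-01",
    "dagster/asset_partition_range_end": "2024-01-02",
}
print(resolve_partition_keys(tags, keys))  # ['2024-01-01', '2024-01-02']
```

The real method additionally rejects range targets on function-based `DynamicPartitionsDefinition`, which the toy omits.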
dagster/_core/storage/event_log/base.py

```diff
@@ -19,6 +19,7 @@ from dagster._core.event_api import (
     EventLogRecord,
     EventRecordsFilter,
     EventRecordsResult,
+    PartitionKeyFilter,
     RunStatusChangeRecordsFilter,
 )
 from dagster._core.events import DagsterEventType
@@ -35,6 +36,7 @@ from dagster._core.loader import LoadableBy, LoadingContext
 from dagster._core.storage.asset_check_execution_record import (
     AssetCheckExecutionRecord,
     AssetCheckExecutionRecordStatus,
+    AssetCheckPartitionInfo,
 )
 from dagster._core.storage.dagster_run import DagsterRunStatsSnapshot
 from dagster._core.storage.partition_status_cache import get_and_update_asset_status_cache_value
@@ -48,6 +50,7 @@ from dagster._utils.warnings import deprecation_warning
 
 if TYPE_CHECKING:
     from dagster._core.events.log import EventLogEntry
+    from dagster._core.storage.asset_check_state import AssetCheckState
     from dagster._core.storage.partition_status_cache import AssetStatusCacheValue
 
 
@@ -633,17 +636,52 @@ class EventLogStorage(ABC, MayHaveInstanceWeakref[T_DagsterInstance]):
         limit: int,
         cursor: Optional[int] = None,
         status: Optional[Set[AssetCheckExecutionRecordStatus]] = None,
+        partition_filter: Optional[PartitionKeyFilter] = None,
     ) -> Sequence[AssetCheckExecutionRecord]:
         """Get executions for one asset check, sorted by recency."""
         pass
 
     @abstractmethod
     def get_latest_asset_check_execution_by_key(
-        self,
+        self,
+        check_keys: Sequence[AssetCheckKey],
+        partition_filter: Optional[PartitionKeyFilter] = None,
     ) -> Mapping[AssetCheckKey, AssetCheckExecutionRecord]:
         """Get the latest executions for a list of asset checks."""
         pass
 
+    @abstractmethod
+    def get_asset_check_partition_info(
+        self,
+        keys: Sequence[AssetCheckKey],
+        after_storage_id: Optional[int] = None,
+        partition_keys: Optional[Sequence[str]] = None,
+    ) -> Sequence[AssetCheckPartitionInfo]:
+        """Get asset check partition records with execution status and planned run info."""
+        pass
+
+    def get_checkpointed_asset_check_state(
+        self, keys: Sequence[AssetCheckKey]
+    ) -> Mapping[AssetCheckKey, "AssetCheckState"]:
+        """Get the current stored asset check state for a list of asset checks and their
+        associated partitions definitions. This method is not guaranteed to return a
+        state object that is up to date with the latest events.
+        """
+        from dagster._core.storage.asset_check_state import AssetCheckState
+
+        return {key: AssetCheckState.empty() for key in keys}
+
+    def get_asset_check_state(
+        self, keys: Sequence[tuple[AssetCheckKey, Optional[PartitionsDefinition]]]
+    ) -> Mapping[AssetCheckKey, "AssetCheckState"]:
+        from dagster._core.storage.asset_check_state import bulk_update_asset_check_state
+
+        return bulk_update_asset_check_state(
+            self._instance,
+            keys,
+            initial_states=self.get_checkpointed_asset_check_state([key for key, _ in keys]),
+        )
+
     @abstractmethod
     def fetch_materializations(
         self,
```
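The two non-abstract methods layer a checkpoint under the reconciliation logic in `asset_check_state.py`: `get_checkpointed_asset_check_state` is the storage's persisted starting point (empty by default, so the base implementation replays everything), and `get_asset_check_state` rolls that checkpoint forward through `bulk_update_asset_check_state`. The split means a backend only has to override the checkpoint read to make state loads cheap. A toy sketch of the layering (hypothetical names):

```python
# Toy sketch: a checkpoint is a (cursor, processed_count) pair rolled forward
# over newer events; an empty checkpoint just means a full replay.
def get_state(checkpoint: tuple, events: list) -> tuple:
    cursor, count = checkpoint
    new = [e for e in events if e > cursor]
    return (max(new, default=cursor), count + len(new))


events = [5, 9, 14]
print(get_state((0, 0), events))  # (14, 3): no checkpoint, full replay
print(get_state((9, 2), events))  # (14, 3): checkpointed, only event 14 replayed
```

Both paths converge on the same state, which is what makes the checkpoint a pure optimization.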
```diff
@@ -740,3 +778,23 @@ class EventLogStorage(ABC, MayHaveInstanceWeakref[T_DagsterInstance]):
         # Base implementation of fetching pool config. To be overriden for remote storage
         # implementations where the local instance might not match the remote instance.
         return self._instance.get_concurrency_config().pool_config
+
+    def _get_latest_unpartitioned_materialization_storage_ids(
+        self, keys: Sequence[AssetKey]
+    ) -> Mapping[AssetKey, int]:
+        # Returns a mapping of asset key to the latest recorded materialization storage id for the asset,
+        # ignoring partitioned assets. Used purely for the `get_asset_check_partition_info` method across
+        # different storage implementations.
+        asset_records = self.get_asset_records(keys)
+        latest_unpartitioned_materialization_storage_ids = {}
+        for asset_record in asset_records:
+            if (
+                asset_record.asset_entry.last_materialization_record is not None
+                and asset_record.asset_entry.last_materialization_record.event_log_entry.get_dagster_event().partition
+                is None
+            ):
+                latest_unpartitioned_materialization_storage_ids[
+                    asset_record.asset_entry.asset_key
+                ] = asset_record.asset_entry.last_materialization_storage_id
+
+        return latest_unpartitioned_materialization_storage_ids
```