dagster 1.12.11__py3-none-any.whl → 1.12.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster/_cli/asset.py +15 -4
- dagster/_cli/job.py +8 -3
- dagster/_core/asset_graph_view/asset_graph_view.py +83 -19
- dagster/_core/asset_graph_view/entity_subset.py +14 -9
- dagster/_core/asset_graph_view/serializable_entity_subset.py +15 -0
- dagster/_core/code_pointer.py +8 -1
- dagster/_core/definitions/asset_checks/asset_check_evaluation.py +41 -68
- dagster/_core/definitions/asset_checks/asset_check_result.py +10 -0
- dagster/_core/definitions/asset_checks/asset_check_spec.py +11 -0
- dagster/_core/definitions/assets/graph/asset_graph.py +1 -0
- dagster/_core/definitions/assets/graph/base_asset_graph.py +29 -2
- dagster/_core/definitions/assets/graph/remote_asset_graph.py +9 -5
- dagster/_core/definitions/declarative_automation/legacy/valid_asset_subset.py +4 -4
- dagster/_core/definitions/declarative_automation/operands/operands.py +10 -4
- dagster/_core/definitions/declarative_automation/serialized_objects.py +36 -0
- dagster/_core/definitions/decorators/asset_check_decorator.py +6 -0
- dagster/_core/definitions/decorators/asset_decorator.py +13 -13
- dagster/_core/event_api.py +10 -0
- dagster/_core/execution/context/asset_check_execution_context.py +39 -0
- dagster/_core/execution/plan/execute_step.py +4 -3
- dagster/_core/execution/run_cancellation_thread.py +1 -0
- dagster/_core/instance/runs/run_domain.py +73 -90
- dagster/_core/remote_representation/external_data.py +6 -0
- dagster/_core/storage/asset_check_execution_record.py +49 -5
- dagster/_core/storage/asset_check_state.py +263 -0
- dagster/_core/storage/dagster_run.py +77 -0
- dagster/_core/storage/event_log/base.py +59 -1
- dagster/_core/storage/event_log/sql_event_log.py +174 -7
- dagster/_core/storage/event_log/sqlite/sqlite_event_log.py +6 -1
- dagster/_core/storage/legacy_storage.py +26 -5
- dagster/_core/telemetry.py +3 -0
- dagster/_core/workspace/load_target.py +1 -1
- dagster/_daemon/monitoring/run_monitoring.py +5 -1
- dagster/_generate/download.py +1 -0
- dagster/_utils/__init__.py +11 -0
- dagster/components/list/list.py +4 -1
- dagster/version.py +1 -1
- {dagster-1.12.11.dist-info → dagster-1.12.13.dist-info}/METADATA +4 -4
- {dagster-1.12.11.dist-info → dagster-1.12.13.dist-info}/RECORD +43 -42
- {dagster-1.12.11.dist-info → dagster-1.12.13.dist-info}/WHEEL +1 -1
- {dagster-1.12.11.dist-info → dagster-1.12.13.dist-info}/entry_points.txt +0 -0
- {dagster-1.12.11.dist-info → dagster-1.12.13.dist-info}/licenses/LICENSE +0 -0
- {dagster-1.12.11.dist-info → dagster-1.12.13.dist-info}/top_level.txt +0 -0
dagster/_cli/asset.py
CHANGED
```diff
@@ -43,7 +43,11 @@ def asset_cli():
 @click.option("--partition", help="Asset partition to target", required=False)
 @click.option(
     "--partition-range",
-    help=
+    help=(
+        "Asset partition range to materialize in the format <start>...<end>. "
+        "Requires all assets to have a BackfillPolicy.single_run() policy, which allows "
+        "the partition range to be executed in a single run. For example: 2025-01-01...2025-01-05"
+    ),
     required=False,
 )
 @click.option(
@@ -163,12 +167,18 @@ def execute_materialize_command(
     for asset_key in asset_keys:
         backfill_policy = implicit_job_def.asset_layer.get(asset_key).backfill_policy
         if (
-            backfill_policy is
-
+            backfill_policy is None
+            or backfill_policy.policy_type != BackfillPolicyType.SINGLE_RUN
         ):
             check.failed(
-                "
+                "Partition ranges with the CLI require all selected assets to have a "
+                "BackfillPolicy.single_run() policy. This allows the partition range to be "
+                "executed in a single run. Assets without this policy would require creating "
+                "a backfill with separate runs per partition, which needs a running daemon "
+                "process. Consider using the Dagster UI or a running daemon to execute "
+                "partition ranges for assets without a single-run backfill policy."
             )
+
     try:
         implicit_job_def.validate_partition_key(
             partition_range_start, selected_asset_keys=asset_keys, context=context
@@ -181,6 +191,7 @@ execute_materialize_command(
             "All selected assets must have a PartitionsDefinition containing the passed"
             f" partition key `{partition_range_start}` or have no PartitionsDefinition."
         )
+
     tags = {
         ASSET_PARTITION_RANGE_START_TAG: partition_range_start,
         ASSET_PARTITION_RANGE_END_TAG: partition_range_end,
```
dagster/_cli/job.py
CHANGED
```diff
@@ -405,11 +405,16 @@ def execute_execute_command(
     for asset_key in job_def.asset_layer.executable_asset_keys:
         backfill_policy = job_def.asset_layer.get(asset_key).backfill_policy
         if (
-            backfill_policy is
-
+            backfill_policy is None
+            or backfill_policy.policy_type != BackfillPolicyType.SINGLE_RUN
         ):
             check.failed(
-                "
+                "Partition ranges with the CLI require all selected assets to have a "
+                "BackfillPolicy.single_run() policy. This allows the partition range to be "
+                "executed in a single run. Assets without this policy would require creating "
+                "a backfill with separate runs per partition, which needs a running daemon "
+                "process. Consider using the Dagster UI or a running daemon to execute "
+                "partition ranges for assets without a single-run backfill policy."
             )
     try:
         job_def.validate_partition_key(
```
dagster/_core/asset_graph_view/asset_graph_view.py
CHANGED
```diff
@@ -21,6 +21,7 @@ from dagster._core.definitions.events import AssetKeyPartitionKey
 from dagster._core.definitions.freshness import FreshnessState
 from dagster._core.definitions.partitions.context import (
     PartitionLoadingContext,
+    partition_loading_context,
     use_partition_loading_context,
 )
 from dagster._core.definitions.partitions.definition import (
@@ -162,10 +163,7 @@ class AssetGraphView(LoadingContext):
         return self._queryer

     def _get_partitions_def(self, key: T_EntityKey) -> Optional["PartitionsDefinition"]:
-
-        return self.asset_graph.get(key).partitions_def
-        else:
-            return None
+        return self.asset_graph.get(key).partitions_def

     @cached_method
     @use_partition_loading_context
@@ -374,6 +372,16 @@ class AssetGraphView(LoadingContext):
         )
         return EntitySubset(self, key=key, value=_ValidatedEntitySubsetValue(value))

+    @use_partition_loading_context
+    def get_subset_from_partition_keys(
+        self,
+        key: T_EntityKey,
+        partitions_def: "PartitionsDefinition",
+        partition_keys: AbstractSet[str],
+    ) -> EntitySubset[T_EntityKey]:
+        value = partitions_def.subset_with_partition_keys(partition_keys)
+        return EntitySubset(self, key=key, value=_ValidatedEntitySubsetValue(value))
+
     @use_partition_loading_context
     def compute_parent_subset_and_required_but_nonexistent_subset(
         self, parent_key, subset: EntitySubset[T_EntityKey]
@@ -548,11 +556,22 @@ class AssetGraphView(LoadingContext):
         check.failed(f"Unsupported partitions_def: {partitions_def}")

     async def compute_subset_with_status(
-        self,
-
+        self,
+        key: AssetCheckKey,
+        status: Optional["AssetCheckExecutionResolvedStatus"],
+        from_subset: EntitySubset,
+    ) -> EntitySubset[AssetCheckKey]:
+        """Returns the subset of an asset check that matches a given status."""
         from dagster._core.storage.event_log.base import AssetCheckSummaryRecord

-
+        # Handle partitioned asset checks
+        if self._get_partitions_def(key):
+            with partition_loading_context(new_ctx=self._partition_loading_context):
+                return await self._get_partitioned_check_subset_with_status(
+                    key, status, from_subset
+                )
+
+        # Handle non-partitioned asset checks with existing logic
         summary = await AssetCheckSummaryRecord.gen(self, key)
         latest_record = summary.last_check_execution_record if summary else None
         resolved_status = (
@@ -586,31 +605,64 @@ class AssetGraphView(LoadingContext):
         return self.get_empty_subset(key=key)

     async def _compute_run_in_progress_check_subset(
-        self, key: AssetCheckKey
+        self, key: AssetCheckKey, from_subset: EntitySubset
     ) -> EntitySubset[AssetCheckKey]:
         from dagster._core.storage.asset_check_execution_record import (
             AssetCheckExecutionResolvedStatus,
         )

         return await self.compute_subset_with_status(
-            key, AssetCheckExecutionResolvedStatus.IN_PROGRESS
+            key, AssetCheckExecutionResolvedStatus.IN_PROGRESS, from_subset
         )

     async def _compute_execution_failed_check_subset(
-        self, key: AssetCheckKey
+        self, key: AssetCheckKey, from_subset: EntitySubset
     ) -> EntitySubset[AssetCheckKey]:
         from dagster._core.storage.asset_check_execution_record import (
             AssetCheckExecutionResolvedStatus,
         )

         return await self.compute_subset_with_status(
-            key, AssetCheckExecutionResolvedStatus.EXECUTION_FAILED
+            key, AssetCheckExecutionResolvedStatus.EXECUTION_FAILED, from_subset
         )

     async def _compute_missing_check_subset(
-        self, key: AssetCheckKey
+        self, key: AssetCheckKey, from_subset: EntitySubset
+    ) -> EntitySubset[AssetCheckKey]:
+        return await self.compute_subset_with_status(key, None, from_subset)
+
+    @use_partition_loading_context
+    async def _get_partitioned_check_subset_with_status(
+        self,
+        key: AssetCheckKey,
+        status: Optional["AssetCheckExecutionResolvedStatus"],
+        from_subset: EntitySubset,
     ) -> EntitySubset[AssetCheckKey]:
-
+        from dagster._core.storage.asset_check_state import AssetCheckState
+
+        check_node = self.asset_graph.get(key)
+        if not check_node or not check_node.partitions_def:
+            check.failed(f"Asset check {key} not found or not partitioned.")
+
+        cache_value = (
+            await AssetCheckState.gen(self, (key, check_node.partitions_def))
+            or AssetCheckState.empty()
+        )
+
+        if status is None:
+            known_statuses = self.get_empty_subset(key=key)
+            for serializable_subset in cache_value.subsets.values():
+                subset = self.get_subset_from_serializable_subset(serializable_subset)
+                if subset:
+                    known_statuses = known_statuses.compute_union(subset)
+            return from_subset.compute_difference(known_statuses) or self.get_empty_subset(key=key)
+        else:
+            serializable_subset = cache_value.subsets.get(status)
+            if serializable_subset is None:
+                return self.get_empty_subset(key=key)
+            return self.get_subset_from_serializable_subset(
+                serializable_subset
+            ) or self.get_empty_subset(key=key)

     async def _compute_run_in_progress_asset_subset(self, key: AssetKey) -> EntitySubset[AssetKey]:
         from dagster._core.storage.partition_status_cache import AssetStatusCacheValue
@@ -735,15 +787,21 @@ class AssetGraphView(LoadingContext):
         )

     @cached_method
-    async def compute_run_in_progress_subset(
+    async def compute_run_in_progress_subset(
+        self, *, key: EntityKey, from_subset: EntitySubset
+    ) -> EntitySubset:
         return await _dispatch(
             key=key,
-            check_method=
+            check_method=functools.partial(
+                self._compute_run_in_progress_check_subset, from_subset=from_subset
+            ),
             asset_method=self._compute_run_in_progress_asset_subset,
         )

     @cached_method
-    async def compute_backfill_in_progress_subset(
+    async def compute_backfill_in_progress_subset(
+        self, *, key: EntityKey, from_subset: EntitySubset
+    ) -> EntitySubset:
         async def get_empty_subset(key: EntityKey) -> EntitySubset:
             return self.get_empty_subset(key=key)
@@ -755,10 +813,14 @@ class AssetGraphView(LoadingContext):
         )

     @cached_method
-    async def compute_execution_failed_subset(
+    async def compute_execution_failed_subset(
+        self, *, key: EntityKey, from_subset: EntitySubset
+    ) -> EntitySubset:
         return await _dispatch(
             key=key,
-            check_method=
+            check_method=functools.partial(
+                self._compute_execution_failed_check_subset, from_subset=from_subset
+            ),
             asset_method=self._compute_execution_failed_asset_subset,
         )
@@ -768,7 +830,9 @@ class AssetGraphView(LoadingContext):
     ) -> EntitySubset:
         return await _dispatch(
             key=key,
-            check_method=
+            check_method=functools.partial(
+                self._compute_missing_check_subset, from_subset=from_subset
+            ),
             asset_method=functools.partial(
                 self._compute_missing_asset_subset, from_subset=from_subset
             ),
```
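The shared entry points now bind `from_subset` into the check-path callable with `functools.partial` before handing it to `_dispatch`. A self-contained sketch of that pattern with simplified stand-in types (none of these names are Dagster's internals):

```python
import asyncio
import functools


# Simplified _dispatch: route to the check or asset path based on the key.
async def _dispatch(key: str, check_method, asset_method):
    return await (check_method(key) if key.startswith("check/") else asset_method(key))


async def in_progress_check_subset(key: str, from_subset: set) -> set:
    stored = {"2025-01-01", "2025-01-02"}  # pretend these partitions are in progress
    return stored & from_subset  # restrict to the candidate subset


async def in_progress_asset_subset(key: str) -> set:
    return {"2025-01-03"}


async def main() -> None:
    # Bind from_subset up front so _dispatch can call the method with just the key.
    check_method = functools.partial(
        in_progress_check_subset, from_subset={"2025-01-02", "2025-01-04"}
    )
    print(await _dispatch("check/no_nulls", check_method, in_progress_asset_subset))


asyncio.run(main())  # prints {'2025-01-02'}
```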
dagster/_core/asset_graph_view/entity_subset.py
CHANGED
```diff
@@ -69,10 +69,13 @@ class EntitySubset(Generic[T_EntityKey]):
         return SerializableEntitySubset(key=self._key, value=self._value)

     def expensively_compute_partition_keys(self) -> AbstractSet[str]:
-
-
-
-
+        internal_value = self.get_internal_value()
+        if isinstance(internal_value, PartitionsSubset):
+            return set(internal_value.get_partition_keys())
+        elif internal_value:
+            check.failed("Subset is not partitioned")
+        else:
+            return set()

     def expensively_compute_asset_partitions(self) -> AbstractSet[AssetKeyPartitionKey]:
         if not isinstance(self.key, AssetKey):
@@ -106,11 +109,13 @@ class EntitySubset(Generic[T_EntityKey]):
         return self._oper(other, operator.and_)

     def compute_intersection_with_partition_keys(
-        self: "EntitySubset[
-    ) -> "EntitySubset[
-
-
-
+        self: "EntitySubset[T_EntityKey]", partition_keys: AbstractSet[str]
+    ) -> "EntitySubset[T_EntityKey]":
+        if self.partitions_def is None:
+            return self._asset_graph_view.get_empty_subset(key=self.key)
+
+        partition_subset = self._asset_graph_view.get_subset_from_partition_keys(
+            self.key, self.partitions_def, partition_keys
         )
         return self.compute_intersection(partition_subset)
```
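`expensively_compute_partition_keys` now distinguishes three cases: a partitioned subset enumerates its keys, a non-empty unpartitioned subset is an error, and an empty unpartitioned subset yields no keys. A toy model of that branching, with a `set` standing in for `PartitionsSubset` and a `bool` for the unpartitioned value (these stand-in types are illustrative, not Dagster's):

```python
def partition_keys(internal_value):
    if isinstance(internal_value, set):
        return set(internal_value)  # partitioned: enumerate the keys
    elif internal_value:
        raise ValueError("Subset is not partitioned")  # non-empty, unpartitioned
    else:
        return set()  # empty unpartitioned subset -> no keys


assert partition_keys({"2025-01-01", "2025-01-02"}) == {"2025-01-01", "2025-01-02"}
assert partition_keys(False) == set()
```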
dagster/_core/asset_graph_view/serializable_entity_subset.py
CHANGED
```diff
@@ -50,6 +50,12 @@ class SerializableEntitySubset(Generic[T_EntityKey]):
     key: T_EntityKey
     value: EntitySubsetValue

+    @classmethod
+    def empty(
+        cls, key: T_EntityKey, partitions_def: Optional[PartitionsDefinition]
+    ) -> "SerializableEntitySubset[T_EntityKey]":
+        return cls(key=key, value=partitions_def.empty_subset() if partitions_def else False)
+
     @classmethod
     def from_coercible_value(
         cls,
@@ -132,6 +138,10 @@ class SerializableEntitySubset(Generic[T_EntityKey]):
     def is_compatible_with_partitions_def(
         self, partitions_def: Optional[PartitionsDefinition]
     ) -> bool:
+        from dagster._core.definitions.partitions.definition.time_window import (
+            TimeWindowPartitionsDefinition,
+        )
+
         if self.is_partitioned:
             # for some PartitionSubset types, we have access to the underlying partitions
             # definitions, so we can ensure those are identical
@@ -150,6 +160,11 @@ class SerializableEntitySubset(Generic[T_EntityKey]):
                     and partitions_def.has_partition_key(r.end)
                     for r in self.value.key_ranges
                 )
+            elif isinstance(self.value, DefaultPartitionsSubset) and isinstance(
+                partitions_def, TimeWindowPartitionsDefinition
+            ):
+                return all(partitions_def.has_partition_key(k) for k in self.value.subset)
+
             else:
                 return partitions_def is not None
         else:
```
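The new `empty()` classmethod encodes the two representations of an empty subset: the bool `False` for unpartitioned entities, and an empty `PartitionsSubset` for partitioned ones. A stand-in sketch of the same rule (`FakePartitionsDefinition` is hypothetical; the real classmethod takes a Dagster `PartitionsDefinition`):

```python
from typing import Optional


class FakePartitionsDefinition:
    def empty_subset(self) -> set:
        return set()  # stands in for an empty PartitionsSubset


def empty_value(partitions_def: Optional[FakePartitionsDefinition]):
    # Mirrors: partitions_def.empty_subset() if partitions_def else False
    return partitions_def.empty_subset() if partitions_def else False


assert empty_value(None) is False
assert empty_value(FakePartitionsDefinition()) == set()
```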
dagster/_core/code_pointer.py
CHANGED
```diff
@@ -2,6 +2,7 @@ import importlib
 import inspect
 import os
 import sys
+import uuid
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Sequence
 from pathlib import Path
@@ -59,7 +60,11 @@ def rebase_file(relative_path_in_file: str, file_path_resides_in: str) -> str:
     )


-def load_python_file(
+def load_python_file(
+    python_file: Union[str, Path],
+    working_directory: Optional[str],
+    add_uuid_suffix: bool = False,
+) -> ModuleType:
     """Takes a path to a python file and returns a loaded module."""
     check.inst_param(python_file, "python_file", (str, Path))
     check.opt_str_param(working_directory, "working_directory")
@@ -68,6 +73,8 @@ def load_python_file(python_file: Union[str, Path], working_directory: Optional[
     os.stat(python_file)

     module_name = os.path.splitext(os.path.basename(python_file))[0]
+    if add_uuid_suffix:
+        module_name = f"{module_name}_{uuid.uuid4().hex}"

     # Use the passed in working directory for local imports (sys.path[0] isn't
     # consistently set in the different entry points that Dagster uses to import code)
```
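Suffixing the module name with a UUID avoids collisions in `sys.modules` when two code locations load files with the same basename. A self-contained sketch of the idea using the standard importlib machinery (the helper name is illustrative, not Dagster's):

```python
import importlib.util
import sys
import uuid
from types import ModuleType


def load_module_with_unique_name(path: str) -> ModuleType:
    # Derive the base module name from the file, then append a random hex
    # suffix so a previously imported module of the same basename in
    # sys.modules is never clobbered.
    base = path.rsplit("/", 1)[-1].removesuffix(".py")
    module_name = f"{base}_{uuid.uuid4().hex}"

    spec = importlib.util.spec_from_file_location(module_name, path)
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module
```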
dagster/_core/definitions/asset_checks/asset_check_evaluation.py
CHANGED
```diff
@@ -1,34 +1,27 @@
 from collections.abc import Mapping
-from typing import
+from typing import Optional
+
+import dagster_shared.check as check
+from dagster_shared.record import IHaveNew, record, record_custom, replace

-import dagster._check as check
 from dagster._core.definitions.asset_checks.asset_check_spec import (
     AssetCheckKey,
     AssetCheckSeverity,
 )
 from dagster._core.definitions.events import AssetKey, MetadataValue, RawMetadataValue
 from dagster._core.definitions.metadata import normalize_metadata
+from dagster._core.definitions.partitions.subset import PartitionsSubset
 from dagster._serdes import whitelist_for_serdes


 @whitelist_for_serdes
-
-
-    "_AssetCheckEvaluationPlanned",
-    [
-        ("asset_key", AssetKey),
-        ("check_name", str),
-    ],
-)
-):
+@record
+class AssetCheckEvaluationPlanned:
     """Metadata for the event when an asset check is launched."""

-
-
-
-        asset_key=check.inst_param(asset_key, "asset_key", AssetKey),
-        check_name=check.str_param(check_name, "check_name"),
-    )
+    asset_key: AssetKey
+    check_name: str
+    partitions_subset: Optional[PartitionsSubset] = None

     @property
     def asset_check_key(self) -> AssetCheckKey:
@@ -36,46 +29,18 @@ class AssetCheckEvaluationPlanned(


 @whitelist_for_serdes
-
-
-    "_AssetCheckEvaluationTargetMaterializationData",
-    [
-        ("storage_id", int),
-        ("run_id", str),
-        ("timestamp", float),
-    ],
-)
-):
+@record
+class AssetCheckEvaluationTargetMaterializationData:
     """A pointer to the latest materialization at execution time of an asset check."""

-
-
-
-        storage_id=check.int_param(storage_id, "storage_id"),
-        run_id=check.str_param(run_id, "run_id"),
-        timestamp=check.float_param(timestamp, "timestamp"),
-    )
+    storage_id: int
+    run_id: str
+    timestamp: float


 @whitelist_for_serdes(storage_field_names={"passed": "success"})
-
-
-    "_AssetCheckEvaluation",
-    [
-        ("asset_key", AssetKey),
-        ("check_name", str),
-        ("passed", bool),
-        ("metadata", Mapping[str, MetadataValue]),
-        (
-            "target_materialization_data",
-            Optional[AssetCheckEvaluationTargetMaterializationData],
-        ),
-        ("severity", AssetCheckSeverity),
-        ("description", Optional[str]),
-        ("blocking", Optional[bool]),
-    ],
-)
-):
+@record_custom
+class AssetCheckEvaluation(IHaveNew):
     """Represents the outcome of a evaluating an asset check.

     Args:
@@ -97,8 +62,20 @@ class AssetCheckEvaluation(
             A text description of the result of the check evaluation.
         blocking (Optional[bool]):
             Whether the check is blocking.
+        partition (Optional[str]):
+            The partition that the check was evaluated on, if applicable.
     """

+    asset_key: AssetKey
+    check_name: str
+    passed: bool
+    metadata: Mapping[str, MetadataValue]
+    target_materialization_data: Optional[AssetCheckEvaluationTargetMaterializationData]
+    severity: AssetCheckSeverity
+    description: Optional[str]
+    blocking: Optional[bool]
+    partition: Optional[str]
+
     def __new__(
         cls,
         asset_key: AssetKey,
@@ -109,25 +86,21 @@ class AssetCheckEvaluation(
         severity: AssetCheckSeverity = AssetCheckSeverity.ERROR,
         description: Optional[str] = None,
         blocking: Optional[bool] = None,
+        partition: Optional[str] = None,
     ):
-        normed_metadata = normalize_metadata(
-            check.opt_mapping_param(metadata, "metadata", key_type=str),
-        )
-
         return super().__new__(
             cls,
-            asset_key=
-            check_name=
-            passed=
-            metadata=
-
-                target_materialization_data,
-                "target_materialization_data",
-                AssetCheckEvaluationTargetMaterializationData,
+            asset_key=asset_key,
+            check_name=check_name,
+            passed=passed,
+            metadata=normalize_metadata(
+                check.opt_mapping_param(metadata, "metadata", key_type=str)
             ),
-
-
-
+            target_materialization_data=target_materialization_data,
+            severity=severity,
+            description=description,
+            blocking=blocking,
+            partition=partition,
         )

     @property
@@ -138,4 +111,4 @@ class AssetCheckEvaluation(
         normed_metadata = normalize_metadata(
             check.opt_mapping_param(metadata, "metadata", key_type=str),
         )
-        return self
+        return replace(self, metadata=normed_metadata)
```
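With the NamedTuple base replaced by `@record_custom`, construction stays keyword-based and metadata is still normalized in `__new__`; the new `partition` field defaults to `None`. A sketch of constructing an evaluation for one partition (field values are illustrative):

```python
from dagster import AssetCheckEvaluation, AssetKey

evaluation = AssetCheckEvaluation(
    asset_key=AssetKey("events"),
    check_name="no_nulls",
    passed=True,
    metadata={"null_rows": 0},  # coerced via normalize_metadata in __new__
    partition="2025-01-02",  # new in this release; None for unpartitioned checks
)
```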
dagster/_core/definitions/asset_checks/asset_check_result.py
CHANGED
```diff
@@ -172,6 +172,15 @@ class AssetCheckResult(
         else:
             target_materialization_data = None

+        if step_context.has_partition_key:
+            check_spec = assets_def_for_check.get_spec_for_check_key(check_key)
+            if check_spec.partitions_def is not None:
+                partition = step_context.partition_key
+            else:
+                partition = None
+        else:
+            partition = None
+
         return AssetCheckEvaluation(
             check_name=check_key.name,
             asset_key=check_key.asset_key,
@@ -181,6 +190,7 @@ class AssetCheckResult(
             severity=self.severity,
             description=self.description,
             blocking=assets_def_for_check.get_spec_for_check_key(check_key).blocking,
+            partition=partition,
         )

     def with_metadata(self, metadata: Mapping[str, RawMetadataValue]) -> "AssetCheckResult":  # pyright: ignore[reportIncompatibleMethodOverride]
```
dagster/_core/definitions/asset_checks/asset_check_spec.py
CHANGED
```diff
@@ -10,9 +10,11 @@ from dagster_shared.record import (
     replace,
 )
 from dagster_shared.serdes import whitelist_for_serdes
+from dagster_shared.utils.warnings import preview_warning

 from dagster._annotations import PublicAttr, public
 from dagster._core.definitions.asset_key import AssetCheckKey, AssetKey, CoercibleToAssetKey
+from dagster._core.definitions.partitions.definition import PartitionsDefinition

 if TYPE_CHECKING:
     from dagster._core.definitions.assets.definition.asset_dep import AssetDep, CoercibleToAssetDep
@@ -58,6 +60,7 @@ class AssetCheckSpec(IHaveNew, LegacyNamedTupleMixin):
     blocking: PublicAttr[bool]
     metadata: PublicAttr[Mapping[str, Any]]
     automation_condition: PublicAttr[Optional[LazyAutomationCondition]]
+    partitions_def: PublicAttr[Optional[PartitionsDefinition]]

     """Defines information about an asset check, except how to execute it.

@@ -80,6 +83,9 @@ class AssetCheckSpec(IHaveNew, LegacyNamedTupleMixin):
             that multi-asset is responsible for enforcing that downstream assets within the
             same step do not execute after a blocking asset check fails.
         metadata (Optional[Mapping[str, Any]]): A dict of static metadata for this asset check.
+        automation_condition (Optional[AutomationCondition[AssetCheckKey]]): The AutomationCondition for this asset check.
+        partitions_def (Optional[PartitionsDefinition]): The PartitionsDefinition for this asset check. Must be either None
+            or the same as the PartitionsDefinition of the asset specified by `asset`.
     """

     def __new__(
@@ -92,11 +98,15 @@ class AssetCheckSpec(IHaveNew, LegacyNamedTupleMixin):
         blocking: bool = False,
         metadata: Optional[Mapping[str, Any]] = None,
         automation_condition: Optional["AutomationCondition[AssetCheckKey]"] = None,
+        partitions_def: Optional[PartitionsDefinition] = None,
     ):
         from dagster._core.definitions.assets.definition.asset_dep import (
             coerce_to_deps_and_check_duplicates,
         )

+        if partitions_def is not None:
+            preview_warning("Specifying a partitions_def on an AssetCheckSpec")
+
         asset_key = AssetKey.from_coercible_or_definition(asset)

         additional_asset_deps = coerce_to_deps_and_check_duplicates(
@@ -119,6 +129,7 @@ class AssetCheckSpec(IHaveNew, LegacyNamedTupleMixin):
             blocking=blocking,
             metadata=metadata or {},
             automation_condition=automation_condition,
+            partitions_def=partitions_def,
         )

     def get_python_identifier(self) -> str:
```
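A minimal sketch of the new preview parameter in use. Per the `__new__` body above, passing `partitions_def` emits a `preview_warning`, and the definition must match the target asset's (see the validation added in base_asset_graph.py below); names here are illustrative:

```python
from dagster import AssetCheckSpec, DailyPartitionsDefinition, asset

daily = DailyPartitionsDefinition(start_date="2025-01-01")


@asset(partitions_def=daily)
def events():
    ...


# Preview feature: the check's partitions_def must equal the target asset's.
no_nulls_spec = AssetCheckSpec(name="no_nulls", asset=events, partitions_def=daily)
```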
dagster/_core/definitions/assets/graph/asset_graph.py
CHANGED
```diff
@@ -189,6 +189,7 @@ class AssetGraph(BaseAssetGraph[AssetNode]):
             v.get_spec_for_check_key(k).description,
             v.get_spec_for_check_key(k).automation_condition,
             v.get_spec_for_check_key(k).metadata,
+            v.get_spec_for_check_key(k).partitions_def,
         )
         for k, v in assets_defs_by_check_key.items()
     }
```
dagster/_core/definitions/assets/graph/base_asset_graph.py
CHANGED
```diff
@@ -217,6 +217,7 @@ class AssetCheckNode(BaseEntityNode[AssetCheckKey]):
         description: Optional[str],
         automation_condition: Optional["AutomationCondition[AssetCheckKey]"],
         metadata: ArbitraryMetadataMapping,
+        partitions_def: Optional[PartitionsDefinition],
     ):
         self.key = key
         self.blocking = blocking
@@ -224,6 +225,7 @@ class AssetCheckNode(BaseEntityNode[AssetCheckKey]):
         self._additional_deps = additional_deps
         self._description = description
         self._metadata = metadata
+        self._partitions_def = partitions_def

     @property
     def parent_entity_keys(self) -> AbstractSet[AssetKey]:
@@ -235,8 +237,7 @@ class AssetCheckNode(BaseEntityNode[AssetCheckKey]):

     @property
     def partitions_def(self) -> Optional[PartitionsDefinition]:
-
-        return None
+        return self._partitions_def

     @property
     def partition_mappings(self) -> Mapping[EntityKey, PartitionMapping]:
@@ -266,6 +267,10 @@ class BaseAssetGraph(ABC, Generic[T_AssetNode]):
     def asset_nodes(self) -> Iterable[T_AssetNode]:
         return self._asset_nodes_by_key.values()

+    @property
+    def asset_check_nodes(self) -> Iterable[AssetCheckNode]:
+        return self._asset_check_nodes_by_key.values()
+
     @property
     def nodes(self) -> Iterable[BaseEntityNode]:
         return [
@@ -668,6 +673,28 @@ class BaseAssetGraph(ABC, Generic[T_AssetNode]):
                 f"Invalid partition mapping from {node.key.to_user_string()} to {parent.key.to_user_string()}"
             ) from e

+        # Validate that asset checks have compatible partitions_def with their target asset
+        for node in self.asset_check_nodes:
+            if node.partitions_def is None:
+                continue
+
+            target_asset_key = node.key.asset_key
+            if not self.has(target_asset_key):
+                raise DagsterInvalidDefinitionError(
+                    f"Partitioned asset check '{node.key.to_user_string()}' targets "
+                    f"asset '{target_asset_key.to_user_string()}' "
+                    "but the asset does not exist in the graph."
+                )
+            # If the check is partitioned, it must have the same partitions_def as the asset
+            if node.partitions_def != self.get(target_asset_key).partitions_def:
+                raise DagsterInvalidDefinitionError(
+                    f"Asset check '{node.key.to_user_string()}' targets asset '{target_asset_key.to_user_string()}' "
+                    "but has a different partitions definition. "
+                    f"Asset check partitions_def: {node.partitions_def}, "
+                    f"Asset partitions_def: {self.get(target_asset_key).partitions_def}. "
+                    "Partitioned asset checks must have the same partitions definition as their target asset."
+                )
+
     def upstream_key_iterator(self, asset_key: AssetKey) -> Iterator[AssetKey]:
         """Iterates through all asset keys which are upstream of the given key."""
         visited: set[AssetKey] = set()
```