dagster-datacontract 0.1.2__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.1.2
3
+ Version: 0.2.1
4
4
  Summary: Load metadata and asset check specifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
6
  Requires-Python: >=3.11.10
@@ -52,7 +52,7 @@ data_contract = DataContractLoader(
52
52
  name=asset_name,
53
53
  metadata=data_contract.metadata,
54
54
  tags=data_contract.tags,
55
- description=data_contract.description,
55
+ description=data_contract.load_description(),
56
56
  owners=data_contract.owner,
57
57
  code_version=data_contract.version,
58
58
  )
@@ -39,7 +39,7 @@ data_contract = DataContractLoader(
39
39
  name=asset_name,
40
40
  metadata=data_contract.metadata,
41
41
  tags=data_contract.tags,
42
- description=data_contract.description,
42
+ description=data_contract.load_description(),
43
43
  owners=data_contract.owner,
44
44
  code_version=data_contract.version,
45
45
  )
@@ -23,7 +23,6 @@ class DataContractLoader:
23
23
  )
24
24
  self.metadata = self._load_metadata()
25
25
  self.tags = self._load_tags()
26
- self.description = self._load_description()
27
26
  self.owner = self._load_owner()
28
27
  self.version = self._load_version()
29
28
  self.cron_schedule = self._load_cron_schedule()
@@ -76,23 +75,6 @@ class DataContractLoader:
76
75
 
77
76
  return tags
78
77
 
79
- def _load_description(self) -> str | None:
80
- model_description = self.data_contract_specification.models.get(
81
- self.asset_name
82
- ).description.replace("\n", "\n\n")
83
- info_description = self.data_contract_specification.info.description.replace(
84
- "\n", "\n\n"
85
- )
86
-
87
- if model_description and info_description:
88
- return f"{model_description}\n\n{info_description}"
89
- elif model_description:
90
- return textwrap.dedent(model_description)
91
- elif info_description:
92
- return textwrap.dedent(info_description)
93
-
94
- return None
95
-
96
78
  def _load_owner(self) -> list[str] | None:
97
79
  owner = self.data_contract_specification.info.owner
98
80
 
@@ -112,7 +94,69 @@ class DataContractLoader:
112
94
  except AttributeError:
113
95
  return None
114
96
 
97
+ def load_description(
98
+ self, config: dict[str, Any] | None = None, separator: str = "\n"
99
+ ) -> str | None:
100
+ """Load and return a formatted description string based on the data contract specification.
101
+
102
+ This method composes a description by pulling text from different parts
103
+ of the data contract specification (e.g., model and info descriptions),
104
+ joining them using the specified separator.
105
+
106
+ Args:
107
+ config (dict[str, Any] | None, optional): A configuration dictionary
108
+ specifying the order in which to concatenate the description parts.
109
+ Defaults to `{"order": ["model", "info"]}`.
110
+ separator (str, optional): A string used to separate different parts
111
+ of the description. Defaults to a newline character (`"\n"`).
112
+
113
+ Returns:
114
+ str | None: A single string combining the specified description parts
115
+ if available, otherwise `None`.
116
+
117
+
118
+ Example:
119
+ >>> self.load_description()
120
+ 'Model description...\n\nInfo description...'
121
+ """
122
+ default_config = {"order": ["model", "info"]}
123
+
124
+ configuration = default_config | (config or {})
125
+
126
+ descriptions = {
127
+ "model": self.data_contract_specification.models.get(
128
+ self.asset_name
129
+ ).description,
130
+ "info": self.data_contract_specification.info.description,
131
+ }
132
+
133
+ parts = []
134
+ for key in configuration["order"]:
135
+ desc = descriptions.get(key).replace("\n", f"{separator}\n")
136
+ if desc:
137
+ parts.append(textwrap.dedent(desc))
138
+
139
+ if parts:
140
+ return f"{separator}\n".join(parts)
141
+
142
+ return None
143
+
115
144
  def load_data_quality_checks(self) -> dg.AssetChecksDefinition:
145
+ """Define and return a data quality check for the specified asset.
146
+
147
+ This method registers a data quality check using the `@dg.asset_check`
148
+ decorator. The check runs the data contract's `test()` method and returns
149
+ the result as a `dg.AssetCheckResult`. The result is considered "passed"
150
+ if the test outcome matches `ResultEnum.passed`.
151
+
152
+ The check is marked as blocking, which means failures may halt downstream
153
+ processing in a data pipeline.
154
+
155
+ Returns:
156
+ dg.AssetChecksDefinition: The defined asset quality check function,
157
+ registered with Dagster's data quality framework.
158
+ """
159
+
116
160
  @dg.asset_check(
117
161
  asset=self.asset_key,
118
162
  blocking=True,
@@ -130,6 +174,27 @@ class DataContractLoader:
130
174
  return check_asset
131
175
 
132
176
  def load_freshness_checks(self, lower_bound_delta: timedelta):
177
+ """Generate and return freshness checks for the asset based on update recency.
178
+
179
+ This method builds freshness checks using Dagster's
180
+ `build_last_update_freshness_checks` utility. It ensures that the specified
181
+ asset has been updated within a given time window (`lower_bound_delta`).
182
+ A cron schedule (`self.cron_schedule`) defines when the check should run.
183
+
184
+ Args:
185
+ lower_bound_delta (timedelta): The minimum acceptable time difference
186
+ between the current time and the asset's last update timestamp.
187
+ If the asset is older than this delta, the check will fail.
188
+
189
+ Returns:
190
+ list[AssetCheckSpec] | AssetChecksDefinition: A freshness check definition
191
+ that can be returned from `define_asset_checks` to register the check.
192
+
193
+
194
+ Example:
195
+ >>> self.load_freshness_checks(timedelta(hours=24))
196
+ # Ensures the asset was updated in the last 24 hours.
197
+ """
133
198
  freshness_checks = dg.build_last_update_freshness_checks(
134
199
  assets=[self.asset_name],
135
200
  lower_bound_delta=lower_bound_delta,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.1.2
3
+ Version: 0.2.1
4
4
  Summary: Load metadata and asset check specifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
6
  Requires-Python: >=3.11.10
@@ -52,7 +52,7 @@ data_contract = DataContractLoader(
52
52
  name=asset_name,
53
53
  metadata=data_contract.metadata,
54
54
  tags=data_contract.tags,
55
- description=data_contract.description,
55
+ description=data_contract.load_description(),
56
56
  owners=data_contract.owner,
57
57
  code_version=data_contract.version,
58
58
  )
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dagster-datacontract"
3
- version = "0.1.2"
3
+ version = "0.2.1"
4
4
  description = "Load metadata and asset check specifications from data contracts."
5
5
  authors = [
6
6
  { name = "Fredrik Bakken", email = "fredrik@dataheim.io" }