dagster-datacontract 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_datacontract/__init__.py +89 -15
- dagster_datacontract/utils/__init__.py +8 -28
- dagster_datacontract/utils/combine_strings.py +29 -0
- dagster_datacontract/utils/paths.py +51 -0
- {dagster_datacontract-0.4.0.dist-info → dagster_datacontract-0.4.1.dist-info}/METADATA +5 -4
- {dagster_datacontract-0.4.0.dist-info → dagster_datacontract-0.4.1.dist-info}/RECORD +9 -7
- {dagster_datacontract-0.4.0.dist-info → dagster_datacontract-0.4.1.dist-info}/WHEEL +1 -1
- {dagster_datacontract-0.4.0.dist-info → dagster_datacontract-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {dagster_datacontract-0.4.0.dist-info → dagster_datacontract-0.4.1.dist-info}/top_level.txt +0 -0
dagster_datacontract/__init__.py
CHANGED
@@ -2,7 +2,6 @@ from datetime import timedelta
|
|
2
2
|
from typing import Any
|
3
3
|
|
4
4
|
import dagster as dg
|
5
|
-
from dagster import TableColumnLineage, TableSchema
|
6
5
|
from datacontract.data_contract import DataContract
|
7
6
|
from datacontract.model.run import ResultEnum
|
8
7
|
from loguru import logger
|
@@ -16,18 +15,28 @@ from dagster_datacontract.metadata import (
|
|
16
15
|
)
|
17
16
|
from dagster_datacontract.owners import get_owner
|
18
17
|
from dagster_datacontract.tags import get_tags
|
19
|
-
from dagster_datacontract.utils import normalize_path
|
18
|
+
from dagster_datacontract.utils import combine_parts, normalize_path
|
20
19
|
|
21
20
|
|
22
21
|
class DataContractLoader:
|
23
22
|
def __init__(
|
24
23
|
self,
|
25
24
|
asset_name: str,
|
26
|
-
data_contract: DataContract,
|
25
|
+
data_contract: DataContract | None = None,
|
26
|
+
data_contract_path: str | None = None,
|
27
27
|
):
|
28
|
+
if data_contract is None and data_contract_path is None:
|
29
|
+
raise ValueError(
|
30
|
+
"Either 'data_contract' or 'data_contract_path' must be provided."
|
31
|
+
)
|
32
|
+
|
28
33
|
self.asset_name = asset_name
|
29
34
|
self.asset_key = dg.AssetKey(path=self.asset_name)
|
30
|
-
self.data_contract =
|
35
|
+
self.data_contract = (
|
36
|
+
data_contract
|
37
|
+
if data_contract
|
38
|
+
else DataContract(data_contract_file=data_contract_path)
|
39
|
+
)
|
31
40
|
self.data_contract_specification = (
|
32
41
|
self.data_contract.get_data_contract_specification()
|
33
42
|
)
|
@@ -40,10 +49,18 @@ class DataContractLoader:
|
|
40
49
|
self.owner = get_owner(self.data_contract_specification)
|
41
50
|
self.version = self._load_version()
|
42
51
|
self.cron_schedule = self._load_cron_schedule()
|
52
|
+
self.asset_spec = dg.AssetSpec(
|
53
|
+
key=asset_name,
|
54
|
+
description=self.description,
|
55
|
+
metadata=self.metadata,
|
56
|
+
code_version=self.version,
|
57
|
+
owners=self.owner,
|
58
|
+
tags=self.tags,
|
59
|
+
)
|
43
60
|
|
44
61
|
def _load_metadata(
|
45
62
|
self,
|
46
|
-
) -> dict[str, TableColumnLineage | TableSchema | Any] | None:
|
63
|
+
) -> dict[str, dg.TableColumnLineage | dg.TableSchema | Any] | None:
|
47
64
|
metadata = (
|
48
65
|
{
|
49
66
|
"datacontract/path": dg.MetadataValue.url(
|
@@ -56,19 +73,24 @@ class DataContractLoader:
|
|
56
73
|
columns = []
|
57
74
|
deps_by_column = {}
|
58
75
|
|
59
|
-
|
76
|
+
try:
|
77
|
+
fields = self.data_contract_specification.models.get(self.asset_name).fields
|
60
78
|
|
61
|
-
|
62
|
-
|
63
|
-
|
79
|
+
for column_name, column_field in fields.items():
|
80
|
+
table_column = get_table_column(column_name, column_field)
|
81
|
+
columns.append(table_column)
|
64
82
|
|
65
|
-
|
66
|
-
|
83
|
+
table_column_lineage = get_column_lineage(column_field)
|
84
|
+
deps_by_column[column_name] = table_column_lineage
|
67
85
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
86
|
+
metadata["dagster/column_schema"] = dg.TableSchema(columns=columns)
|
87
|
+
metadata["dagster/column_lineage"] = dg.TableColumnLineage(
|
88
|
+
deps_by_column=deps_by_column
|
89
|
+
)
|
90
|
+
except AttributeError as e:
|
91
|
+
logger.warning(
|
92
|
+
f"No field named {self.asset_name} found in data contract.\n{e}"
|
93
|
+
)
|
72
94
|
|
73
95
|
server_information = get_server_information(
|
74
96
|
self.data_contract_specification,
|
@@ -158,3 +180,55 @@ class DataContractLoader:
|
|
158
180
|
)
|
159
181
|
|
160
182
|
return freshness_checks
|
183
|
+
|
184
|
+
def combine_asset_specs(
    self,
    asset_spec: dg.AssetSpec,
) -> dg.AssetSpec:
    """Merge ``asset_spec`` with this loader's values into a new AssetSpec.

    Descriptions, code versions, owners, metadata and tags are combined from
    both sources. Dependencies, skippability, group name, automation
    condition, kinds and the partitions definition are copied unchanged from
    ``asset_spec``. The key of the result is always this loader's
    ``asset_name``.

    Args:
        asset_spec (dg.AssetSpec): The base asset specification to merge
            with the values held by this instance.

    Returns:
        dg.AssetSpec: A new AssetSpec instance containing the combined data.

    Notes:
        - Descriptions are joined with a blank line between them, the
          description of ``asset_spec`` first.
        - Code versions are joined with an underscore, the version of
          ``asset_spec`` first.
        - Owners are concatenated, the owners of ``asset_spec`` first.
        - Metadata and tags are merged key by key; on a key collision the
          value held by this instance wins.
        - A missing (``None``) metadata, owner or tag collection on either
          side is treated as empty.
    """
    description = combine_parts(
        [asset_spec.description, self.description], delimiter="\n\n"
    )
    code_version = combine_parts(
        [asset_spec.code_version, self.version], delimiter="_"
    )

    # `_load_metadata` is annotated as possibly returning None, and unpacking
    # or concatenating None raises TypeError, so every collection is
    # defaulted to an empty one before merging.
    # NOTE(review): assumes self.owner is a sequence of strings or None -
    # confirm against get_owner.
    metadata = {**(asset_spec.metadata or {}), **(self.metadata or {})}
    owners = [*(asset_spec.owners or ()), *(self.owner or ())]
    tags = {**(asset_spec.tags or {}), **(self.tags or {})}

    return dg.AssetSpec(
        key=self.asset_name,
        deps=asset_spec.deps,
        description=description,
        metadata=metadata,
        skippable=asset_spec.skippable,
        group_name=asset_spec.group_name,
        code_version=code_version,
        automation_condition=asset_spec.automation_condition,
        owners=owners,
        tags=tags,
        kinds=asset_spec.kinds,
        partitions_def=asset_spec.partitions_def,
    )
|
@@ -1,28 +1,8 @@
|
|
1
|
-
import
|
2
|
-
import
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
""
|
7
|
-
|
8
|
-
|
9
|
-
or with the 'file' scheme) and converts it into a fully qualified file URI.
|
10
|
-
If the path already has a non-'file' scheme (e.g., 's3://', 'http://'),
|
11
|
-
it is returned unchanged.
|
12
|
-
|
13
|
-
Parameters:
|
14
|
-
path (str): The input file path. This can be a relative or absolute local path,
|
15
|
-
a path starting with `~`, or a URI with a supported scheme.
|
16
|
-
|
17
|
-
Returns:
|
18
|
-
str: A normalized path string:
|
19
|
-
- If the input is a local path or has a "file" scheme, returns it in the form "file:///absolute/path".
|
20
|
-
- If the input has another scheme (e.g., "s3://", "http://"), returns it unchanged.
|
21
|
-
"""
|
22
|
-
parsed = urllib.parse.urlparse(path)
|
23
|
-
|
24
|
-
if not parsed.scheme or parsed.scheme == "file":
|
25
|
-
full_path = os.path.abspath(os.path.expanduser(path))
|
26
|
-
return f"file://{full_path}"
|
27
|
-
else:
|
28
|
-
return path
|
1
|
+
from dagster_datacontract.utils.combine_strings import combine_parts
|
2
|
+
from dagster_datacontract.utils.paths import get_absolute_path, normalize_path
|
3
|
+
|
4
|
+
__all__ = [
|
5
|
+
"combine_parts",
|
6
|
+
"get_absolute_path",
|
7
|
+
"normalize_path",
|
8
|
+
]
|
@@ -0,0 +1,29 @@
|
|
1
|
+
from collections.abc import Iterable
|
2
|
+
|
3
|
+
|
4
|
+
def combine_parts(parts: Iterable[str | None], delimiter: str = "_") -> str:
|
5
|
+
"""
|
6
|
+
Combine multiple optional strings using a specified delimiter.
|
7
|
+
|
8
|
+
This function takes an iterable of optional strings and joins the non-None,
|
9
|
+
non-empty strings using the given delimiter. None values and empty strings
|
10
|
+
are ignored. If all values are None or empty, the result is an empty string.
|
11
|
+
|
12
|
+
Args:
|
13
|
+
parts (Iterable[Optional[str]]): An iterable of strings or None values to combine.
|
14
|
+
delimiter (str): A string used to separate the non-None parts. Defaults to "_".
|
15
|
+
|
16
|
+
Returns:
|
17
|
+
str: A single combined string of all non-None, non-empty parts separated by the delimiter.
|
18
|
+
|
19
|
+
Examples:
|
20
|
+
>>> combine_parts(["v1", "2023", None])
|
21
|
+
'v1_2023'
|
22
|
+
|
23
|
+
>>> combine_parts([None, None])
|
24
|
+
''
|
25
|
+
|
26
|
+
>>> combine_parts(["", "alpha", None])
|
27
|
+
'alpha'
|
28
|
+
"""
|
29
|
+
return delimiter.join(filter(None, parts))
|
@@ -0,0 +1,51 @@
|
|
1
|
+
import os
|
2
|
+
from pathlib import Path
|
3
|
+
from urllib.parse import urlparse
|
4
|
+
|
5
|
+
import dagster as dg
|
6
|
+
|
7
|
+
|
8
|
+
def normalize_path(path: str) -> str:
    """Normalize a local file path or URI into a consistent URI string.

    Local inputs (no scheme, or the ``file`` scheme) are converted into a
    fully qualified file URI built from the absolute, user-expanded path.
    Inputs with any other scheme (e.g. ``s3://``, ``http://``) are returned
    unchanged.

    Parameters:
        path (str): The input file path. This can be a relative or absolute
            local path, a path starting with ``~``, or a URI.

    Returns:
        str: A normalized path string:
            - For a local path or a ``file`` URI, ``"file://"`` followed by
              the absolute path (``"file:///absolute/path"`` on POSIX).
            - For any other scheme, the input unchanged.
    """
    parsed = urlparse(path)

    if parsed.scheme and parsed.scheme != "file":
        return path

    # For a "file" URI only the path component is a filesystem path. Passing
    # the whole URI to os.path.abspath would treat "file:" as a relative
    # directory and yield "file:///<cwd>/file:/...", so already-normalized
    # input would not survive a second call.
    local_path = parsed.path if parsed.scheme == "file" else path
    full_path = os.path.abspath(os.path.expanduser(local_path))
    return f"file://{full_path}"
|
32
|
+
|
33
|
+
|
34
|
+
def get_absolute_path(
    context_path: Path,
    full_path: "str | dg.UrlMetadataValue",
) -> Path:
    """Resolve a path or file URI to an absolute filesystem path.

    Args:
        context_path (Path): Base directory that a relative ``full_path`` is
            joined onto.
        full_path (str | dg.UrlMetadataValue): A path string, a URI string,
            or a ``dg.UrlMetadataValue`` whose ``url`` attribute holds one
            of those.

    Returns:
        Path: ``full_path`` itself (as a ``Path``) when it is already
        absolute, otherwise ``context_path / full_path`` made absolute.

    Notes:
        Only the ``file`` scheme is unwrapped to its path component. A URI
        with any other scheme is treated as a plain path string.
    """
    if isinstance(full_path, dg.UrlMetadataValue):
        full_path = full_path.url

    parsed_path = urlparse(full_path)
    # Kept in a separate name so the parameter is not rebound to a Path.
    resolved = Path(parsed_path.path if parsed_path.scheme == "file" else full_path)

    if resolved.is_absolute():
        return resolved

    return Path(context_path, resolved).absolute()
|
@@ -1,13 +1,14 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: dagster-datacontract
|
3
|
-
Version: 0.4.
|
3
|
+
Version: 0.4.1
|
4
4
|
Summary: Load metadata and asset check specifications from data contracts.
|
5
5
|
Author-email: Fredrik Bakken <fredrik@dataheim.io>
|
6
|
-
Requires-Python: >=3.10
|
6
|
+
Requires-Python: >=3.10
|
7
7
|
Description-Content-Type: text/markdown
|
8
8
|
License-File: LICENSE
|
9
9
|
Requires-Dist: dagster>=1.10.10
|
10
|
-
Requires-Dist:
|
10
|
+
Requires-Dist: dagster-dg>=0.26.11
|
11
|
+
Requires-Dist: datacontract-cli>=0.10.24
|
11
12
|
Requires-Dist: loguru>=0.7.3
|
12
13
|
Dynamic: license-file
|
13
14
|
|
@@ -25,7 +26,7 @@ uv add dagster-datacontract
|
|
25
26
|
|
26
27
|
## Simple Example
|
27
28
|
|
28
|
-
The following example can be found inside the [
|
29
|
+
The following example can be found in the [examples/simple](https://github.com/dataheim-io/dagster-datacontract/tree/main/examples/simple) directory:
|
29
30
|
|
30
31
|
```python
|
31
32
|
from datetime import timedelta
|
@@ -1,4 +1,4 @@
|
|
1
|
-
dagster_datacontract/__init__.py,sha256=
|
1
|
+
dagster_datacontract/__init__.py,sha256=Tv6_G45c16Yq5Sh9A5Xw9rxkkIiAm9BBxuK79kkvSvE,8585
|
2
2
|
dagster_datacontract/description/__init__.py,sha256=ulWqPp5jIPvCzaDFZcjLjcDkljJ5j_FRsE0dXhK8Wlc,104
|
3
3
|
dagster_datacontract/description/description.py,sha256=FmjgCYDpJ9UHrvAv0sAthfRohDjdG0lL1XcMKK8QMmI,1646
|
4
4
|
dagster_datacontract/metadata/__init__.py,sha256=Gj7Htl3rYRXsE-631yr4LWqL7Tf5bZKFEFjTeglxZek,359
|
@@ -8,9 +8,11 @@ dagster_datacontract/metadata/table_colums.py,sha256=Q7ZCiMReWU4-T2YfBvtt5vvoVXE
|
|
8
8
|
dagster_datacontract/owners/__init__.py,sha256=c0AhLQRzfw-QPmsF9rPXRyE6VoLmgDRRNgVqG8JUvFs,882
|
9
9
|
dagster_datacontract/tags/__init__.py,sha256=2Ph-M0WbBKUjJWIzM_cEBW3SQZh7Nq8oy5MbD5bt_lc,76
|
10
10
|
dagster_datacontract/tags/tags.py,sha256=aZ_HTkc-vjJ_rofT32fT_zrLCt9x1ZGn8XoihhOMhfU,1414
|
11
|
-
dagster_datacontract/utils/__init__.py,sha256=
|
12
|
-
dagster_datacontract
|
13
|
-
dagster_datacontract
|
14
|
-
dagster_datacontract-0.4.
|
15
|
-
dagster_datacontract-0.4.
|
16
|
-
dagster_datacontract-0.4.
|
11
|
+
dagster_datacontract/utils/__init__.py,sha256=GSQ2Zry9pKlLhWI5Vjoj3X7iZiEgt-SjqnQRwfplbHM,231
|
12
|
+
dagster_datacontract/utils/combine_strings.py,sha256=nWy3unX6yuPi8YmvDTl_mO10K6MbJP8cxQWPOMDj6G4,987
|
13
|
+
dagster_datacontract/utils/paths.py,sha256=Rh-l5GSmxZwhIVQ_aBJPHTKU5afEma8hlA5RT31EhbY,1611
|
14
|
+
dagster_datacontract-0.4.1.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
|
15
|
+
dagster_datacontract-0.4.1.dist-info/METADATA,sha256=KAjnQQkr1UUNBAPcQ44mJcqUqmcEqxHDvr3pU-5YyWU,3078
|
16
|
+
dagster_datacontract-0.4.1.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
17
|
+
dagster_datacontract-0.4.1.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
|
18
|
+
dagster_datacontract-0.4.1.dist-info/RECORD,,
|
{dagster_datacontract-0.4.0.dist-info → dagster_datacontract-0.4.1.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
File without changes
|