dagster-datacontract 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_datacontract/__init__.py +17 -50
- dagster_datacontract/metadata/__init__.py +6 -0
- dagster_datacontract/metadata/table_colums.py +109 -0
- dagster_datacontract/tags/__init__.py +3 -0
- dagster_datacontract/tags/load_tags.py +45 -0
- {dagster_datacontract-0.2.3.dist-info → dagster_datacontract-0.2.5.dist-info}/METADATA +1 -1
- dagster_datacontract-0.2.5.dist-info/RECORD +10 -0
- dagster_datacontract-0.2.3.dist-info/RECORD +0 -6
- {dagster_datacontract-0.2.3.dist-info → dagster_datacontract-0.2.5.dist-info}/WHEEL +0 -0
- {dagster_datacontract-0.2.3.dist-info → dagster_datacontract-0.2.5.dist-info}/licenses/LICENSE +0 -0
- {dagster_datacontract-0.2.3.dist-info → dagster_datacontract-0.2.5.dist-info}/top_level.txt +0 -0
dagster_datacontract/__init__.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
import json
|
2
|
-
import re
|
3
1
|
import textwrap
|
4
2
|
from datetime import timedelta
|
5
3
|
from typing import Any
|
@@ -10,6 +8,12 @@ from datacontract.data_contract import DataContract
|
|
10
8
|
from datacontract.model.run import ResultEnum
|
11
9
|
from loguru import logger
|
12
10
|
|
11
|
+
from dagster_datacontract.metadata import (
|
12
|
+
get_column_lineage,
|
13
|
+
get_table_column,
|
14
|
+
)
|
15
|
+
from dagster_datacontract.tags import get_tags
|
16
|
+
|
13
17
|
|
14
18
|
class DataContractLoader:
|
15
19
|
def __init__(
|
@@ -24,7 +28,7 @@ class DataContractLoader:
|
|
24
28
|
self.data_contract.get_data_contract_specification()
|
25
29
|
)
|
26
30
|
self.metadata = self._load_metadata()
|
27
|
-
self.tags = self.
|
31
|
+
self.tags = get_tags(self.data_contract_specification.tags)
|
28
32
|
self.description = self.load_description()
|
29
33
|
self.owner = self._load_owner()
|
30
34
|
self.version = self._load_version()
|
@@ -33,33 +37,17 @@ class DataContractLoader:
|
|
33
37
|
def _load_metadata(
|
34
38
|
self,
|
35
39
|
) -> dict[str, TableColumnLineage | TableSchema | Any] | None:
|
36
|
-
fields = self.data_contract_specification.models.get(self.asset_name).fields
|
37
|
-
|
38
40
|
columns = []
|
39
41
|
deps_by_column = {}
|
40
42
|
|
41
|
-
|
42
|
-
columns.append(
|
43
|
-
dg.TableColumn(
|
44
|
-
name=column_name,
|
45
|
-
type=column_field.type,
|
46
|
-
description=column_field.description,
|
47
|
-
)
|
48
|
-
)
|
43
|
+
fields = self.data_contract_specification.models.get(self.asset_name).fields
|
49
44
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
else:
|
54
|
-
lineage_entries = lineage.get("inputFields")
|
45
|
+
for column_name, column_field in fields.items():
|
46
|
+
table_column = get_table_column(column_name, column_field)
|
47
|
+
columns.append(table_column)
|
55
48
|
|
56
|
-
|
57
|
-
|
58
|
-
asset_key=dg.AssetKey(lineage_entry["name"]),
|
59
|
-
column_name=lineage_entry["field"],
|
60
|
-
)
|
61
|
-
for lineage_entry in lineage_entries
|
62
|
-
]
|
49
|
+
table_column_lineage = get_column_lineage(column_field)
|
50
|
+
deps_by_column[column_name] = table_column_lineage
|
63
51
|
|
64
52
|
return {
|
65
53
|
"dagster/column_schema": dg.TableSchema(columns=columns),
|
@@ -68,30 +56,6 @@ class DataContractLoader:
|
|
68
56
|
),
|
69
57
|
}
|
70
58
|
|
71
|
-
def _load_tags(self) -> dict[str, str]:
|
72
|
-
"""Safely load tags from data contract.
|
73
|
-
|
74
|
-
More information about Dagster tags:
|
75
|
-
https://docs.dagster.io/guides/build/assets/metadata-and-tags/tags
|
76
|
-
"""
|
77
|
-
key_pattern = re.compile(r"^[\w.-]{1,63}$")
|
78
|
-
val_pattern = re.compile(r"^[\w.-]{0,63}$")
|
79
|
-
|
80
|
-
tags = {}
|
81
|
-
|
82
|
-
for item in self.data_contract_specification.tags:
|
83
|
-
if ":" in item:
|
84
|
-
key, val = map(str.strip, item.split(":", 1))
|
85
|
-
else:
|
86
|
-
key, val = item.strip(), ""
|
87
|
-
|
88
|
-
if key_pattern.match(key) and val_pattern.match(val):
|
89
|
-
tags[key] = val
|
90
|
-
else:
|
91
|
-
logger.warning(f"Ignoring invalid tag: {item}")
|
92
|
-
|
93
|
-
return tags
|
94
|
-
|
95
59
|
def _load_owner(self) -> list[str] | None:
|
96
60
|
owner = self.data_contract_specification.info.owner
|
97
61
|
|
@@ -109,10 +73,13 @@ class DataContractLoader:
|
|
109
73
|
)
|
110
74
|
return cron_schedule
|
111
75
|
except AttributeError:
|
76
|
+
logger.warning("'servicelevels.frequency.cron' not found in Data Contract.")
|
112
77
|
return None
|
113
78
|
|
114
79
|
def load_description(
|
115
|
-
self,
|
80
|
+
self,
|
81
|
+
config: dict[str, Any] | None = None,
|
82
|
+
separator: str = "\n",
|
116
83
|
) -> str | None:
|
117
84
|
"""Load and return a formatted description string based on the data contract specification.
|
118
85
|
|
@@ -0,0 +1,109 @@
|
|
1
|
+
import json
|
2
|
+
from typing import Any
|
3
|
+
|
4
|
+
import dagster as dg
|
5
|
+
from dagster import TableColumnDep
|
6
|
+
from datacontract.model.data_contract_specification import Field
|
7
|
+
|
8
|
+
from dagster_datacontract.tags import get_tags
|
9
|
+
|
10
|
+
|
11
|
+
def get_other_item(name: str, column_field: Field) -> list[str]:
    """Format one attribute of a Field as a ``"name=value"`` constraint string.

    Args:
        name (str): The name of the attribute to fetch from the Field.
        column_field (Field): The Field instance from which to extract the attribute.

    Returns:
        list[str]: A single-element list such as ``["format=csv"]`` when the
            attribute carries a value, otherwise an empty list. The list form
            lets callers splat the result straight into a larger list.
    """
    value = getattr(column_field, name, None)

    # A numeric zero (e.g. ``minimum=0``) is a real constraint and must be
    # kept; ``bool`` is excluded because ``False`` means "flag not set".
    is_numeric_zero = isinstance(value, (int, float)) and not isinstance(value, bool)
    if not value and not is_numeric_zero:
        return []

    return [f"{name}={value}"]
|
24
|
+
|
25
|
+
|
26
|
+
def get_table_column_constraints(column_field: Field) -> dg.TableColumnConstraints:
    """Convert a Field object to Dagster TableColumnConstraints.

    Args:
        column_field (Field): A data contract field specification containing
            column metadata.

    Returns:
        dg.TableColumnConstraints: The field's nullability, uniqueness and
            any remaining descriptive attributes rendered as ``"name=value"``
            strings in ``other``.
    """
    # A required field may not hold nulls, so nullability is the inverse of
    # ``required``. An unset (None) ``required`` is treated as nullable.
    nullable = not column_field.required
    unique = bool(column_field.unique)

    # Attributes without a dedicated Dagster constraint; order is preserved
    # in the resulting ``other`` list.
    other_attributes = (
        "title",
        "primaryKey",
        "format",
        "minLength",
        "maxLength",
        "pattern",
        "minimum",
        "exclusiveMinimum",
        "maximum",
        "exclusiveMaximum",
        "pii",
        "classification",
    )
    other: list[str] = []
    for attribute in other_attributes:
        other.extend(get_other_item(attribute, column_field))

    return dg.TableColumnConstraints(
        nullable=nullable,
        unique=unique,
        other=other,
    )
|
59
|
+
|
60
|
+
|
61
|
+
def get_table_column(column_name: str, column_field: Field) -> dg.TableColumn:
    """Build the Dagster TableColumn describing a single data contract field.

    Args:
        column_name (str): The name of the column.
        column_field (Field): The Field instance supplying the column's type,
            description, constraints, and tags.

    Returns:
        dg.TableColumn: The column definition, with constraints derived via
            ``get_table_column_constraints`` and tags parsed via ``get_tags``.
    """
    column_constraints = get_table_column_constraints(column_field)
    column_tags = get_tags(column_field.tags)

    return dg.TableColumn(
        name=column_name,
        type=column_field.type,
        description=column_field.description,
        constraints=column_constraints,
        tags=column_tags,
    )
|
80
|
+
|
81
|
+
|
82
|
+
def get_column_lineage(column_field: Field) -> list[TableColumnDep]:
    """Extract column-level lineage from a Field as a list of TableColumnDep objects.

    The Field is serialized to JSON and parsed back into plain dicts, and the
    entries under ``lineage.inputFields`` are read from that. Each entry is
    converted into a Dagster TableColumnDep whose asset key comes from the
    entry's ``"name"`` and whose column name comes from its ``"field"``.

    Args:
        column_field (Field): The Field instance that may contain lineage metadata.

    Returns:
        list[TableColumnDep]: One dependency per lineage input field; an empty
            list when the field has no lineage or no input fields.

    Raises:
        KeyError: If a lineage entry lacks the ``"name"`` or ``"field"`` key.
    """
    lineage = json.loads(column_field.model_dump_json()).get("lineage")

    if not lineage:
        return []

    # ``inputFields`` may be missing or null even when a lineage object is
    # present; treat both as "no upstream columns" instead of iterating None.
    lineage_entries = lineage.get("inputFields") or []

    return [
        dg.TableColumnDep(
            asset_key=dg.AssetKey(lineage_entry["name"]),
            column_name=lineage_entry["field"],
        )
        for lineage_entry in lineage_entries
    ]
|
@@ -0,0 +1,45 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
from loguru import logger
|
4
|
+
|
5
|
+
|
6
|
+
def get_tags(
|
7
|
+
tags_list: list[str] | None,
|
8
|
+
) -> dict[str, str]:
|
9
|
+
"""Parse and validate a list of string tags into a dictionary format.
|
10
|
+
|
11
|
+
Each tag in the input list should be in the form "key:value" or simply "key".
|
12
|
+
- Keys must match the pattern: ^[\w.-]{1,63}$
|
13
|
+
- Values (if provided) must match the pattern: ^[\w.-]{0,63}$
|
14
|
+
|
15
|
+
Invalid tags (those that do not match the expected format) will be ignored,
|
16
|
+
and a warning will be logged.
|
17
|
+
|
18
|
+
More information about Dagster tags:
|
19
|
+
https://docs.dagster.io/guides/build/assets/metadata-and-tags/tags
|
20
|
+
|
21
|
+
Args:
|
22
|
+
tags_list (list[str] | None): A list of tags as strings. Each tag may be
|
23
|
+
formatted as "key:value" or just "key". If None, an empty dict is returned.
|
24
|
+
|
25
|
+
Returns:
|
26
|
+
dict[str, str]: A dictionary of validated tags, where keys are tag names
|
27
|
+
and values are tag values (empty string if not provided).
|
28
|
+
"""
|
29
|
+
key_pattern = re.compile(r"^[\w.-]{1,63}$")
|
30
|
+
val_pattern = re.compile(r"^[\w.-]{0,63}$")
|
31
|
+
|
32
|
+
tags = {}
|
33
|
+
|
34
|
+
for item in tags_list:
|
35
|
+
if ":" in item:
|
36
|
+
key, val = map(str.strip, item.split(":", 1))
|
37
|
+
else:
|
38
|
+
key, val = item.strip(), ""
|
39
|
+
|
40
|
+
if key_pattern.match(key) and val_pattern.match(val):
|
41
|
+
tags[key] = val
|
42
|
+
else:
|
43
|
+
logger.warning(f"Ignoring invalid tag: {item}")
|
44
|
+
|
45
|
+
return tags
|
@@ -0,0 +1,10 @@
|
|
1
|
+
dagster_datacontract/__init__.py,sha256=Be_sIJXm0gaX5lDuj5xk7FbM5hZsEmQTwHcsztmkEoo,6730
|
2
|
+
dagster_datacontract/metadata/__init__.py,sha256=c5giTkxAgr-05ivY5hgsdVuA5UEDzdskVyjNczZdjHM,159
|
3
|
+
dagster_datacontract/metadata/table_colums.py,sha256=Q7ZCiMReWU4-T2YfBvtt5vvoVXEoUgzK5OPMxQEgzpQ,4013
|
4
|
+
dagster_datacontract/tags/__init__.py,sha256=jmHogoOunDs8YvgnXiMTSoCEGrxWbWnQCKH2_x2uQz8,81
|
5
|
+
dagster_datacontract/tags/load_tags.py,sha256=aZ_HTkc-vjJ_rofT32fT_zrLCt9x1ZGn8XoihhOMhfU,1414
|
6
|
+
dagster_datacontract-0.2.5.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
|
7
|
+
dagster_datacontract-0.2.5.dist-info/METADATA,sha256=RGX_hGPEYpNeXxF9clpc8oBoCx1XqX4H6mHwcQetL3I,3029
|
8
|
+
dagster_datacontract-0.2.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
9
|
+
dagster_datacontract-0.2.5.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
|
10
|
+
dagster_datacontract-0.2.5.dist-info/RECORD,,
|
@@ -1,6 +0,0 @@
|
|
1
|
-
dagster_datacontract/__init__.py,sha256=bfHWETu-RpUyE6_Z3mZoZMJWQSLLgVxnWqbbFTKubRw,7779
|
2
|
-
dagster_datacontract-0.2.3.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
|
3
|
-
dagster_datacontract-0.2.3.dist-info/METADATA,sha256=jXtkDSL9P1xLjsAdRBPRhAfMpdYDQqK9udijD8ZtamU,3029
|
4
|
-
dagster_datacontract-0.2.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
5
|
-
dagster_datacontract-0.2.3.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
|
6
|
-
dagster_datacontract-0.2.3.dist-info/RECORD,,
|
File without changes
|
{dagster_datacontract-0.2.3.dist-info → dagster_datacontract-0.2.5.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
File without changes
|