dagster-datacontract 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,3 @@
1
- import json
2
- import re
3
1
  import textwrap
4
2
  from datetime import timedelta
5
3
  from typing import Any
@@ -10,6 +8,12 @@ from datacontract.data_contract import DataContract
10
8
  from datacontract.model.run import ResultEnum
11
9
  from loguru import logger
12
10
 
11
+ from dagster_datacontract.metadata import (
12
+ get_column_lineage,
13
+ get_table_column,
14
+ )
15
+ from dagster_datacontract.tags import get_tags
16
+
13
17
 
14
18
  class DataContractLoader:
15
19
  def __init__(
@@ -24,7 +28,7 @@ class DataContractLoader:
24
28
  self.data_contract.get_data_contract_specification()
25
29
  )
26
30
  self.metadata = self._load_metadata()
27
- self.tags = self._load_tags()
31
+ self.tags = get_tags(self.data_contract_specification.tags)
28
32
  self.description = self.load_description()
29
33
  self.owner = self._load_owner()
30
34
  self.version = self._load_version()
@@ -33,33 +37,17 @@ class DataContractLoader:
33
37
  def _load_metadata(
34
38
  self,
35
39
  ) -> dict[str, TableColumnLineage | TableSchema | Any] | None:
36
- fields = self.data_contract_specification.models.get(self.asset_name).fields
37
-
38
40
  columns = []
39
41
  deps_by_column = {}
40
42
 
41
- for column_name, column_field in fields.items():
42
- columns.append(
43
- dg.TableColumn(
44
- name=column_name,
45
- type=column_field.type,
46
- description=column_field.description,
47
- )
48
- )
43
+ fields = self.data_contract_specification.models.get(self.asset_name).fields
49
44
 
50
- lineage = json.loads(column_field.model_dump_json()).get("lineage")
51
- if not lineage:
52
- deps_by_column[column_name] = []
53
- else:
54
- lineage_entries = lineage.get("inputFields")
45
+ for column_name, column_field in fields.items():
46
+ table_column = get_table_column(column_name, column_field)
47
+ columns.append(table_column)
55
48
 
56
- deps_by_column[column_name] = [
57
- dg.TableColumnDep(
58
- asset_key=dg.AssetKey(lineage_entry["name"]),
59
- column_name=lineage_entry["field"],
60
- )
61
- for lineage_entry in lineage_entries
62
- ]
49
+ table_column_lineage = get_column_lineage(column_field)
50
+ deps_by_column[column_name] = table_column_lineage
63
51
 
64
52
  return {
65
53
  "dagster/column_schema": dg.TableSchema(columns=columns),
@@ -68,30 +56,6 @@ class DataContractLoader:
68
56
  ),
69
57
  }
70
58
 
71
- def _load_tags(self) -> dict[str, str]:
72
- """Safely load tags from data contract.
73
-
74
- More information about Dagster tags:
75
- https://docs.dagster.io/guides/build/assets/metadata-and-tags/tags
76
- """
77
- key_pattern = re.compile(r"^[\w.-]{1,63}$")
78
- val_pattern = re.compile(r"^[\w.-]{0,63}$")
79
-
80
- tags = {}
81
-
82
- for item in self.data_contract_specification.tags:
83
- if ":" in item:
84
- key, val = map(str.strip, item.split(":", 1))
85
- else:
86
- key, val = item.strip(), ""
87
-
88
- if key_pattern.match(key) and val_pattern.match(val):
89
- tags[key] = val
90
- else:
91
- logger.warning(f"Ignoring invalid tag: {item}")
92
-
93
- return tags
94
-
95
59
  def _load_owner(self) -> list[str] | None:
96
60
  owner = self.data_contract_specification.info.owner
97
61
 
@@ -109,10 +73,13 @@ class DataContractLoader:
109
73
  )
110
74
  return cron_schedule
111
75
  except AttributeError:
76
+ logger.warning("'servicelevels.frequency.cron' not found in Data Contract.")
112
77
  return None
113
78
 
114
79
  def load_description(
115
- self, config: dict[str, Any] | None = None, separator: str = "\n"
80
+ self,
81
+ config: dict[str, Any] | None = None,
82
+ separator: str = "\n",
116
83
  ) -> str | None:
117
84
  """Load and return a formatted description string based on the data contract specification.
118
85
 
@@ -0,0 +1,6 @@
1
+ from dagster_datacontract.metadata.table_colums import (
2
+ get_column_lineage,
3
+ get_table_column,
4
+ )
5
+
6
+ __all__ = ["get_table_column", "get_column_lineage"]
@@ -0,0 +1,109 @@
1
+ import json
2
+ from typing import Any
3
+
4
+ import dagster as dg
5
+ from dagster import TableColumnDep
6
+ from datacontract.model.data_contract_specification import Field
7
+
8
+ from dagster_datacontract.tags import get_tags
9
+
10
+
11
def get_other_item(name: str, column_field: Field) -> list[str]:
    """Format one optional Field attribute as a ``name=value`` string.

    Args:
        name (str): The name of the attribute to fetch from the Field.
        column_field (Field): The Field instance to read the attribute from.

    Returns:
        list[str]: A single-element list such as ``["format=csv"]`` when the
            attribute is set, otherwise an empty list. A list is always
            returned (never ``None``) so callers can unpack the result.
    """
    value = getattr(column_field, name, None)

    # Only "not set" (missing, None or an empty string) is skipped. A plain
    # truthiness test would also drop meaningful values such as ``minimum=0``
    # or ``pii=False``.
    if value is None or value == "":
        return []

    return [f"{name}={value}"]
24
+
25
+
26
def get_table_column_constraints(column_field: Field) -> dg.TableColumnConstraints:
    """Convert a Field object to Dagster TableColumnConstraints.

    Args:
        column_field (Field): A data contract field specification containing
            column metadata.

    Returns:
        dg.TableColumnConstraints: The field's nullability, uniqueness and
            remaining properties, the latter formatted as ``name=value``
            strings in ``other``.
    """
    # A column is nullable unless the contract marks it as required; an unset
    # ``required`` counts as "not required". The previous expression
    # evaluated to True for every input.
    nullable = not column_field.required
    unique = bool(column_field.unique)

    other_attributes = (
        "title",
        "primaryKey",
        "format",
        "minLength",
        "maxLength",
        "pattern",
        "minimum",
        "exclusiveMinimum",
        "maximum",
        "exclusiveMaximum",
        "pii",
        "classification",
    )
    other: list[str] = []
    for attribute in other_attributes:
        other.extend(get_other_item(attribute, column_field))

    return dg.TableColumnConstraints(
        nullable=nullable,
        unique=unique,
        other=other,
    )
59
+
60
+
61
def get_table_column(column_name: str, column_field: Field) -> dg.TableColumn:
    """Build the Dagster TableColumn describing one data contract field.

    Args:
        column_name (str): The name of the column.
        column_field (Field): The Field instance supplying the column's type,
            description, constraints and tags.

    Returns:
        dg.TableColumn: The column definition, with constraints produced by
            ``get_table_column_constraints`` and tags produced by ``get_tags``.
    """
    column_constraints = get_table_column_constraints(column_field)
    column_tags = get_tags(column_field.tags)

    return dg.TableColumn(
        name=column_name,
        type=column_field.type,
        description=column_field.description,
        constraints=column_constraints,
        tags=column_tags,
    )
80
+
81
+
82
def get_column_lineage(column_field: Field) -> list[TableColumnDep]:
    """Extract column-level lineage from a Field as TableColumnDep objects.

    The function parses the JSON-serialized Field and reads the entries under
    the "lineage.inputFields" key. Each entry is converted into a Dagster
    TableColumnDep whose asset key is the entry's "name" and whose column is
    the entry's "field".

    Args:
        column_field (Field): The Field instance that may contain lineage
            metadata.

    Returns:
        list[TableColumnDep]: One dependency per lineage entry, or an empty
            list when the field has no lineage or no input fields.

    Raises:
        KeyError: If a lineage entry lacks the "name" or "field" key.
    """
    lineage = json.loads(column_field.model_dump_json()).get("lineage")

    if not lineage:
        return []

    # A lineage object may exist without "inputFields" (or with it set to
    # null); treat that as "no dependencies" instead of iterating over None.
    lineage_entries = lineage.get("inputFields") or []

    return [
        dg.TableColumnDep(
            asset_key=dg.AssetKey(lineage_entry["name"]),
            column_name=lineage_entry["field"],
        )
        for lineage_entry in lineage_entries
    ]
@@ -0,0 +1,3 @@
1
+ from dagster_datacontract.tags.load_tags import get_tags
2
+
3
+ __all__ = ["get_tags"]
@@ -0,0 +1,45 @@
1
+ import re
2
+
3
+ from loguru import logger
4
+
5
+
6
+ def get_tags(
7
+ tags_list: list[str] | None,
8
+ ) -> dict[str, str]:
9
+ """Parse and validate a list of string tags into a dictionary format.
10
+
11
+ Each tag in the input list should be in the form "key:value" or simply "key".
12
+ - Keys must match the pattern: ^[\w.-]{1,63}$
13
+ - Values (if provided) must match the pattern: ^[\w.-]{0,63}$
14
+
15
+ Invalid tags (those that do not match the expected format) will be ignored,
16
+ and a warning will be logged.
17
+
18
+ More information about Dagster tags:
19
+ https://docs.dagster.io/guides/build/assets/metadata-and-tags/tags
20
+
21
+ Args:
22
+ tags_list (list[str] | None): A list of tags as strings. Each tag may be
23
+ formatted as "key:value" or just "key". If None, an empty dict is returned.
24
+
25
+ Returns:
26
+ dict[str, str]: A dictionary of validated tags, where keys are tag names
27
+ and values are tag values (empty string if not provided).
28
+ """
29
+ key_pattern = re.compile(r"^[\w.-]{1,63}$")
30
+ val_pattern = re.compile(r"^[\w.-]{0,63}$")
31
+
32
+ tags = {}
33
+
34
+ for item in tags_list:
35
+ if ":" in item:
36
+ key, val = map(str.strip, item.split(":", 1))
37
+ else:
38
+ key, val = item.strip(), ""
39
+
40
+ if key_pattern.match(key) and val_pattern.match(val):
41
+ tags[key] = val
42
+ else:
43
+ logger.warning(f"Ignoring invalid tag: {item}")
44
+
45
+ return tags
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.2.3
3
+ Version: 0.2.5
4
4
  Summary: Load metadata and asset check specifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
6
  Requires-Python: >=3.10.0
@@ -0,0 +1,10 @@
1
+ dagster_datacontract/__init__.py,sha256=Be_sIJXm0gaX5lDuj5xk7FbM5hZsEmQTwHcsztmkEoo,6730
2
+ dagster_datacontract/metadata/__init__.py,sha256=c5giTkxAgr-05ivY5hgsdVuA5UEDzdskVyjNczZdjHM,159
3
+ dagster_datacontract/metadata/table_colums.py,sha256=Q7ZCiMReWU4-T2YfBvtt5vvoVXEoUgzK5OPMxQEgzpQ,4013
4
+ dagster_datacontract/tags/__init__.py,sha256=jmHogoOunDs8YvgnXiMTSoCEGrxWbWnQCKH2_x2uQz8,81
5
+ dagster_datacontract/tags/load_tags.py,sha256=aZ_HTkc-vjJ_rofT32fT_zrLCt9x1ZGn8XoihhOMhfU,1414
6
+ dagster_datacontract-0.2.5.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
7
+ dagster_datacontract-0.2.5.dist-info/METADATA,sha256=RGX_hGPEYpNeXxF9clpc8oBoCx1XqX4H6mHwcQetL3I,3029
8
+ dagster_datacontract-0.2.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
9
+ dagster_datacontract-0.2.5.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
10
+ dagster_datacontract-0.2.5.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- dagster_datacontract/__init__.py,sha256=bfHWETu-RpUyE6_Z3mZoZMJWQSLLgVxnWqbbFTKubRw,7779
2
- dagster_datacontract-0.2.3.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
3
- dagster_datacontract-0.2.3.dist-info/METADATA,sha256=jXtkDSL9P1xLjsAdRBPRhAfMpdYDQqK9udijD8ZtamU,3029
4
- dagster_datacontract-0.2.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
5
- dagster_datacontract-0.2.3.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
6
- dagster_datacontract-0.2.3.dist-info/RECORD,,