dagster-datacontract 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,3 @@
1
- import textwrap
2
1
  from datetime import timedelta
3
2
  from typing import Any
4
3
 
@@ -8,8 +7,10 @@ from datacontract.data_contract import DataContract
8
7
  from datacontract.model.run import ResultEnum
9
8
  from loguru import logger
10
9
 
10
+ from dagster_datacontract.description import get_description
11
11
  from dagster_datacontract.metadata import (
12
12
  get_column_lineage,
13
+ get_server_information,
13
14
  get_table_column,
14
15
  )
15
16
  from dagster_datacontract.tags import get_tags
@@ -29,7 +30,10 @@ class DataContractLoader:
29
30
  )
30
31
  self.metadata = self._load_metadata()
31
32
  self.tags = get_tags(self.data_contract_specification.tags)
32
- self.description = self.load_description()
33
+ self.description = get_description(
34
+ self.asset_name,
35
+ self.data_contract_specification,
36
+ )
33
37
  self.owner = self._load_owner()
34
38
  self.version = self._load_version()
35
39
  self.cron_schedule = self._load_cron_schedule()
@@ -49,11 +53,18 @@ class DataContractLoader:
49
53
  table_column_lineage = get_column_lineage(column_field)
50
54
  deps_by_column[column_name] = table_column_lineage
51
55
 
56
+ server_information = get_server_information(
57
+ self.data_contract_specification,
58
+ self.data_contract._server,
59
+ self.asset_name,
60
+ )
61
+
52
62
  return {
53
63
  "dagster/column_schema": dg.TableSchema(columns=columns),
54
64
  "dagster/column_lineage": dg.TableColumnLineage(
55
65
  deps_by_column=deps_by_column
56
66
  ),
67
+ **server_information,
57
68
  }
58
69
 
59
70
  def _load_owner(self) -> list[str] | None:
@@ -76,55 +87,6 @@ class DataContractLoader:
76
87
  logger.warning("'servicelevels.frequency.cron' not found in Data Contract.")
77
88
  return None
78
89
 
79
- def load_description(
80
- self,
81
- config: dict[str, Any] | None = None,
82
- separator: str = "\n",
83
- ) -> str | None:
84
- """Load and return a formatted description string based on the data contract specification.
85
-
86
- This method composes a description by pulling text from different parts
87
- of the data contract specification (e.g., model and info descriptions),
88
- joining them using the specified separator.
89
-
90
- Args:
91
- config (dict[str, Any] | None, optional): A configuration dictionary
92
- specifying the order in which to concatenate the description parts.
93
- Defaults to `{"order": ["model", "info"]}`.
94
- separator (str, optional): A string used to separate different parts
95
- of the description. Defaults to a newline character (`"\n"`).
96
-
97
- Returns:
98
- str | None: A single string combining the specified description parts
99
- if available, otherwise `None`.
100
-
101
-
102
- Example:
103
- >>> self.load_description()
104
- 'Model description...\nInfo description...'
105
- """
106
- default_config = {"order": ["model", "info"]}
107
-
108
- configuration = default_config | (config or {})
109
-
110
- descriptions = {
111
- "model": self.data_contract_specification.models.get(
112
- self.asset_name
113
- ).description,
114
- "info": self.data_contract_specification.info.description,
115
- }
116
-
117
- parts = []
118
- for key in configuration["order"]:
119
- desc = descriptions.get(key).replace("\n", f"{separator}\n")
120
- if desc:
121
- parts.append(textwrap.dedent(desc))
122
-
123
- if parts:
124
- return f"{separator}\n".join(parts)
125
-
126
- return None
127
-
128
90
  def load_data_quality_checks(self) -> dg.AssetChecksDefinition:
129
91
  """Define and return a data quality check for the specified asset.
130
92
 
@@ -146,7 +108,11 @@ class DataContractLoader:
146
108
  blocking=True,
147
109
  )
148
110
  def check_asset():
149
- run = self.data_contract.test()
111
+ data_contract = DataContract(
112
+ data_contract=self.data_contract_specification,
113
+ server=self.server_name,
114
+ )
115
+ run = data_contract.test()
150
116
 
151
117
  return dg.AssetCheckResult(
152
118
  passed=run.result == ResultEnum.passed,
@@ -0,0 +1,3 @@
1
+ from dagster_datacontract.description.description import get_description
2
+
3
+ __all__ = ["get_description"]
@@ -0,0 +1,48 @@
1
+ import textwrap
2
+ from typing import Any
3
+
4
+ from datacontract.data_contract import DataContractSpecification
5
+
6
+
7
+ def get_description(
8
+ asset_name: str,
9
+ data_contract_specification: DataContractSpecification,
10
+ config: dict[str, Any] | None = None,
11
+ separator: str = "\n",
12
+ ) -> str | None:
13
+ """Load and return a formatted description string based on the data contract specification.
14
+
15
+ This method composes a description by pulling text from different parts
16
+ of the data contract specification (e.g., model and info descriptions),
17
+ joining them using the specified separator.
18
+
19
+ Args:
20
+ config (dict[str, Any] | None, optional): A configuration dictionary
21
+ specifying the order in which to concatenate the description parts.
22
+ Defaults to `{"order": ["model", "info"]}`.
23
+ separator (str, optional): A string used to separate different parts
24
+ of the description. Defaults to a newline character (`"\n"`).
25
+
26
+ Returns:
27
+ str | None: A single string combining the specified description parts
28
+ if available, otherwise `None`.
29
+ """
30
+ default_config = {"order": ["model", "info"]}
31
+
32
+ configuration = default_config | (config or {})
33
+
34
+ descriptions = {
35
+ "model": data_contract_specification.models.get(asset_name).description,
36
+ "info": data_contract_specification.info.description,
37
+ }
38
+
39
+ parts = []
40
+ for key in configuration["order"]:
41
+ desc = descriptions.get(key).replace("\n", f"{separator}\n")
42
+ if desc:
43
+ parts.append(textwrap.dedent(desc))
44
+
45
+ if parts:
46
+ return f"{separator}\n".join(parts)
47
+
48
+ return None
@@ -1,6 +1,7 @@
1
+ from dagster_datacontract.metadata.server_information import get_server_information
1
2
  from dagster_datacontract.metadata.table_colums import (
2
3
  get_column_lineage,
3
4
  get_table_column,
4
5
  )
5
6
 
6
- __all__ = ["get_table_column", "get_column_lineage"]
7
+ __all__ = ["get_table_column", "get_column_lineage", "get_server_information"]
@@ -0,0 +1,75 @@
1
+ import os
2
+ import urllib.parse
3
+
4
+ from datacontract.data_contract import DataContractSpecification
5
+
6
+
7
+ def _normalize_path(path: str) -> str:
8
+ parsed = urllib.parse.urlparse(path)
9
+
10
+ if not parsed.scheme or parsed.scheme == "file":
11
+ full_path = os.path.abspath(os.path.expanduser(path))
12
+ return f"file://{full_path}"
13
+ else:
14
+ return path
15
+
16
+
17
+ def get_server_information(
18
+ data_contract_specification: DataContractSpecification,
19
+ server_name: str | None,
20
+ asset_name: str,
21
+ ) -> dict[str, str]:
22
+ server = data_contract_specification.servers.get(server_name)
23
+ if not server:
24
+ return {}
25
+
26
+ server_information = {}
27
+ match server.type:
28
+ case "azure":
29
+ server_information["dagster/uri"] = server.location
30
+ case "databricks":
31
+ server_information["dagster/uri"] = server.host
32
+ server_information["dagster/table_name"] = (
33
+ f"{server.catalog}.{server.schema}.{asset_name}"
34
+ )
35
+ case "kafka":
36
+ server_information["dagster/uri"] = server.host
37
+ case "kinesis":
38
+ server_information = {}
39
+ case "local":
40
+ server_information["dagster/uri"] = _normalize_path(server.path)
41
+ case "oracle":
42
+ server_information["dagster/uri"] = f"{server.host}:{server.port}"
43
+ case "postgres":
44
+ server_information["dagster/uri"] = f"{server.host}:{server.port}"
45
+ server_information["dagster/table_name"] = (
46
+ f"{server.database}.{server.schema}.{asset_name}"
47
+ )
48
+ case "pubsub":
49
+ server_information = {}
50
+ case "redshift":
51
+ server_information["dagster/uri"] = server.endpoint
52
+ server_information["dagster/table_name"] = (
53
+ f"{server.database}.{server.schema}.{asset_name}"
54
+ )
55
+ case "s3":
56
+ server_information["dagster/uri"] = server.location
57
+ case "sftp":
58
+ server_information["dagster/uri"] = server.location
59
+ case "snowflake":
60
+ server_information["dagster/table_name"] = (
61
+ f"{server.database}.{server.schema}.{asset_name}"
62
+ )
63
+ case "sqlserver":
64
+ server_information["dagster/table_name"] = (
65
+ f"{server.database}.{server.schema}.{asset_name}"
66
+ )
67
+ case "trino":
68
+ server_information["dagster/uri"] = f"{server.host}:{server.port}"
69
+ server_information["dagster/table_name"] = (
70
+ f"{server.catalog}.{server.schema}.{asset_name}"
71
+ )
72
+ case _:
73
+ server_information = {}
74
+
75
+ return server_information
@@ -1,3 +1,3 @@
1
- from dagster_datacontract.tags.load_tags import get_tags
1
+ from dagster_datacontract.tags.tags import get_tags
2
2
 
3
3
  __all__ = ["get_tags"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.2.5
3
+ Version: 0.3.0
4
4
  Summary: Load metadata and asset check spesifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
6
  Requires-Python: >=3.10.0
@@ -0,0 +1,13 @@
1
+ dagster_datacontract/__init__.py,sha256=UVpDzZYjmskEDJqEewMD52uWvu183O5RV9hjnP8mYAc,5488
2
+ dagster_datacontract/description/__init__.py,sha256=ulWqPp5jIPvCzaDFZcjLjcDkljJ5j_FRsE0dXhK8Wlc,104
3
+ dagster_datacontract/description/description.py,sha256=FmjgCYDpJ9UHrvAv0sAthfRohDjdG0lL1XcMKK8QMmI,1646
4
+ dagster_datacontract/metadata/__init__.py,sha256=e-xmcWWoAhmKTwosshsxnyrjI1j-UyY6YpdpzA2ggF4,269
5
+ dagster_datacontract/metadata/server_information.py,sha256=m1pv9sMfVjGIjzYVJ9R-KF-ABXCgAGKKH12dgAbm_jQ,2669
6
+ dagster_datacontract/metadata/table_colums.py,sha256=Q7ZCiMReWU4-T2YfBvtt5vvoVXEoUgzK5OPMxQEgzpQ,4013
7
+ dagster_datacontract/tags/__init__.py,sha256=2Ph-M0WbBKUjJWIzM_cEBW3SQZh7Nq8oy5MbD5bt_lc,76
8
+ dagster_datacontract/tags/tags.py,sha256=aZ_HTkc-vjJ_rofT32fT_zrLCt9x1ZGn8XoihhOMhfU,1414
9
+ dagster_datacontract-0.3.0.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
10
+ dagster_datacontract-0.3.0.dist-info/METADATA,sha256=1uuc5HEqV3OYmP7jNe9TZftEcirYwKbRXbnoAOe3pWQ,3029
11
+ dagster_datacontract-0.3.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
12
+ dagster_datacontract-0.3.0.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
13
+ dagster_datacontract-0.3.0.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- dagster_datacontract/__init__.py,sha256=Be_sIJXm0gaX5lDuj5xk7FbM5hZsEmQTwHcsztmkEoo,6730
2
- dagster_datacontract/metadata/__init__.py,sha256=c5giTkxAgr-05ivY5hgsdVuA5UEDzdskVyjNczZdjHM,159
3
- dagster_datacontract/metadata/table_colums.py,sha256=Q7ZCiMReWU4-T2YfBvtt5vvoVXEoUgzK5OPMxQEgzpQ,4013
4
- dagster_datacontract/tags/__init__.py,sha256=jmHogoOunDs8YvgnXiMTSoCEGrxWbWnQCKH2_x2uQz8,81
5
- dagster_datacontract/tags/load_tags.py,sha256=aZ_HTkc-vjJ_rofT32fT_zrLCt9x1ZGn8XoihhOMhfU,1414
6
- dagster_datacontract-0.2.5.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
7
- dagster_datacontract-0.2.5.dist-info/METADATA,sha256=RGX_hGPEYpNeXxF9clpc8oBoCx1XqX4H6mHwcQetL3I,3029
8
- dagster_datacontract-0.2.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
9
- dagster_datacontract-0.2.5.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
10
- dagster_datacontract-0.2.5.dist-info/RECORD,,
File without changes