dagster-datacontract 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,3 @@
1
- import textwrap
2
1
  from datetime import timedelta
3
2
  from typing import Any
4
3
 
@@ -8,11 +7,14 @@ from datacontract.data_contract import DataContract
8
7
  from datacontract.model.run import ResultEnum
9
8
  from loguru import logger
10
9
 
10
+ from dagster_datacontract.description import get_description
11
11
  from dagster_datacontract.metadata import (
12
12
  get_column_lineage,
13
+ get_server_information,
13
14
  get_table_column,
14
15
  )
15
16
  from dagster_datacontract.tags import get_tags
17
+ from dagster_datacontract.utils import normalize_path
16
18
 
17
19
 
18
20
  class DataContractLoader:
@@ -29,7 +31,10 @@ class DataContractLoader:
29
31
  )
30
32
  self.metadata = self._load_metadata()
31
33
  self.tags = get_tags(self.data_contract_specification.tags)
32
- self.description = self.load_description()
34
+ self.description = get_description(
35
+ self.asset_name,
36
+ self.data_contract_specification,
37
+ )
33
38
  self.owner = self._load_owner()
34
39
  self.version = self._load_version()
35
40
  self.cron_schedule = self._load_cron_schedule()
@@ -37,6 +42,15 @@ class DataContractLoader:
37
42
  def _load_metadata(
38
43
  self,
39
44
  ) -> dict[str, TableColumnLineage | TableSchema | Any] | None:
45
+ metadata = (
46
+ {
47
+ "data contract path": dg.MetadataValue.url(
48
+ normalize_path(self.data_contract._data_contract_file)
49
+ ),
50
+ }
51
+ if self.data_contract._data_contract_file
52
+ else {}
53
+ )
40
54
  columns = []
41
55
  deps_by_column = {}
42
56
 
@@ -49,12 +63,19 @@ class DataContractLoader:
49
63
  table_column_lineage = get_column_lineage(column_field)
50
64
  deps_by_column[column_name] = table_column_lineage
51
65
 
52
- return {
53
- "dagster/column_schema": dg.TableSchema(columns=columns),
54
- "dagster/column_lineage": dg.TableColumnLineage(
55
- deps_by_column=deps_by_column
56
- ),
57
- }
66
+ metadata["dagster/column_schema"] = dg.TableSchema(columns=columns)
67
+ metadata["dagster/column_lineage"] = dg.TableColumnLineage(
68
+ deps_by_column=deps_by_column
69
+ )
70
+
71
+ server_information = get_server_information(
72
+ self.data_contract_specification,
73
+ self.data_contract._server,
74
+ self.asset_name,
75
+ )
76
+ metadata.update(server_information)
77
+
78
+ return metadata
58
79
 
59
80
  def _load_owner(self) -> list[str] | None:
60
81
  owner = self.data_contract_specification.info.owner
@@ -76,55 +97,6 @@ class DataContractLoader:
76
97
  logger.warning("'servicelevels.frequency.cron' not found in Data Contract.")
77
98
  return None
78
99
 
79
- def load_description(
80
- self,
81
- config: dict[str, Any] | None = None,
82
- separator: str = "\n",
83
- ) -> str | None:
84
- """Load and return a formatted description string based on the data contract specification.
85
-
86
- This method composes a description by pulling text from different parts
87
- of the data contract specification (e.g., model and info descriptions),
88
- joining them using the specified separator.
89
-
90
- Args:
91
- config (dict[str, Any] | None, optional): A configuration dictionary
92
- specifying the order in which to concatenate the description parts.
93
- Defaults to `{"order": ["model", "info"]}`.
94
- separator (str, optional): A string used to separate different parts
95
- of the description. Defaults to a newline character (`"\n"`).
96
-
97
- Returns:
98
- str | None: A single string combining the specified description parts
99
- if available, otherwise `None`.
100
-
101
-
102
- Example:
103
- >>> self.load_description()
104
- 'Model description...\nInfo description...'
105
- """
106
- default_config = {"order": ["model", "info"]}
107
-
108
- configuration = default_config | (config or {})
109
-
110
- descriptions = {
111
- "model": self.data_contract_specification.models.get(
112
- self.asset_name
113
- ).description,
114
- "info": self.data_contract_specification.info.description,
115
- }
116
-
117
- parts = []
118
- for key in configuration["order"]:
119
- desc = descriptions.get(key).replace("\n", f"{separator}\n")
120
- if desc:
121
- parts.append(textwrap.dedent(desc))
122
-
123
- if parts:
124
- return f"{separator}\n".join(parts)
125
-
126
- return None
127
-
128
100
  def load_data_quality_checks(self) -> dg.AssetChecksDefinition:
129
101
  """Define and return a data quality check for the specified asset.
130
102
 
@@ -0,0 +1,3 @@
1
+ from dagster_datacontract.description.description import get_description
2
+
3
+ __all__ = ["get_description"]
@@ -0,0 +1,48 @@
1
+ import textwrap
2
+ from typing import Any
3
+
4
+ from datacontract.data_contract import DataContractSpecification
5
+
6
+
7
+ def get_description(
8
+ asset_name: str,
9
+ data_contract_specification: DataContractSpecification,
10
+ config: dict[str, Any] | None = None,
11
+ separator: str = "\n",
12
+ ) -> str | None:
13
+ """Load and return a formatted description string based on the data contract specification.
14
+
15
+ This method composes a description by pulling text from different parts
16
+ of the data contract specification (e.g., model and info descriptions),
17
+ joining them using the specified separator.
18
+
19
+ Args:
20
+ config (dict[str, Any] | None, optional): A configuration dictionary
21
+ specifying the order in which to concatenate the description parts.
22
+ Defaults to `{"order": ["model", "info"]}`.
23
+ separator (str, optional): A string used to separate different parts
24
+ of the description. Defaults to a newline character (`"\n"`).
25
+
26
+ Returns:
27
+ str | None: A single string combining the specified description parts
28
+ if available, otherwise `None`.
29
+ """
30
+ default_config = {"order": ["model", "info"]}
31
+
32
+ configuration = default_config | (config or {})
33
+
34
+ descriptions = {
35
+ "model": data_contract_specification.models.get(asset_name).description,
36
+ "info": data_contract_specification.info.description,
37
+ }
38
+
39
+ parts = []
40
+ for key in configuration["order"]:
41
+ desc = descriptions.get(key).replace("\n", f"{separator}\n")
42
+ if desc:
43
+ parts.append(textwrap.dedent(desc))
44
+
45
+ if parts:
46
+ return f"{separator}\n".join(parts)
47
+
48
+ return None
@@ -1,6 +1,7 @@
1
+ from dagster_datacontract.metadata.server_information import get_server_information
1
2
  from dagster_datacontract.metadata.table_colums import (
2
3
  get_column_lineage,
3
4
  get_table_column,
4
5
  )
5
6
 
6
- __all__ = ["get_table_column", "get_column_lineage"]
7
+ __all__ = ["get_table_column", "get_column_lineage", "get_server_information"]
@@ -0,0 +1,84 @@
1
+ from datacontract.data_contract import DataContractSpecification
2
+
3
+ from dagster_datacontract.utils import normalize_path
4
+
5
+
6
+ def get_server_information(
7
+ data_contract_specification: DataContractSpecification,
8
+ server_name: str | None,
9
+ asset_name: str,
10
+ ) -> dict[str, str]:
11
+ """Returns a dictionary containing server-specific information to be used
12
+ by Dagster for identifying asset locations or connections.
13
+
14
+ This function inspects the provided `DataContractSpecification` to locate
15
+ the specified server by name and constructs a dictionary with keys such as
16
+ "dagster/uri" and "dagster/table_name" depending on the server type.
17
+
18
+ Parameters:
19
+ data_contract_specification (DataContractSpecification):
20
+ The data contract specification containing server configurations.
21
+ server_name (str | None):
22
+ The name of the server to retrieve information for. If None or not found, returns an empty dict.
23
+ asset_name (str):
24
+ The name of the asset, used for constructing fully qualified table names for certain server types.
25
+
26
+ Returns:
27
+ dict[str, str]: A dictionary with keys like "dagster/uri" and/or "dagster/table_name"
28
+ depending on the server type. Returns an empty dictionary if the server is not found
29
+ or if the server type is not recognized or unsupported.
30
+ """
31
+ server = data_contract_specification.servers.get(server_name)
32
+ if not server:
33
+ return {}
34
+
35
+ server_information = {}
36
+ match server.type:
37
+ case "azure":
38
+ server_information["dagster/uri"] = server.location
39
+ case "databricks":
40
+ server_information["dagster/uri"] = server.host
41
+ server_information["dagster/table_name"] = (
42
+ f"{server.catalog}.{server.schema}.{asset_name}"
43
+ )
44
+ case "kafka":
45
+ server_information["dagster/uri"] = server.host
46
+ case "kinesis":
47
+ server_information = {}
48
+ case "local":
49
+ server_information["dagster/uri"] = normalize_path(server.path)
50
+ case "oracle":
51
+ server_information["dagster/uri"] = f"{server.host}:{server.port}"
52
+ case "postgres":
53
+ server_information["dagster/uri"] = f"{server.host}:{server.port}"
54
+ server_information["dagster/table_name"] = (
55
+ f"{server.database}.{server.schema}.{asset_name}"
56
+ )
57
+ case "pubsub":
58
+ server_information = {}
59
+ case "redshift":
60
+ server_information["dagster/uri"] = server.endpoint
61
+ server_information["dagster/table_name"] = (
62
+ f"{server.database}.{server.schema}.{asset_name}"
63
+ )
64
+ case "s3":
65
+ server_information["dagster/uri"] = server.location
66
+ case "sftp":
67
+ server_information["dagster/uri"] = server.location
68
+ case "snowflake":
69
+ server_information["dagster/table_name"] = (
70
+ f"{server.database}.{server.schema}.{asset_name}"
71
+ )
72
+ case "sqlserver":
73
+ server_information["dagster/table_name"] = (
74
+ f"{server.database}.{server.schema}.{asset_name}"
75
+ )
76
+ case "trino":
77
+ server_information["dagster/uri"] = f"{server.host}:{server.port}"
78
+ server_information["dagster/table_name"] = (
79
+ f"{server.catalog}.{server.schema}.{asset_name}"
80
+ )
81
+ case _:
82
+ server_information = {}
83
+
84
+ return server_information
@@ -1,3 +1,3 @@
1
- from dagster_datacontract.tags.load_tags import get_tags
1
+ from dagster_datacontract.tags.tags import get_tags
2
2
 
3
3
  __all__ = ["get_tags"]
@@ -0,0 +1,28 @@
1
+ import os
2
+ import urllib.parse
3
+
4
+
5
def normalize_path(path: str) -> str:
    """Normalize a file path or URI into a consistent URI string.

    Local paths (no scheme) and ``file`` URIs are converted to the form
    ``file://<absolute path>``; ``~`` is expanded and relative paths are
    resolved against the current working directory. Anything with another
    scheme (e.g. ``s3://``, ``http://``) is returned unchanged.

    Parameters:
        path (str): The input file path. This can be a relative or absolute local path,
            a path starting with `~`, or a URI with a scheme.

    Returns:
        str: A normalized path string:
            - If the input is a local path or has a "file" scheme, returns it in the form "file:///absolute/path".
            - If the input has another scheme (e.g., "s3://", "http://"), returns it unchanged.
    """
    parsed = urllib.parse.urlparse(path)

    if parsed.scheme == "file":
        # Strip the scheme before resolving; passing the whole URI to abspath
        # would treat "file:" as a relative directory name.
        # A non-local authority (e.g. "file://data/x.yaml") is kept as the
        # leading path segment so nothing from the input is dropped.
        authority = "" if parsed.netloc == "localhost" else parsed.netloc
        local_path = f"{authority}{parsed.path}"
    elif not parsed.scheme:
        local_path = path
    else:
        return path

    full_path = os.path.abspath(os.path.expanduser(local_path))
    return f"file://{full_path}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.2.5
3
+ Version: 0.3.1
4
4
  Summary: Load metadata and asset check spesifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
6
  Requires-Python: >=3.10.0
@@ -0,0 +1,14 @@
1
+ dagster_datacontract/__init__.py,sha256=Pzq4AblLnDhW0QBmV8ntV5KLD0a7D0-mA2Lw9hjS_V8,5685
2
+ dagster_datacontract/description/__init__.py,sha256=ulWqPp5jIPvCzaDFZcjLjcDkljJ5j_FRsE0dXhK8Wlc,104
3
+ dagster_datacontract/description/description.py,sha256=FmjgCYDpJ9UHrvAv0sAthfRohDjdG0lL1XcMKK8QMmI,1646
4
+ dagster_datacontract/metadata/__init__.py,sha256=e-xmcWWoAhmKTwosshsxnyrjI1j-UyY6YpdpzA2ggF4,269
5
+ dagster_datacontract/metadata/server_information.py,sha256=jk_H8aI5PdGzIeoYThlhhZMSOtBh-6xc8QAFz0BFesU,3512
6
+ dagster_datacontract/metadata/table_colums.py,sha256=Q7ZCiMReWU4-T2YfBvtt5vvoVXEoUgzK5OPMxQEgzpQ,4013
7
+ dagster_datacontract/tags/__init__.py,sha256=2Ph-M0WbBKUjJWIzM_cEBW3SQZh7Nq8oy5MbD5bt_lc,76
8
+ dagster_datacontract/tags/tags.py,sha256=aZ_HTkc-vjJ_rofT32fT_zrLCt9x1ZGn8XoihhOMhfU,1414
9
+ dagster_datacontract/utils/__init__.py,sha256=Zfbuf20Eorf7BD8gSDASiPqgPwWu8Mz03r3aa2zE4NA,1106
10
+ dagster_datacontract-0.3.1.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
11
+ dagster_datacontract-0.3.1.dist-info/METADATA,sha256=P98oLsqB-GCJz4uanki-xtG3UnZCHD0biDMUYIZMEhc,3029
12
+ dagster_datacontract-0.3.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
13
+ dagster_datacontract-0.3.1.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
14
+ dagster_datacontract-0.3.1.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- dagster_datacontract/__init__.py,sha256=Be_sIJXm0gaX5lDuj5xk7FbM5hZsEmQTwHcsztmkEoo,6730
2
- dagster_datacontract/metadata/__init__.py,sha256=c5giTkxAgr-05ivY5hgsdVuA5UEDzdskVyjNczZdjHM,159
3
- dagster_datacontract/metadata/table_colums.py,sha256=Q7ZCiMReWU4-T2YfBvtt5vvoVXEoUgzK5OPMxQEgzpQ,4013
4
- dagster_datacontract/tags/__init__.py,sha256=jmHogoOunDs8YvgnXiMTSoCEGrxWbWnQCKH2_x2uQz8,81
5
- dagster_datacontract/tags/load_tags.py,sha256=aZ_HTkc-vjJ_rofT32fT_zrLCt9x1ZGn8XoihhOMhfU,1414
6
- dagster_datacontract-0.2.5.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
7
- dagster_datacontract-0.2.5.dist-info/METADATA,sha256=RGX_hGPEYpNeXxF9clpc8oBoCx1XqX4H6mHwcQetL3I,3029
8
- dagster_datacontract-0.2.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
9
- dagster_datacontract-0.2.5.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
10
- dagster_datacontract-0.2.5.dist-info/RECORD,,
File without changes