dagster-datacontract 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/PKG-INFO +1 -1
  2. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract/__init__.py +23 -17
  3. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract/metadata/__init__.py +7 -1
  4. dagster_datacontract-0.4.0/dagster_datacontract/metadata/links.py +17 -0
  5. dagster_datacontract-0.4.0/dagster_datacontract/metadata/server_information.py +114 -0
  6. dagster_datacontract-0.4.0/dagster_datacontract/owners/__init__.py +23 -0
  7. dagster_datacontract-0.4.0/dagster_datacontract/utils/__init__.py +28 -0
  8. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract.egg-info/PKG-INFO +1 -1
  9. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract.egg-info/SOURCES.txt +4 -1
  10. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/pyproject.toml +1 -1
  11. dagster_datacontract-0.3.0/dagster_datacontract/metadata/server_information.py +0 -75
  12. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/LICENSE +0 -0
  13. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/README.md +0 -0
  14. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract/description/__init__.py +0 -0
  15. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract/description/description.py +0 -0
  16. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract/metadata/table_colums.py +0 -0
  17. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract/tags/__init__.py +0 -0
  18. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract/tags/tags.py +0 -0
  19. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract.egg-info/dependency_links.txt +0 -0
  20. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract.egg-info/requires.txt +0 -0
  21. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/dagster_datacontract.egg-info/top_level.txt +0 -0
  22. {dagster_datacontract-0.3.0 → dagster_datacontract-0.4.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Load metadata and asset check spesifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
6
  Requires-Python: >=3.10.0
@@ -10,10 +10,13 @@ from loguru import logger
10
10
  from dagster_datacontract.description import get_description
11
11
  from dagster_datacontract.metadata import (
12
12
  get_column_lineage,
13
+ get_links,
13
14
  get_server_information,
14
15
  get_table_column,
15
16
  )
17
+ from dagster_datacontract.owners import get_owner
16
18
  from dagster_datacontract.tags import get_tags
19
+ from dagster_datacontract.utils import normalize_path
17
20
 
18
21
 
19
22
  class DataContractLoader:
@@ -34,13 +37,22 @@ class DataContractLoader:
34
37
  self.asset_name,
35
38
  self.data_contract_specification,
36
39
  )
37
- self.owner = self._load_owner()
40
+ self.owner = get_owner(self.data_contract_specification)
38
41
  self.version = self._load_version()
39
42
  self.cron_schedule = self._load_cron_schedule()
40
43
 
41
44
  def _load_metadata(
42
45
  self,
43
46
  ) -> dict[str, TableColumnLineage | TableSchema | Any] | None:
47
+ metadata = (
48
+ {
49
+ "datacontract/path": dg.MetadataValue.url(
50
+ normalize_path(self.data_contract._data_contract_file)
51
+ ),
52
+ }
53
+ if self.data_contract._data_contract_file
54
+ else {}
55
+ )
44
56
  columns = []
45
57
  deps_by_column = {}
46
58
 
@@ -53,24 +65,22 @@ class DataContractLoader:
53
65
  table_column_lineage = get_column_lineage(column_field)
54
66
  deps_by_column[column_name] = table_column_lineage
55
67
 
68
+ metadata["dagster/column_schema"] = dg.TableSchema(columns=columns)
69
+ metadata["dagster/column_lineage"] = dg.TableColumnLineage(
70
+ deps_by_column=deps_by_column
71
+ )
72
+
56
73
  server_information = get_server_information(
57
74
  self.data_contract_specification,
58
75
  self.data_contract._server,
59
76
  self.asset_name,
60
77
  )
78
+ metadata.update(server_information)
61
79
 
62
- return {
63
- "dagster/column_schema": dg.TableSchema(columns=columns),
64
- "dagster/column_lineage": dg.TableColumnLineage(
65
- deps_by_column=deps_by_column
66
- ),
67
- **server_information,
68
- }
80
+ links = get_links(self.data_contract_specification.links)
81
+ metadata.update(links)
69
82
 
70
- def _load_owner(self) -> list[str] | None:
71
- owner = self.data_contract_specification.info.owner
72
-
73
- return [f"team:{owner}"] if owner else None
83
+ return metadata
74
84
 
75
85
  def _load_version(self) -> str | None:
76
86
  version = self.data_contract_specification.info.version
@@ -108,11 +118,7 @@ class DataContractLoader:
108
118
  blocking=True,
109
119
  )
110
120
  def check_asset():
111
- data_contract = DataContract(
112
- data_contract=self.data_contract_specification,
113
- server=self.server_name,
114
- )
115
- run = data_contract.test()
121
+ run = self.data_contract.test()
116
122
 
117
123
  return dg.AssetCheckResult(
118
124
  passed=run.result == ResultEnum.passed,
@@ -1,7 +1,13 @@
1
+ from dagster_datacontract.metadata.links import get_links
1
2
  from dagster_datacontract.metadata.server_information import get_server_information
2
3
  from dagster_datacontract.metadata.table_colums import (
3
4
  get_column_lineage,
4
5
  get_table_column,
5
6
  )
6
7
 
7
- __all__ = ["get_table_column", "get_column_lineage", "get_server_information"]
8
+ __all__ = [
9
+ "get_column_lineage",
10
+ "get_links",
11
+ "get_table_column",
12
+ "get_server_information",
13
+ ]
@@ -0,0 +1,17 @@
1
+ import dagster as dg
2
+
3
+
4
+ def get_links(links: dict[str, str]) -> dict[str, str]:
5
+ """Return a dictionary with keys prefixed by 'link/' and values as Dagster URL metadata.
6
+
7
+ Args:
8
+ links (dict[str, str]): A dictionary where each key is a name/label and each
9
+ value is a URL string.
10
+
11
+ Returns:
12
+ dict[str, str]: A dictionary where each key is prefixed with 'link/' and
13
+ each value is a `MetadataValue.url`.
14
+ """
15
+ links = {f"link/{key}": dg.MetadataValue.url(value) for key, value in links.items()}
16
+
17
+ return links
@@ -0,0 +1,114 @@
1
+ from datacontract.data_contract import DataContractSpecification
2
+
3
+ from dagster_datacontract.utils import normalize_path
4
+
5
+
6
+ def get_server_information(
7
+ data_contract_specification: DataContractSpecification,
8
+ server_name: str | None,
9
+ asset_name: str,
10
+ ) -> dict[str, str]:
11
+ """Returns a dictionary containing server-specific information to be used
12
+ by Dagster for identifying asset locations or connections.
13
+
14
+ This function inspects the provided `DataContractSpecification` to locate
15
+ the specified server by name and constructs a dictionary with keys such as
16
+ "dagster/uri" and "dagster/table_name" depending on the server type.
17
+
18
+ Server information can be obtained from: https://datacontract.com/#server-object
19
+
20
+ Parameters:
21
+ data_contract_specification (DataContractSpecification):
22
+ The data contract specification containing server configurations.
23
+ server_name (str | None):
24
+ The name of the server to retrieve information for. If None or not found, returns an empty dict.
25
+ asset_name (str):
26
+ The name of the asset, used for constructing fully qualified table names for certain server types.
27
+
28
+ Returns:
29
+ dict[str, str]: A dictionary with keys like "dagster/uri" and/or "dagster/table_name"
30
+ depending on the server type. Returns an empty dictionary if the server is not found
31
+ or if the server type is not recognized or unsupported.
32
+ """
33
+ server = data_contract_specification.servers.get(server_name)
34
+ if not server:
35
+ return {}
36
+
37
+ server_information = {}
38
+ match server.type:
39
+ case "azure":
40
+ server_information["dagster/uri"] = server.location
41
+ server_information["azure/storage_account"] = server.storageAccount
42
+ server_information["file/format"] = server.format
43
+ server_information["file/delimiter"] = server.delimiter
44
+ case "bigquery":
45
+ server_information["bigquery/project"] = server.project
46
+ server_information["bigquery/dataset"] = server.dataset
47
+ case "databricks":
48
+ server_information["dagster/uri"] = server.host
49
+ server_information["dagster/table_name"] = (
50
+ f"{server.catalog}.{server.schema}.{asset_name}"
51
+ )
52
+ case "glue":
53
+ server_information = {}
54
+ case "kafka":
55
+ server_information["dagster/uri"] = server.host
56
+ server_information["kafka/topic"] = server.topic
57
+ server_information["kafka/format"] = server.format
58
+ case "kinesis":
59
+ server_information["kinesis/stream"] = server.stream
60
+ server_information["kinesis/region"] = server.region
61
+ server_information["kinesis/format"] = server.format
62
+ case "local":
63
+ server_information["dagster/uri"] = normalize_path(server.path)
64
+ server_information["file/format"] = server.format
65
+ case "oracle":
66
+ server_information["dagster/uri"] = f"{server.host}:{server.port}"
67
+ server_information["oracle/service_name"] = server.serviceName
68
+ case "postgres":
69
+ server_information["dagster/uri"] = f"{server.host}:{server.port}"
70
+ server_information["dagster/table_name"] = (
71
+ f"{server.database}.{server.schema}.{asset_name}"
72
+ )
73
+ case "pubsub":
74
+ server_information["pubsub/project"] = server.project
75
+ server_information["pubsub/topic"] = server.topic
76
+ case "redshift":
77
+ server_information["dagster/uri"] = server.endpoint
78
+ server_information["dagster/table_name"] = (
79
+ f"{server.database}.{server.schema}.{asset_name}"
80
+ )
81
+ server_information["redshift/account"] = server.account
82
+ server_information["redshift/host"] = server.host
83
+ server_information["redshift/port"] = server.port
84
+ server_information["redshift/cluster"] = server.clusterIdentifier
85
+ case "s3":
86
+ server_information["dagster/uri"] = server.location
87
+ server_information["s3/endpoint"] = server.endpointUrl
88
+ server_information["file/format"] = server.format
89
+ server_information["file/delimiter"] = server.delimiter
90
+ case "sftp":
91
+ server_information["dagster/uri"] = server.location
92
+ server_information["file/format"] = server.format
93
+ server_information["file/delimiter"] = server.delimiter
94
+ case "snowflake":
95
+ server_information["dagster/table_name"] = (
96
+ f"{server.database}.{server.schema}.{asset_name}"
97
+ )
98
+ server_information["snowflake/account"] = server.account
99
+ case "sqlserver":
100
+ server_information["dagster/table_name"] = (
101
+ f"{server.database}.{server.schema}.{asset_name}"
102
+ )
103
+ server_information["sqlserver/host"] = server.host
104
+ server_information["sqlserver/port"] = server.port
105
+ server_information["sqlserver/driver"] = server.driver
106
+ case "trino":
107
+ server_information["dagster/uri"] = f"{server.host}:{server.port}"
108
+ server_information["dagster/table_name"] = (
109
+ f"{server.catalog}.{server.schema}.{asset_name}"
110
+ )
111
+ case _:
112
+ server_information = {}
113
+
114
+ return server_information
@@ -0,0 +1,23 @@
1
+ from datacontract.data_contract import DataContractSpecification
2
+
3
+
4
+ def get_owner(
5
+ data_contract_specification: DataContractSpecification,
6
+ is_team: bool = True,
7
+ ) -> list[str] | None:
8
+ """Return the owner of a data contract, optionally formatted as a team identifier.
9
+
10
+ Args:
11
+ data_contract_specification (DataContractSpecification): The data contract specification containing ownership metadata.
12
+ is_team (bool, optional): If True, formats the owner as a team identifier (e.g., 'team:owner').
13
+ If False, returns the raw owner string. Defaults to True.
14
+
15
+ Returns:
16
+ list[str] | None: A list containing the owner string, formatted depending on `is_team`, or None if no owner is found.
17
+ """
18
+ owner = data_contract_specification.info.owner
19
+
20
+ if is_team:
21
+ return [f"team:{owner}"]
22
+
23
+ return [owner]
@@ -0,0 +1,28 @@
1
+ import os
2
+ import urllib.parse
3
+
4
+
5
def normalize_path(path: str) -> str:
    """Normalize a file path so it is returned in a consistent URI format.

    Local paths (no scheme) and ``file`` URIs are converted into a fully
    qualified ``file://`` URI built from the absolute path. Paths with any
    other scheme (e.g. ``s3://``, ``http://``) are returned unchanged.

    Parameters:
        path (str): The input file path. This can be a relative or absolute
            local path, a path starting with `~`, a ``file`` URI, or a URI
            with another scheme.

    Returns:
        str: A normalized path string:
            - If the input is a local path or has a "file" scheme, returns it
              in the form "file:///absolute/path". An input that is already
              in that form is returned as-is, so the function is idempotent.
            - If the input has another scheme, returns it unchanged.
    """
    parsed = urllib.parse.urlparse(path)

    if parsed.scheme and parsed.scheme != "file":
        return path

    local_path = path
    if parsed.scheme == "file":
        # Strip the scheme before resolving; handing the whole URI to
        # os.path.abspath would treat "file:" as a relative directory name.
        if parsed.netloc in ("", "localhost"):
            local_path = parsed.path
        else:
            # e.g. "file://./data/x.yaml" or "file://~/x.yaml": the parser
            # puts the first path segment in the host position.
            local_path = parsed.netloc + parsed.path

    full_path = os.path.abspath(os.path.expanduser(local_path))
    return f"file://{full_path}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Load metadata and asset check spesifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
6
  Requires-Python: >=3.10.0
@@ -10,7 +10,10 @@ dagster_datacontract.egg-info/top_level.txt
10
10
  dagster_datacontract/description/__init__.py
11
11
  dagster_datacontract/description/description.py
12
12
  dagster_datacontract/metadata/__init__.py
13
+ dagster_datacontract/metadata/links.py
13
14
  dagster_datacontract/metadata/server_information.py
14
15
  dagster_datacontract/metadata/table_colums.py
16
+ dagster_datacontract/owners/__init__.py
15
17
  dagster_datacontract/tags/__init__.py
16
- dagster_datacontract/tags/tags.py
18
+ dagster_datacontract/tags/tags.py
19
+ dagster_datacontract/utils/__init__.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dagster-datacontract"
3
- version = "0.3.0"
3
+ version = "0.4.0"
4
4
  description = "Load metadata and asset check spesifications from data contracts."
5
5
  authors = [
6
6
  { name = "Fredrik Bakken", email = "fredrik@dataheim.io" }
@@ -1,75 +0,0 @@
1
- import os
2
- import urllib.parse
3
-
4
- from datacontract.data_contract import DataContractSpecification
5
-
6
-
7
- def _normalize_path(path: str) -> str:
8
- parsed = urllib.parse.urlparse(path)
9
-
10
- if not parsed.scheme or parsed.scheme == "file":
11
- full_path = os.path.abspath(os.path.expanduser(path))
12
- return f"file://{full_path}"
13
- else:
14
- return path
15
-
16
-
17
- def get_server_information(
18
- data_contract_specification: DataContractSpecification,
19
- server_name: str | None,
20
- asset_name: str,
21
- ) -> dict[str, str]:
22
- server = data_contract_specification.servers.get(server_name)
23
- if not server:
24
- return {}
25
-
26
- server_information = {}
27
- match server.type:
28
- case "azure":
29
- server_information["dagster/uri"] = server.location
30
- case "databricks":
31
- server_information["dagster/uri"] = server.host
32
- server_information["dagster/table_name"] = (
33
- f"{server.catalog}.{server.schema}.{asset_name}"
34
- )
35
- case "kafka":
36
- server_information["dagster/uri"] = server.host
37
- case "kinesis":
38
- server_information = {}
39
- case "local":
40
- server_information["dagster/uri"] = _normalize_path(server.path)
41
- case "oracle":
42
- server_information["dagster/uri"] = f"{server.host}:{server.port}"
43
- case "postgres":
44
- server_information["dagster/uri"] = f"{server.host}:{server.port}"
45
- server_information["dagster/table_name"] = (
46
- f"{server.database}.{server.schema}.{asset_name}"
47
- )
48
- case "pubsub":
49
- server_information = {}
50
- case "redshift":
51
- server_information["dagster/uri"] = server.endpoint
52
- server_information["dagster/table_name"] = (
53
- f"{server.database}.{server.schema}.{asset_name}"
54
- )
55
- case "s3":
56
- server_information["dagster/uri"] = server.location
57
- case "sftp":
58
- server_information["dagster/uri"] = server.location
59
- case "snowflake":
60
- server_information["dagster/table_name"] = (
61
- f"{server.database}.{server.schema}.{asset_name}"
62
- )
63
- case "sqlserver":
64
- server_information["dagster/table_name"] = (
65
- f"{server.database}.{server.schema}.{asset_name}"
66
- )
67
- case "trino":
68
- server_information["dagster/uri"] = f"{server.host}:{server.port}"
69
- server_information["dagster/table_name"] = (
70
- f"{server.catalog}.{server.schema}.{asset_name}"
71
- )
72
- case _:
73
- server_information = {}
74
-
75
- return server_information