dagster-datacontract 0.3.1__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/PKG-INFO +1 -1
  2. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract/__init__.py +7 -7
  3. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract/metadata/__init__.py +7 -1
  4. dagster_datacontract-0.4.0/dagster_datacontract/metadata/links.py +17 -0
  5. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract/metadata/server_information.py +32 -2
  6. dagster_datacontract-0.4.0/dagster_datacontract/owners/__init__.py +23 -0
  7. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract.egg-info/PKG-INFO +1 -1
  8. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract.egg-info/SOURCES.txt +2 -0
  9. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/pyproject.toml +1 -1
  10. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/LICENSE +0 -0
  11. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/README.md +0 -0
  12. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract/description/__init__.py +0 -0
  13. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract/description/description.py +0 -0
  14. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract/metadata/table_colums.py +0 -0
  15. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract/tags/__init__.py +0 -0
  16. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract/tags/tags.py +0 -0
  17. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract/utils/__init__.py +0 -0
  18. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract.egg-info/dependency_links.txt +0 -0
  19. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract.egg-info/requires.txt +0 -0
  20. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/dagster_datacontract.egg-info/top_level.txt +0 -0
  21. {dagster_datacontract-0.3.1 → dagster_datacontract-0.4.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Summary: Load metadata and asset check specifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
6
  Requires-Python: >=3.10.0
@@ -10,9 +10,11 @@ from loguru import logger
10
10
  from dagster_datacontract.description import get_description
11
11
  from dagster_datacontract.metadata import (
12
12
  get_column_lineage,
13
+ get_links,
13
14
  get_server_information,
14
15
  get_table_column,
15
16
  )
17
+ from dagster_datacontract.owners import get_owner
16
18
  from dagster_datacontract.tags import get_tags
17
19
  from dagster_datacontract.utils import normalize_path
18
20
 
@@ -35,7 +37,7 @@ class DataContractLoader:
35
37
  self.asset_name,
36
38
  self.data_contract_specification,
37
39
  )
38
- self.owner = self._load_owner()
40
+ self.owner = get_owner(self.data_contract_specification)
39
41
  self.version = self._load_version()
40
42
  self.cron_schedule = self._load_cron_schedule()
41
43
 
@@ -44,7 +46,7 @@ class DataContractLoader:
44
46
  ) -> dict[str, TableColumnLineage | TableSchema | Any] | None:
45
47
  metadata = (
46
48
  {
47
- "data contract path": dg.MetadataValue.url(
49
+ "datacontract/path": dg.MetadataValue.url(
48
50
  normalize_path(self.data_contract._data_contract_file)
49
51
  ),
50
52
  }
@@ -75,12 +77,10 @@ class DataContractLoader:
75
77
  )
76
78
  metadata.update(server_information)
77
79
 
78
- return metadata
79
-
80
- def _load_owner(self) -> list[str] | None:
81
- owner = self.data_contract_specification.info.owner
80
+ links = get_links(self.data_contract_specification.links)
81
+ metadata.update(links)
82
82
 
83
- return [f"team:{owner}"] if owner else None
83
+ return metadata
84
84
 
85
85
  def _load_version(self) -> str | None:
86
86
  version = self.data_contract_specification.info.version
@@ -1,7 +1,13 @@
1
+ from dagster_datacontract.metadata.links import get_links
1
2
  from dagster_datacontract.metadata.server_information import get_server_information
2
3
  from dagster_datacontract.metadata.table_colums import (
3
4
  get_column_lineage,
4
5
  get_table_column,
5
6
  )
6
7
 
7
- __all__ = ["get_table_column", "get_column_lineage", "get_server_information"]
8
+ __all__ = [
9
+ "get_column_lineage",
10
+ "get_links",
11
+ "get_table_column",
12
+ "get_server_information",
13
+ ]
@@ -0,0 +1,17 @@
1
+ import dagster as dg
2
+
3
+
4
+ def get_links(links: dict[str, str]) -> dict[str, str]:
5
+ """Return a dictionary with keys prefixed by 'link/' and values as Dagster URL metadata.
6
+
7
+ Args:
8
+ links (dict[str, str]): A dictionary where each key is a name/label and each
9
+ value is a URL string.
10
+
11
+ Returns:
12
+ dict[str, MetadataValue]: A dictionary where each key is prefixed with 'link/' and
13
+ each value is a `MetadataValue.url`.
14
+ """
15
+ links = {f"link/{key}": dg.MetadataValue.url(value) for key, value in links.items()}
16
+
17
+ return links
@@ -15,6 +15,8 @@ def get_server_information(
15
15
  the specified server by name and constructs a dictionary with keys such as
16
16
  "dagster/uri" and "dagster/table_name" depending on the server type.
17
17
 
18
+ Server information can be obtained from: https://datacontract.com/#server-object
19
+
18
20
  Parameters:
19
21
  data_contract_specification (DataContractSpecification):
20
22
  The data contract specification containing server configurations.
@@ -36,43 +38,71 @@ def get_server_information(
36
38
  match server.type:
37
39
  case "azure":
38
40
  server_information["dagster/uri"] = server.location
41
+ server_information["azure/storage_account"] = server.storageAccount
42
+ server_information["file/format"] = server.format
43
+ server_information["file/delimiter"] = server.delimiter
44
+ case "bigquery":
45
+ server_information["bigquery/project"] = server.project
46
+ server_information["bigquery/dataset"] = server.dataset
39
47
  case "databricks":
40
48
  server_information["dagster/uri"] = server.host
41
49
  server_information["dagster/table_name"] = (
42
50
  f"{server.catalog}.{server.schema}.{asset_name}"
43
51
  )
52
+ case "glue":
53
+ server_information = {}
44
54
  case "kafka":
45
55
  server_information["dagster/uri"] = server.host
56
+ server_information["kafka/topic"] = server.topic
57
+ server_information["kafka/format"] = server.format
46
58
  case "kinesis":
47
- server_information = {}
59
+ server_information["kinesis/stream"] = server.stream
60
+ server_information["kinesis/region"] = server.region
61
+ server_information["kinesis/format"] = server.format
48
62
  case "local":
49
63
  server_information["dagster/uri"] = normalize_path(server.path)
64
+ server_information["file/format"] = server.format
50
65
  case "oracle":
51
66
  server_information["dagster/uri"] = f"{server.host}:{server.port}"
67
+ server_information["oracle/service_name"] = server.serviceName
52
68
  case "postgres":
53
69
  server_information["dagster/uri"] = f"{server.host}:{server.port}"
54
70
  server_information["dagster/table_name"] = (
55
71
  f"{server.database}.{server.schema}.{asset_name}"
56
72
  )
57
73
  case "pubsub":
58
- server_information = {}
74
+ server_information["pubsub/project"] = server.project
75
+ server_information["pubsub/topic"] = server.topic
59
76
  case "redshift":
60
77
  server_information["dagster/uri"] = server.endpoint
61
78
  server_information["dagster/table_name"] = (
62
79
  f"{server.database}.{server.schema}.{asset_name}"
63
80
  )
81
+ server_information["redshift/account"] = server.account
82
+ server_information["redshift/host"] = server.host
83
+ server_information["redshift/port"] = server.port
84
+ server_information["redshift/cluster"] = server.clusterIdentifier
64
85
  case "s3":
65
86
  server_information["dagster/uri"] = server.location
87
+ server_information["s3/endpoint"] = server.endpointUrl
88
+ server_information["file/format"] = server.format
89
+ server_information["file/delimiter"] = server.delimiter
66
90
  case "sftp":
67
91
  server_information["dagster/uri"] = server.location
92
+ server_information["file/format"] = server.format
93
+ server_information["file/delimiter"] = server.delimiter
68
94
  case "snowflake":
69
95
  server_information["dagster/table_name"] = (
70
96
  f"{server.database}.{server.schema}.{asset_name}"
71
97
  )
98
+ server_information["snowflake/account"] = server.account
72
99
  case "sqlserver":
73
100
  server_information["dagster/table_name"] = (
74
101
  f"{server.database}.{server.schema}.{asset_name}"
75
102
  )
103
+ server_information["sqlserver/host"] = server.host
104
+ server_information["sqlserver/port"] = server.port
105
+ server_information["sqlserver/driver"] = server.driver
76
106
  case "trino":
77
107
  server_information["dagster/uri"] = f"{server.host}:{server.port}"
78
108
  server_information["dagster/table_name"] = (
@@ -0,0 +1,23 @@
1
+ from datacontract.data_contract import DataContractSpecification
2
+
3
+
4
+ def get_owner(
5
+ data_contract_specification: DataContractSpecification,
6
+ is_team: bool = True,
7
+ ) -> list[str] | None:
8
+ """Return the owner of a data contract, optionally formatted as a team identifier.
9
+
10
+ Args:
11
+ data_contract_specification (DataContractSpecification): The data contract specification containing ownership metadata.
12
+ is_team (bool, optional): If True, formats the owner as a team identifier (e.g., 'team:owner').
13
+ If False, returns the raw owner string. Defaults to True.
14
+
15
+ Returns:
16
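+ list[str] | None: A single-element list containing the owner string, formatted depending on `is_team`. As written, the function never returns None: a missing owner is placed into the list as-is (e.g. 'team:None' when `is_team` is True).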
17
+ """
18
+ owner = data_contract_specification.info.owner
19
+
20
+ if is_team:
21
+ return [f"team:{owner}"]
22
+
23
+ return [owner]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Summary: Load metadata and asset check specifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
6
  Requires-Python: >=3.10.0
@@ -10,8 +10,10 @@ dagster_datacontract.egg-info/top_level.txt
10
10
  dagster_datacontract/description/__init__.py
11
11
  dagster_datacontract/description/description.py
12
12
  dagster_datacontract/metadata/__init__.py
13
+ dagster_datacontract/metadata/links.py
13
14
  dagster_datacontract/metadata/server_information.py
14
15
  dagster_datacontract/metadata/table_colums.py
16
+ dagster_datacontract/owners/__init__.py
15
17
  dagster_datacontract/tags/__init__.py
16
18
  dagster_datacontract/tags/tags.py
17
19
  dagster_datacontract/utils/__init__.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dagster-datacontract"
3
- version = "0.3.1"
3
+ version = "0.4.0"
4
4
  description = "Load metadata and asset check specifications from data contracts."
5
5
  authors = [
6
6
  { name = "Fredrik Bakken", email = "fredrik@dataheim.io" }