dagster-datacontract 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,6 @@ from datetime import timedelta
2
2
  from typing import Any
3
3
 
4
4
  import dagster as dg
5
- from dagster import TableColumnLineage, TableSchema
6
5
  from datacontract.data_contract import DataContract
7
6
  from datacontract.model.run import ResultEnum
8
7
  from loguru import logger
@@ -16,18 +15,28 @@ from dagster_datacontract.metadata import (
16
15
  )
17
16
  from dagster_datacontract.owners import get_owner
18
17
  from dagster_datacontract.tags import get_tags
19
- from dagster_datacontract.utils import normalize_path
18
+ from dagster_datacontract.utils import combine_parts, normalize_path
20
19
 
21
20
 
22
21
  class DataContractLoader:
23
22
  def __init__(
24
23
  self,
25
24
  asset_name: str,
26
- data_contract: DataContract,
25
+ data_contract: DataContract | None = None,
26
+ data_contract_path: str | None = None,
27
27
  ):
28
+ if data_contract is None and data_contract_path is None:
29
+ raise ValueError(
30
+ "Either 'data_contract' or 'data_contract_path' must be provided."
31
+ )
32
+
28
33
  self.asset_name = asset_name
29
34
  self.asset_key = dg.AssetKey(path=self.asset_name)
30
- self.data_contract = data_contract
35
+ self.data_contract = (
36
+ data_contract
37
+ if data_contract
38
+ else DataContract(data_contract_file=data_contract_path)
39
+ )
31
40
  self.data_contract_specification = (
32
41
  self.data_contract.get_data_contract_specification()
33
42
  )
@@ -40,10 +49,18 @@ class DataContractLoader:
40
49
  self.owner = get_owner(self.data_contract_specification)
41
50
  self.version = self._load_version()
42
51
  self.cron_schedule = self._load_cron_schedule()
52
+ self.asset_spec = dg.AssetSpec(
53
+ key=asset_name,
54
+ description=self.description,
55
+ metadata=self.metadata,
56
+ code_version=self.version,
57
+ owners=self.owner,
58
+ tags=self.tags,
59
+ )
43
60
 
44
61
  def _load_metadata(
45
62
  self,
46
- ) -> dict[str, TableColumnLineage | TableSchema | Any] | None:
63
+ ) -> dict[str, dg.TableColumnLineage | dg.TableSchema | Any] | None:
47
64
  metadata = (
48
65
  {
49
66
  "datacontract/path": dg.MetadataValue.url(
@@ -56,19 +73,24 @@ class DataContractLoader:
56
73
  columns = []
57
74
  deps_by_column = {}
58
75
 
59
- fields = self.data_contract_specification.models.get(self.asset_name).fields
76
+ try:
77
+ fields = self.data_contract_specification.models.get(self.asset_name).fields
60
78
 
61
- for column_name, column_field in fields.items():
62
- table_column = get_table_column(column_name, column_field)
63
- columns.append(table_column)
79
+ for column_name, column_field in fields.items():
80
+ table_column = get_table_column(column_name, column_field)
81
+ columns.append(table_column)
64
82
 
65
- table_column_lineage = get_column_lineage(column_field)
66
- deps_by_column[column_name] = table_column_lineage
83
+ table_column_lineage = get_column_lineage(column_field)
84
+ deps_by_column[column_name] = table_column_lineage
67
85
 
68
- metadata["dagster/column_schema"] = dg.TableSchema(columns=columns)
69
- metadata["dagster/column_lineage"] = dg.TableColumnLineage(
70
- deps_by_column=deps_by_column
71
- )
86
+ metadata["dagster/column_schema"] = dg.TableSchema(columns=columns)
87
+ metadata["dagster/column_lineage"] = dg.TableColumnLineage(
88
+ deps_by_column=deps_by_column
89
+ )
90
+ except AttributeError as e:
91
+ logger.warning(
92
+ f"No field named {self.asset_name} found in data contract.\n{e}"
93
+ )
72
94
 
73
95
  server_information = get_server_information(
74
96
  self.data_contract_specification,
@@ -158,3 +180,55 @@ class DataContractLoader:
158
180
  )
159
181
 
160
182
  return freshness_checks
183
+
184
+ def combine_asset_specs(
185
+ self,
186
+ asset_spec: dg.AssetSpec,
187
+ ) -> dg.AssetSpec:
188
+ """Merge the given AssetSpec with the current object's attributes to produce a new AssetSpec.
189
+
190
+ This method combines metadata, descriptions, code versions, owners, and tags from the
191
+ provided `asset_spec` and the current instance. Preference is generally given to the
192
+ current instance's values where appropriate. Fields like dependencies, skippability,
193
+ group name, automation condition, kinds, and partition definitions are taken directly
194
+ from the input `asset_spec`.
195
+
196
+ Args:
197
+ asset_spec (dg.AssetSpec): The base asset specification to merge with the current one.
198
+
199
+ Returns:
200
+ dg.AssetSpec: A new AssetSpec instance containing the combined data.
201
+
202
+ Notes:
203
+ - Descriptions are joined with double newlines (`"\n\n"`).
204
+ - Code versions are joined with an underscore (`"_"`).
205
+ - Owners are concatenated.
206
+ - Metadata and tags are merged with the current instance taking precedence.
207
+ """
208
+ description = combine_parts(
209
+ [asset_spec.description, self.description], delimiter="\n\n"
210
+ )
211
+ metadata = {
212
+ **asset_spec.metadata,
213
+ **self.metadata,
214
+ }
215
+ code_version = combine_parts(
216
+ [asset_spec.code_version, self.version], delimiter="_"
217
+ )
218
+ owners = list(asset_spec.owners) + self.owner
219
+ tags = {**asset_spec.tags, **self.tags}
220
+
221
+ return dg.AssetSpec(
222
+ key=self.asset_name,
223
+ deps=asset_spec.deps,
224
+ description=description,
225
+ metadata=metadata,
226
+ skippable=asset_spec.skippable,
227
+ group_name=asset_spec.group_name,
228
+ code_version=code_version,
229
+ automation_condition=asset_spec.automation_condition,
230
+ owners=owners,
231
+ tags=tags,
232
+ kinds=asset_spec.kinds,
233
+ partitions_def=asset_spec.partitions_def,
234
+ )
@@ -1,28 +1,8 @@
1
- import os
2
- import urllib.parse
3
-
4
-
5
- def normalize_path(path: str) -> str:
6
- """Normalizes a file path to ensure it is returned in a consistent URI format.
7
-
8
- This function checks if the provided path is a local file path (with no scheme
9
- or with the 'file' scheme) and converts it into a fully qualified file URI.
10
- If the path already has a non-'file' scheme (e.g., 's3://', 'http://'),
11
- it is returned unchanged.
12
-
13
- Parameters:
14
- path (str): The input file path. This can be a relative or absolute local path,
15
- a path starting with `~`, or a URI with a supported scheme.
16
-
17
- Returns:
18
- str: A normalized path string:
19
- - If the input is a local path or has a "file" scheme, returns it in the form "file:///absolute/path".
20
- - If the input has another scheme (e.g., "s3://", "http://"), returns it unchanged.
21
- """
22
- parsed = urllib.parse.urlparse(path)
23
-
24
- if not parsed.scheme or parsed.scheme == "file":
25
- full_path = os.path.abspath(os.path.expanduser(path))
26
- return f"file://{full_path}"
27
- else:
28
- return path
1
+ from dagster_datacontract.utils.combine_strings import combine_parts
2
+ from dagster_datacontract.utils.paths import get_absolute_path, normalize_path
3
+
4
+ __all__ = [
5
+ "combine_parts",
6
+ "get_absolute_path",
7
+ "normalize_path",
8
+ ]
@@ -0,0 +1,29 @@
1
+ from collections.abc import Iterable
2
+
3
+
4
+ def combine_parts(parts: Iterable[str | None], delimiter: str = "_") -> str:
5
+ """
6
+ Combine multiple optional strings using a specified delimiter.
7
+
8
+ This function takes an iterable of optional strings and joins the non-None,
9
+ non-empty strings using the given delimiter. None values and empty strings
10
+ are ignored. If all values are None or empty, the result is an empty string.
11
+
12
+ Args:
13
+ parts (Iterable[Optional[str]]): An iterable of strings or None values to combine.
14
+ delimiter (str): A string used to separate the non-None parts. Defaults to "_".
15
+
16
+ Returns:
17
+ str: A single combined string of all non-None, non-empty parts separated by the delimiter.
18
+
19
+ Examples:
20
+ >>> combine_parts(["v1", "2023", None])
21
+ 'v1_2023'
22
+
23
+ >>> combine_parts([None, None])
24
+ ''
25
+
26
+ >>> combine_parts(["", "alpha", None])
27
+ 'alpha'
28
+ """
29
+ return delimiter.join(filter(None, parts))
@@ -0,0 +1,51 @@
1
+ import os
2
+ from pathlib import Path
3
+ from urllib.parse import urlparse
4
+
5
+ import dagster as dg
6
+
7
+
8
+ def normalize_path(path: str) -> str:
9
+ """Normalizes a file path to ensure it is returned in a consistent URI format.
10
+
11
+ This function checks if the provided path is a local file path (with no scheme
12
+ or with the 'file' scheme) and converts it into a fully qualified file URI.
13
+ If the path already has a non-'file' scheme (e.g., 's3://', 'http://'),
14
+ it is returned unchanged.
15
+
16
+ Parameters:
17
+ path (str): The input file path. This can be a relative or absolute local path,
18
+ a path starting with `~`, or a URI with a supported scheme.
19
+
20
+ Returns:
21
+ str: A normalized path string:
22
+ - If the input is a local path or has a "file" scheme, returns it in the form "file:///absolute/path".
23
+ - If the input has another scheme (e.g., "s3://", "http://"), returns it unchanged.
24
+ """
25
+ parsed = urlparse(path)
26
+
27
+ if not parsed.scheme or parsed.scheme == "file":
28
+ full_path = os.path.abspath(os.path.expanduser(path))
29
+ return f"file://{full_path}"
30
+ else:
31
+ return path
32
+
33
+
34
+ def get_absolute_path(
35
+ context_path: Path,
36
+ full_path: str,
37
+ ) -> Path:
38
+ """TODO."""
39
+ if isinstance(full_path, dg.UrlMetadataValue):
40
+ full_path = full_path.url
41
+
42
+ parsed_path = urlparse(full_path)
43
+ if parsed_path.scheme == "file":
44
+ full_path = Path(parsed_path.path)
45
+ else:
46
+ full_path = Path(full_path)
47
+
48
+ if full_path.is_absolute():
49
+ return full_path
50
+
51
+ return Path(context_path, full_path).absolute()
@@ -1,13 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Load metadata and asset check specifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
- Requires-Python: >=3.10.0
6
+ Requires-Python: >=3.10
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: dagster>=1.10.10
10
- Requires-Dist: datacontract-cli>=0.10.23
10
+ Requires-Dist: dagster-dg>=0.26.11
11
+ Requires-Dist: datacontract-cli>=0.10.24
11
12
  Requires-Dist: loguru>=0.7.3
12
13
  Dynamic: license-file
13
14
 
@@ -25,7 +26,7 @@ uv add dagster-datacontract
25
26
 
26
27
  ## Simple Example
27
28
 
28
- The following example can be found inside the [example](https://github.com/dataheim-io/dagster-datacontract/tree/main/example) directory:
29
+ The following example can be found inside the [examples/simple](https://github.com/dataheim-io/dagster-datacontract/tree/main/examples/simple) directory:
29
30
 
30
31
  ```python
31
32
  from datetime import timedelta
@@ -1,4 +1,4 @@
1
- dagster_datacontract/__init__.py,sha256=nlLGDivblIOJqE5CUA7wzSb_MwSVO_3YwhO3dyVjwgo,5711
1
+ dagster_datacontract/__init__.py,sha256=Tv6_G45c16Yq5Sh9A5Xw9rxkkIiAm9BBxuK79kkvSvE,8585
2
2
  dagster_datacontract/description/__init__.py,sha256=ulWqPp5jIPvCzaDFZcjLjcDkljJ5j_FRsE0dXhK8Wlc,104
3
3
  dagster_datacontract/description/description.py,sha256=FmjgCYDpJ9UHrvAv0sAthfRohDjdG0lL1XcMKK8QMmI,1646
4
4
  dagster_datacontract/metadata/__init__.py,sha256=Gj7Htl3rYRXsE-631yr4LWqL7Tf5bZKFEFjTeglxZek,359
@@ -8,9 +8,11 @@ dagster_datacontract/metadata/table_colums.py,sha256=Q7ZCiMReWU4-T2YfBvtt5vvoVXE
8
8
  dagster_datacontract/owners/__init__.py,sha256=c0AhLQRzfw-QPmsF9rPXRyE6VoLmgDRRNgVqG8JUvFs,882
9
9
  dagster_datacontract/tags/__init__.py,sha256=2Ph-M0WbBKUjJWIzM_cEBW3SQZh7Nq8oy5MbD5bt_lc,76
10
10
  dagster_datacontract/tags/tags.py,sha256=aZ_HTkc-vjJ_rofT32fT_zrLCt9x1ZGn8XoihhOMhfU,1414
11
- dagster_datacontract/utils/__init__.py,sha256=Zfbuf20Eorf7BD8gSDASiPqgPwWu8Mz03r3aa2zE4NA,1106
12
- dagster_datacontract-0.4.0.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
13
- dagster_datacontract-0.4.0.dist-info/METADATA,sha256=RRJoWluvktDqQhXBH7dsQOsctgrUQYIgiuo4QAbxYu4,3029
14
- dagster_datacontract-0.4.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
15
- dagster_datacontract-0.4.0.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
16
- dagster_datacontract-0.4.0.dist-info/RECORD,,
11
+ dagster_datacontract/utils/__init__.py,sha256=GSQ2Zry9pKlLhWI5Vjoj3X7iZiEgt-SjqnQRwfplbHM,231
12
+ dagster_datacontract/utils/combine_strings.py,sha256=nWy3unX6yuPi8YmvDTl_mO10K6MbJP8cxQWPOMDj6G4,987
13
+ dagster_datacontract/utils/paths.py,sha256=Rh-l5GSmxZwhIVQ_aBJPHTKU5afEma8hlA5RT31EhbY,1611
14
+ dagster_datacontract-0.4.1.dist-info/licenses/LICENSE,sha256=9ULsEM1ICzCaGoso40plwO-d_SCQ7nsU6ZA4xgfaRq8,11338
15
+ dagster_datacontract-0.4.1.dist-info/METADATA,sha256=KAjnQQkr1UUNBAPcQ44mJcqUqmcEqxHDvr3pU-5YyWU,3078
16
+ dagster_datacontract-0.4.1.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
17
+ dagster_datacontract-0.4.1.dist-info/top_level.txt,sha256=_HUQ6OJ50Q0VZxEkdocTtxk1QkJpztb1QY7A0rcvtCE,21
18
+ dagster_datacontract-0.4.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (79.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5