dagster-datacontract 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/PKG-INFO +8 -7
  2. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/README.md +5 -4
  3. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract/__init__.py +89 -15
  4. dagster_datacontract-0.5.0/dagster_datacontract/utils/__init__.py +8 -0
  5. dagster_datacontract-0.5.0/dagster_datacontract/utils/combine_strings.py +29 -0
  6. dagster_datacontract-0.4.0/dagster_datacontract/utils/__init__.py → dagster_datacontract-0.5.0/dagster_datacontract/utils/paths.py +25 -2
  7. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract.egg-info/PKG-INFO +8 -7
  8. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract.egg-info/SOURCES.txt +3 -1
  9. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract.egg-info/requires.txt +1 -1
  10. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/pyproject.toml +10 -4
  11. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/LICENSE +0 -0
  12. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract/description/__init__.py +0 -0
  13. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract/description/description.py +0 -0
  14. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract/metadata/__init__.py +0 -0
  15. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract/metadata/links.py +0 -0
  16. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract/metadata/server_information.py +0 -0
  17. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract/metadata/table_colums.py +0 -0
  18. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract/owners/__init__.py +0 -0
  19. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract/tags/__init__.py +0 -0
  20. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract/tags/tags.py +0 -0
  21. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract.egg-info/dependency_links.txt +0 -0
  22. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/dagster_datacontract.egg-info/top_level.txt +0 -0
  23. {dagster_datacontract-0.4.0 → dagster_datacontract-0.5.0}/setup.cfg +0 -0
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Load metadata and asset check specifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
- Requires-Python: >=3.10.0
6
+ Requires-Python: >=3.10
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: dagster>=1.10.10
10
- Requires-Dist: datacontract-cli>=0.10.23
10
+ Requires-Dist: datacontract-cli>=0.10.24
11
11
  Requires-Dist: loguru>=0.7.3
12
12
  Dynamic: license-file
13
13
 
@@ -23,9 +23,9 @@ Install `dagster-datacontract` using [`uv`](https://github.com/astral-sh/uv):
23
23
  uv add dagster-datacontract
24
24
  ```
25
25
 
26
- ## Simple Example
26
+ ## Example
27
27
 
28
- The following example can be found inside the [example](https://github.com/dataheim-io/dagster-datacontract/tree/main/example) directory:
28
+ The following example can be found inside the [examples](https://github.com/dataheim-io/dagster-datacontract/tree/main/examples) directory:
29
29
 
30
30
  ```python
31
31
  from datetime import timedelta
@@ -39,10 +39,11 @@ from datacontract.data_contract import DataContract
39
39
  from dagster_datacontract import DataContractLoader
40
40
 
41
41
  asset_name = "yellow_taxi_trip_records"
42
+ examples_path = Path(__file__).resolve().parent.parent
42
43
  data_contract = DataContractLoader(
43
44
  asset_name=asset_name,
44
45
  data_contract=DataContract(
45
- data_contract_file="./example/datacontract.yml",
46
+ data_contract_file=str(examples_path / "datacontracts" / "datacontract.yml"),
46
47
  server="production",
47
48
  ),
48
49
  )
@@ -59,7 +60,7 @@ data_contract = DataContractLoader(
59
60
  def yellow_taxi_trip_records(
60
61
  context: dg.AssetExecutionContext,
61
62
  ) -> None:
62
- download_path = "./example/data"
63
+ download_path = examples_path.parent.parent / "data"
63
64
  Path(download_path).mkdir(parents=True, exist_ok=True)
64
65
 
65
66
  url = "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet"
@@ -10,9 +10,9 @@ Install `dagster-datacontract` using [`uv`](https://github.com/astral-sh/uv):
10
10
  uv add dagster-datacontract
11
11
  ```
12
12
 
13
- ## Simple Example
13
+ ## Example
14
14
 
15
- The following example can be found inside the [example](https://github.com/dataheim-io/dagster-datacontract/tree/main/example) directory:
15
+ The following example can be found inside the [examples](https://github.com/dataheim-io/dagster-datacontract/tree/main/examples) directory:
16
16
 
17
17
  ```python
18
18
  from datetime import timedelta
@@ -26,10 +26,11 @@ from datacontract.data_contract import DataContract
26
26
  from dagster_datacontract import DataContractLoader
27
27
 
28
28
  asset_name = "yellow_taxi_trip_records"
29
+ examples_path = Path(__file__).resolve().parent.parent
29
30
  data_contract = DataContractLoader(
30
31
  asset_name=asset_name,
31
32
  data_contract=DataContract(
32
- data_contract_file="./example/datacontract.yml",
33
+ data_contract_file=str(examples_path / "datacontracts" / "datacontract.yml"),
33
34
  server="production",
34
35
  ),
35
36
  )
@@ -46,7 +47,7 @@ data_contract = DataContractLoader(
46
47
  def yellow_taxi_trip_records(
47
48
  context: dg.AssetExecutionContext,
48
49
  ) -> None:
49
- download_path = "./example/data"
50
+ download_path = examples_path.parent.parent / "data"
50
51
  Path(download_path).mkdir(parents=True, exist_ok=True)
51
52
 
52
53
  url = "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet"
@@ -2,7 +2,6 @@ from datetime import timedelta
2
2
  from typing import Any
3
3
 
4
4
  import dagster as dg
5
- from dagster import TableColumnLineage, TableSchema
6
5
  from datacontract.data_contract import DataContract
7
6
  from datacontract.model.run import ResultEnum
8
7
  from loguru import logger
@@ -16,18 +15,28 @@ from dagster_datacontract.metadata import (
16
15
  )
17
16
  from dagster_datacontract.owners import get_owner
18
17
  from dagster_datacontract.tags import get_tags
19
- from dagster_datacontract.utils import normalize_path
18
+ from dagster_datacontract.utils import combine_parts, normalize_path
20
19
 
21
20
 
22
21
  class DataContractLoader:
23
22
  def __init__(
24
23
  self,
25
24
  asset_name: str,
26
- data_contract: DataContract,
25
+ data_contract: DataContract | None = None,
26
+ data_contract_path: str | None = None,
27
27
  ):
28
+ if data_contract is None and data_contract_path is None:
29
+ raise ValueError(
30
+ "Either 'data_contract' or 'data_contract_path' must be provided."
31
+ )
32
+
28
33
  self.asset_name = asset_name
29
34
  self.asset_key = dg.AssetKey(path=self.asset_name)
30
- self.data_contract = data_contract
35
+ self.data_contract = (
36
+ data_contract
37
+ if data_contract
38
+ else DataContract(data_contract_file=data_contract_path)
39
+ )
31
40
  self.data_contract_specification = (
32
41
  self.data_contract.get_data_contract_specification()
33
42
  )
@@ -40,10 +49,18 @@ class DataContractLoader:
40
49
  self.owner = get_owner(self.data_contract_specification)
41
50
  self.version = self._load_version()
42
51
  self.cron_schedule = self._load_cron_schedule()
52
+ self.asset_spec = dg.AssetSpec(
53
+ key=asset_name,
54
+ description=self.description,
55
+ metadata=self.metadata,
56
+ code_version=self.version,
57
+ owners=self.owner,
58
+ tags=self.tags,
59
+ )
43
60
 
44
61
  def _load_metadata(
45
62
  self,
46
- ) -> dict[str, TableColumnLineage | TableSchema | Any] | None:
63
+ ) -> dict[str, dg.TableColumnLineage | dg.TableSchema | Any] | None:
47
64
  metadata = (
48
65
  {
49
66
  "datacontract/path": dg.MetadataValue.url(
@@ -56,19 +73,24 @@ class DataContractLoader:
56
73
  columns = []
57
74
  deps_by_column = {}
58
75
 
59
- fields = self.data_contract_specification.models.get(self.asset_name).fields
76
+ try:
77
+ fields = self.data_contract_specification.models.get(self.asset_name).fields
60
78
 
61
- for column_name, column_field in fields.items():
62
- table_column = get_table_column(column_name, column_field)
63
- columns.append(table_column)
79
+ for column_name, column_field in fields.items():
80
+ table_column = get_table_column(column_name, column_field)
81
+ columns.append(table_column)
64
82
 
65
- table_column_lineage = get_column_lineage(column_field)
66
- deps_by_column[column_name] = table_column_lineage
83
+ table_column_lineage = get_column_lineage(column_field)
84
+ deps_by_column[column_name] = table_column_lineage
67
85
 
68
- metadata["dagster/column_schema"] = dg.TableSchema(columns=columns)
69
- metadata["dagster/column_lineage"] = dg.TableColumnLineage(
70
- deps_by_column=deps_by_column
71
- )
86
+ metadata["dagster/column_schema"] = dg.TableSchema(columns=columns)
87
+ metadata["dagster/column_lineage"] = dg.TableColumnLineage(
88
+ deps_by_column=deps_by_column
89
+ )
90
+ except AttributeError as e:
91
+ logger.warning(
92
+ f"No field named {self.asset_name} found in data contract.\n{e}"
93
+ )
72
94
 
73
95
  server_information = get_server_information(
74
96
  self.data_contract_specification,
@@ -158,3 +180,55 @@ class DataContractLoader:
158
180
  )
159
181
 
160
182
  return freshness_checks
183
+
184
+ def combine_asset_specs(
185
+ self,
186
+ asset_spec: dg.AssetSpec,
187
+ ) -> dg.AssetSpec:
188
+ """Merge the given AssetSpec with the current object's attributes to produce a new AssetSpec.
189
+
190
+ This method combines metadata, descriptions, code versions, owners, and tags from the
191
+ provided `asset_spec` and the current instance. Preference is generally given to the
192
+ current instance's values where appropriate. Fields like dependencies, skippability,
193
+ group name, automation condition, kinds, and partition definitions are taken directly
194
+ from the input `asset_spec`.
195
+
196
+ Args:
197
+ asset_spec (dg.AssetSpec): The base asset specification to merge with the current one.
198
+
199
+ Returns:
200
+ dg.AssetSpec: A new AssetSpec instance containing the combined data.
201
+
202
+ Notes:
203
+ - Descriptions are joined with double newlines (`"\n\n"`).
204
+ - Code versions are joined with an underscore (`"_"`).
205
+ - Owners are concatenated.
206
+ - Metadata and tags are merged with the current instance taking precedence.
207
+ """
208
+ description = combine_parts(
209
+ [asset_spec.description, self.description], delimiter="\n\n"
210
+ )
211
+ metadata = {
212
+ **asset_spec.metadata,
213
+ **self.metadata,
214
+ }
215
+ code_version = combine_parts(
216
+ [asset_spec.code_version, self.version], delimiter="_"
217
+ )
218
+ owners = list(asset_spec.owners) + self.owner
219
+ tags = {**asset_spec.tags, **self.tags}
220
+
221
+ return dg.AssetSpec(
222
+ key=self.asset_name,
223
+ deps=asset_spec.deps,
224
+ description=description,
225
+ metadata=metadata,
226
+ skippable=asset_spec.skippable,
227
+ group_name=asset_spec.group_name,
228
+ code_version=code_version,
229
+ automation_condition=asset_spec.automation_condition,
230
+ owners=owners,
231
+ tags=tags,
232
+ kinds=asset_spec.kinds,
233
+ partitions_def=asset_spec.partitions_def,
234
+ )
@@ -0,0 +1,8 @@
1
+ from dagster_datacontract.utils.combine_strings import combine_parts
2
+ from dagster_datacontract.utils.paths import get_absolute_path, normalize_path
3
+
4
+ __all__ = [
5
+ "combine_parts",
6
+ "get_absolute_path",
7
+ "normalize_path",
8
+ ]
@@ -0,0 +1,29 @@
1
+ from collections.abc import Iterable
2
+
3
+
4
+ def combine_parts(parts: Iterable[str | None], delimiter: str = "_") -> str:
5
+ """
6
+ Combine multiple optional strings using a specified delimiter.
7
+
8
+ This function takes an iterable of optional strings and joins the non-None,
9
+ non-empty strings using the given delimiter. None values and empty strings
10
+ are ignored. If all values are None or empty, the result is an empty string.
11
+
12
+ Args:
13
+ parts (Iterable[Optional[str]]): An iterable of strings or None values to combine.
14
+ delimiter (str): A string used to separate the non-None parts. Defaults to "_".
15
+
16
+ Returns:
17
+ str: A single combined string of all non-None, non-empty parts separated by the delimiter.
18
+
19
+ Examples:
20
+ >>> combine_parts(["v1", "2023", None])
21
+ 'v1_2023'
22
+
23
+ >>> combine_parts([None, None])
24
+ ''
25
+
26
+ >>> combine_parts(["", "alpha", None])
27
+ 'alpha'
28
+ """
29
+ return delimiter.join(filter(None, parts))
@@ -1,5 +1,8 @@
1
1
  import os
2
- import urllib.parse
2
+ from pathlib import Path
3
+ from urllib.parse import urlparse
4
+
5
+ import dagster as dg
3
6
 
4
7
 
5
8
  def normalize_path(path: str) -> str:
@@ -19,10 +22,30 @@ def normalize_path(path: str) -> str:
19
22
  - If the input is a local path or has a "file" scheme, returns it in the form "file:///absolute/path".
20
23
  - If the input has another scheme (e.g., "s3://", "http://"), returns it unchanged.
21
24
  """
22
- parsed = urllib.parse.urlparse(path)
25
+ parsed = urlparse(path)
23
26
 
24
27
  if not parsed.scheme or parsed.scheme == "file":
25
28
  full_path = os.path.abspath(os.path.expanduser(path))
26
29
  return f"file://{full_path}"
27
30
  else:
28
31
  return path
32
+
33
+
34
+ def get_absolute_path(
35
+ context_path: Path,
36
+ full_path: str,
37
+ ) -> Path:
38
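+ """Return `full_path` as an absolute Path; accepts a plain path, a `file://` URL, or a `dg.UrlMetadataValue`, and joins non-absolute paths onto `context_path`."""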
39
+ if isinstance(full_path, dg.UrlMetadataValue):
40
+ full_path = full_path.url
41
+
42
+ parsed_path = urlparse(full_path)
43
+ if parsed_path.scheme == "file":
44
+ full_path = Path(parsed_path.path)
45
+ else:
46
+ full_path = Path(full_path)
47
+
48
+ if full_path.is_absolute():
49
+ return full_path
50
+
51
+ return Path(context_path, full_path).absolute()
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dagster-datacontract
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Load metadata and asset check specifications from data contracts.
5
5
  Author-email: Fredrik Bakken <fredrik@dataheim.io>
6
- Requires-Python: >=3.10.0
6
+ Requires-Python: >=3.10
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: dagster>=1.10.10
10
- Requires-Dist: datacontract-cli>=0.10.23
10
+ Requires-Dist: datacontract-cli>=0.10.24
11
11
  Requires-Dist: loguru>=0.7.3
12
12
  Dynamic: license-file
13
13
 
@@ -23,9 +23,9 @@ Install `dagster-datacontract` using [`uv`](https://github.com/astral-sh/uv):
23
23
  uv add dagster-datacontract
24
24
  ```
25
25
 
26
- ## Simple Example
26
+ ## Example
27
27
 
28
- The following example can be found inside the [example](https://github.com/dataheim-io/dagster-datacontract/tree/main/example) directory:
28
+ The following example can be found inside the [examples](https://github.com/dataheim-io/dagster-datacontract/tree/main/examples) directory:
29
29
 
30
30
  ```python
31
31
  from datetime import timedelta
@@ -39,10 +39,11 @@ from datacontract.data_contract import DataContract
39
39
  from dagster_datacontract import DataContractLoader
40
40
 
41
41
  asset_name = "yellow_taxi_trip_records"
42
+ examples_path = Path(__file__).resolve().parent.parent
42
43
  data_contract = DataContractLoader(
43
44
  asset_name=asset_name,
44
45
  data_contract=DataContract(
45
- data_contract_file="./example/datacontract.yml",
46
+ data_contract_file=str(examples_path / "datacontracts" / "datacontract.yml"),
46
47
  server="production",
47
48
  ),
48
49
  )
@@ -59,7 +60,7 @@ data_contract = DataContractLoader(
59
60
  def yellow_taxi_trip_records(
60
61
  context: dg.AssetExecutionContext,
61
62
  ) -> None:
62
- download_path = "./example/data"
63
+ download_path = examples_path.parent.parent / "data"
63
64
  Path(download_path).mkdir(parents=True, exist_ok=True)
64
65
 
65
66
  url = "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet"
@@ -16,4 +16,6 @@ dagster_datacontract/metadata/table_colums.py
16
16
  dagster_datacontract/owners/__init__.py
17
17
  dagster_datacontract/tags/__init__.py
18
18
  dagster_datacontract/tags/tags.py
19
- dagster_datacontract/utils/__init__.py
19
+ dagster_datacontract/utils/__init__.py
20
+ dagster_datacontract/utils/combine_strings.py
21
+ dagster_datacontract/utils/paths.py
@@ -1,3 +1,3 @@
1
1
  dagster>=1.10.10
2
- datacontract-cli>=0.10.23
2
+ datacontract-cli>=0.10.24
3
3
  loguru>=0.7.3
@@ -1,30 +1,36 @@
1
1
  [project]
2
2
  name = "dagster-datacontract"
3
- version = "0.4.0"
3
+ version = "0.5.0"
4
4
  description = "Load metadata and asset check specifications from data contracts."
5
5
  authors = [
6
6
  { name = "Fredrik Bakken", email = "fredrik@dataheim.io" }
7
7
  ]
8
8
  readme = "README.md"
9
- requires-python = ">=3.10.0"
9
+ requires-python = ">=3.10"
10
10
  dependencies = [
11
11
  "dagster>=1.10.10",
12
- "datacontract-cli>=0.10.23",
12
+ "datacontract-cli>=0.10.24",
13
13
  "loguru>=0.7.3",
14
14
  ]
15
15
 
16
16
  [dependency-groups]
17
17
  dev = [
18
+ "dagster-dg-cli[local]>=1.10.18",
18
19
  "dagster-webserver>=1.10.10",
19
20
  "polars>=1.27.1",
20
21
  "pre-commit>=4.2.0",
21
- "ruff>=0.11.5",
22
+ "ruff>=0.11.6",
22
23
  ]
23
24
 
24
25
  [build-system]
25
26
  requires = ["setuptools"]
26
27
  build-backend = "setuptools.build_meta"
27
28
 
29
+ [tool.setuptools.packages.find]
30
+ where = ["."]
31
+ include = ["dagster_datacontract*"]
32
+ exclude = ["examples*", "tests*"]
33
+
28
34
  [tool.ruff.lint]
29
35
  extend-select = [
30
36
  "UP", # pyupgrade