dagster-azure 0.21.13__tar.gz → 0.28.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/LICENSE +1 -1
- dagster_azure-0.28.1/PKG-INFO +32 -0
- dagster_azure-0.28.1/README.md +4 -0
- dagster_azure-0.28.1/dagster_azure/__init__.py +5 -0
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/__init__.py +4 -9
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/file_manager.py +24 -18
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/io_manager.py +45 -29
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/resources.py +65 -7
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/utils.py +8 -3
- dagster_azure-0.28.1/dagster_azure/blob/__init__.py +12 -0
- dagster_azure-0.28.1/dagster_azure/blob/compute_log_manager.py +391 -0
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/blob/fake_blob_client.py +9 -1
- dagster_azure-0.28.1/dagster_azure/blob/resources.py +126 -0
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/blob/utils.py +10 -4
- dagster_azure-0.28.1/dagster_azure/fakes/__init__.py +5 -0
- {dagster-azure-0.21.13/dagster_azure/adls2 → dagster_azure-0.28.1/dagster_azure/fakes}/fake_adls2_resource.py +3 -4
- dagster_azure-0.28.1/dagster_azure/pipes/__init__.py +9 -0
- dagster_azure-0.28.1/dagster_azure/pipes/clients/__init__.py +5 -0
- dagster_azure-0.28.1/dagster_azure/pipes/clients/azureml.py +140 -0
- dagster_azure-0.28.1/dagster_azure/pipes/context_injectors.py +47 -0
- dagster_azure-0.28.1/dagster_azure/pipes/message_readers.py +83 -0
- dagster_azure-0.28.1/dagster_azure/version.py +1 -0
- dagster_azure-0.28.1/dagster_azure.egg-info/PKG-INFO +32 -0
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/SOURCES.txt +9 -2
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/requires.txt +2 -1
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/setup.py +6 -4
- dagster-azure-0.21.13/PKG-INFO +0 -15
- dagster-azure-0.21.13/README.md +0 -4
- dagster-azure-0.21.13/dagster_azure/__init__.py +0 -5
- dagster-azure-0.21.13/dagster_azure/blob/__init__.py +0 -3
- dagster-azure-0.21.13/dagster_azure/blob/compute_log_manager.py +0 -256
- dagster-azure-0.21.13/dagster_azure/version.py +0 -1
- dagster-azure-0.21.13/dagster_azure.egg-info/PKG-INFO +0 -15
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/MANIFEST.in +0 -0
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/py.typed +0 -0
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/dependency_links.txt +0 -0
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/entry_points.txt +0 -0
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/not-zip-safe +0 -0
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/top_level.txt +0 -0
- {dagster-azure-0.21.13 → dagster_azure-0.28.1}/setup.cfg +0 -0
|
@@ -186,7 +186,7 @@
|
|
|
186
186
|
same "printed page" as the copyright notice for easier
|
|
187
187
|
identification within third-party archives.
|
|
188
188
|
|
|
189
|
-
Copyright
|
|
189
|
+
Copyright 2025 Dagster Labs, Inc.
|
|
190
190
|
|
|
191
191
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
192
|
you may not use this file except in compliance with the License.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dagster-azure
|
|
3
|
+
Version: 0.28.1
|
|
4
|
+
Summary: Package for Azure-specific Dagster framework op and resource components.
|
|
5
|
+
Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-azure
|
|
6
|
+
Author: Dagster Labs
|
|
7
|
+
Author-email: hello@dagsterlabs.com
|
|
8
|
+
License: Apache-2.0
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Requires-Python: >=3.9,<3.14
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: azure-core<2.0.0,>=1.7.0
|
|
19
|
+
Requires-Dist: azure-identity<2.0.0,>=1.7.0
|
|
20
|
+
Requires-Dist: azure-ai-ml<2.0.0,>=1.28.0
|
|
21
|
+
Requires-Dist: azure-storage-blob<13.0.0,>=12.5.0
|
|
22
|
+
Requires-Dist: azure-storage-file-datalake<13.0.0,>=12.5
|
|
23
|
+
Requires-Dist: dagster==1.12.1
|
|
24
|
+
Dynamic: author
|
|
25
|
+
Dynamic: author-email
|
|
26
|
+
Dynamic: classifier
|
|
27
|
+
Dynamic: home-page
|
|
28
|
+
Dynamic: license
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
Dynamic: requires-dist
|
|
31
|
+
Dynamic: requires-python
|
|
32
|
+
Dynamic: summary
|
|
@@ -1,19 +1,14 @@
|
|
|
1
|
-
from .
|
|
2
|
-
FakeADLS2Resource as FakeADLS2Resource,
|
|
3
|
-
FakeADLS2ServiceClient as FakeADLS2ServiceClient,
|
|
4
|
-
fake_adls2_resource as fake_adls2_resource,
|
|
5
|
-
)
|
|
6
|
-
from .file_manager import (
|
|
1
|
+
from dagster_azure.adls2.file_manager import (
|
|
7
2
|
ADLS2FileHandle as ADLS2FileHandle,
|
|
8
3
|
ADLS2FileManager as ADLS2FileManager,
|
|
9
4
|
)
|
|
10
|
-
from .io_manager import (
|
|
5
|
+
from dagster_azure.adls2.io_manager import (
|
|
11
6
|
ADLS2PickleIOManager as ADLS2PickleIOManager,
|
|
12
7
|
ConfigurablePickledObjectADLS2IOManager as ConfigurablePickledObjectADLS2IOManager,
|
|
13
8
|
PickledObjectADLS2IOManager as PickledObjectADLS2IOManager,
|
|
14
9
|
adls2_pickle_io_manager as adls2_pickle_io_manager,
|
|
15
10
|
)
|
|
16
|
-
from .resources import (
|
|
11
|
+
from dagster_azure.adls2.resources import (
|
|
17
12
|
ADLS2DefaultAzureCredential as ADLS2DefaultAzureCredential,
|
|
18
13
|
ADLS2Key as ADLS2Key,
|
|
19
14
|
ADLS2Resource as ADLS2Resource,
|
|
@@ -21,4 +16,4 @@ from .resources import (
|
|
|
21
16
|
adls2_file_manager as adls2_file_manager,
|
|
22
17
|
adls2_resource as adls2_resource,
|
|
23
18
|
)
|
|
24
|
-
from .utils import create_adls2_client as create_adls2_client
|
|
19
|
+
from dagster_azure.adls2.utils import create_adls2_client as create_adls2_client
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import io
|
|
2
2
|
import uuid
|
|
3
3
|
from contextlib import contextmanager
|
|
4
|
+
from typing import Any, Optional
|
|
4
5
|
|
|
5
6
|
import dagster._check as check
|
|
6
7
|
from dagster._core.storage.file_manager import (
|
|
@@ -10,6 +11,8 @@ from dagster._core.storage.file_manager import (
|
|
|
10
11
|
check_file_like_obj,
|
|
11
12
|
)
|
|
12
13
|
|
|
14
|
+
from dagster_azure.adls2.utils import DataLakeServiceClient
|
|
15
|
+
|
|
13
16
|
|
|
14
17
|
class ADLS2FileHandle(FileHandle):
|
|
15
18
|
"""A reference to a file on ADLS2."""
|
|
@@ -20,44 +23,44 @@ class ADLS2FileHandle(FileHandle):
|
|
|
20
23
|
self._key = check.str_param(key, "key")
|
|
21
24
|
|
|
22
25
|
@property
|
|
23
|
-
def account(self):
|
|
26
|
+
def account(self) -> str:
|
|
24
27
|
"""str: The name of the ADLS2 account."""
|
|
25
28
|
return self._account
|
|
26
29
|
|
|
27
30
|
@property
|
|
28
|
-
def file_system(self):
|
|
31
|
+
def file_system(self) -> str:
|
|
29
32
|
"""str: The name of the ADLS2 file system."""
|
|
30
33
|
return self._file_system
|
|
31
34
|
|
|
32
35
|
@property
|
|
33
|
-
def key(self):
|
|
36
|
+
def key(self) -> str:
|
|
34
37
|
"""str: The ADLS2 key."""
|
|
35
38
|
return self._key
|
|
36
39
|
|
|
37
40
|
@property
|
|
38
|
-
def path_desc(self):
|
|
41
|
+
def path_desc(self) -> str:
|
|
39
42
|
"""str: The file's ADLS2 URL."""
|
|
40
43
|
return self.adls2_path
|
|
41
44
|
|
|
42
45
|
@property
|
|
43
|
-
def adls2_path(self):
|
|
46
|
+
def adls2_path(self) -> str:
|
|
44
47
|
"""str: The file's ADLS2 URL."""
|
|
45
48
|
return f"adfss://{self.file_system}@{self.account}.dfs.core.windows.net/{self.key}"
|
|
46
49
|
|
|
47
50
|
|
|
48
51
|
class ADLS2FileManager(FileManager):
|
|
49
|
-
def __init__(self, adls2_client, file_system, prefix):
|
|
52
|
+
def __init__(self, adls2_client: DataLakeServiceClient, file_system: str, prefix: str):
|
|
50
53
|
self._client = adls2_client
|
|
51
54
|
self._file_system = check.str_param(file_system, "file_system")
|
|
52
55
|
self._prefix = check.str_param(prefix, "prefix")
|
|
53
|
-
self._local_handle_cache = {}
|
|
56
|
+
self._local_handle_cache: dict[str, str] = {}
|
|
54
57
|
self._temp_file_manager = TempfileManager()
|
|
55
58
|
|
|
56
|
-
def copy_handle_to_local_temp(self, file_handle):
|
|
59
|
+
def copy_handle_to_local_temp(self, file_handle: ADLS2FileHandle): # pyright: ignore[reportIncompatibleMethodOverride]
|
|
57
60
|
self._download_if_not_cached(file_handle)
|
|
58
61
|
return self._get_local_path(file_handle)
|
|
59
62
|
|
|
60
|
-
def _download_if_not_cached(self, file_handle):
|
|
63
|
+
def _download_if_not_cached(self, file_handle: ADLS2FileHandle):
|
|
61
64
|
if not self._file_handle_cached(file_handle):
|
|
62
65
|
# instigate download
|
|
63
66
|
temp_file_obj = self._temp_file_manager.tempfile()
|
|
@@ -74,7 +77,7 @@ class ADLS2FileManager(FileManager):
|
|
|
74
77
|
return file_handle
|
|
75
78
|
|
|
76
79
|
@contextmanager
|
|
77
|
-
def read(self, file_handle, mode="rb"):
|
|
80
|
+
def read(self, file_handle: ADLS2FileHandle, mode: str = "rb"): # pyright: ignore[reportIncompatibleMethodOverride]
|
|
78
81
|
check.inst_param(file_handle, "file_handle", ADLS2FileHandle)
|
|
79
82
|
check.str_param(mode, "mode")
|
|
80
83
|
check.param_invariant(mode in {"r", "rb"}, "mode")
|
|
@@ -85,31 +88,34 @@ class ADLS2FileManager(FileManager):
|
|
|
85
88
|
with open(self._get_local_path(file_handle), mode, encoding=encoding) as file_obj:
|
|
86
89
|
yield file_obj
|
|
87
90
|
|
|
88
|
-
def _file_handle_cached(self, file_handle):
|
|
91
|
+
def _file_handle_cached(self, file_handle: ADLS2FileHandle) -> bool:
|
|
89
92
|
return file_handle.adls2_path in self._local_handle_cache
|
|
90
93
|
|
|
91
|
-
def _get_local_path(self, file_handle):
|
|
94
|
+
def _get_local_path(self, file_handle: ADLS2FileHandle) -> str:
|
|
92
95
|
return self._local_handle_cache[file_handle.adls2_path]
|
|
93
96
|
|
|
94
|
-
def read_data(self, file_handle):
|
|
97
|
+
def read_data(self, file_handle: ADLS2FileHandle) -> Any: # pyright: ignore[reportIncompatibleMethodOverride]
|
|
95
98
|
with self.read(file_handle, mode="rb") as file_obj:
|
|
96
99
|
return file_obj.read()
|
|
97
100
|
|
|
98
|
-
def write_data(self, data, ext=None):
|
|
101
|
+
def write_data(self, data: bytes, ext: Optional[str] = None) -> ADLS2FileHandle:
|
|
99
102
|
check.inst_param(data, "data", bytes)
|
|
100
103
|
return self.write(io.BytesIO(data), mode="wb", ext=ext)
|
|
101
104
|
|
|
102
|
-
def write(
|
|
105
|
+
def write( # pyright: ignore[reportIncompatibleMethodOverride]
|
|
106
|
+
self, file_obj: io.BytesIO, mode: str = "wb", ext: Optional[str] = None
|
|
107
|
+
) -> ADLS2FileHandle:
|
|
103
108
|
check_file_like_obj(file_obj)
|
|
104
109
|
adls2_key = self.get_full_key(str(uuid.uuid4()) + (("." + ext) if ext is not None else ""))
|
|
105
110
|
adls2_file = self._client.get_file_client(
|
|
106
111
|
file_system=self._file_system, file_path=adls2_key
|
|
107
112
|
)
|
|
108
113
|
adls2_file.upload_data(file_obj, overwrite=True)
|
|
109
|
-
|
|
114
|
+
account_name = check.not_none(self._client.account_name, "Expected account name to be set")
|
|
115
|
+
return ADLS2FileHandle(account_name, self._file_system, adls2_key)
|
|
110
116
|
|
|
111
|
-
def get_full_key(self, file_key):
|
|
117
|
+
def get_full_key(self, file_key: str) -> str:
|
|
112
118
|
return f"{self._prefix}/{file_key}"
|
|
113
119
|
|
|
114
|
-
def delete_local_temp(self):
|
|
120
|
+
def delete_local_temp(self) -> None:
|
|
115
121
|
self._temp_file_manager.close()
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import pickle
|
|
2
|
+
from collections.abc import Iterator
|
|
2
3
|
from contextlib import contextmanager
|
|
3
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Union
|
|
4
5
|
|
|
5
6
|
from dagster import (
|
|
6
7
|
InputContext,
|
|
@@ -11,6 +12,7 @@ from dagster import (
|
|
|
11
12
|
)
|
|
12
13
|
from dagster._annotations import deprecated
|
|
13
14
|
from dagster._config.pythonic_config import ConfigurableIOManager
|
|
15
|
+
from dagster._core.execution.context.init import InitResourceContext
|
|
14
16
|
from dagster._core.storage.io_manager import dagster_maintained_io_manager
|
|
15
17
|
from dagster._core.storage.upath_io_manager import UPathIOManager
|
|
16
18
|
from dagster._utils import PICKLE_PROTOCOL
|
|
@@ -19,20 +21,27 @@ from pydantic import Field
|
|
|
19
21
|
from upath import UPath
|
|
20
22
|
|
|
21
23
|
from dagster_azure.adls2.resources import ADLS2Resource
|
|
22
|
-
from dagster_azure.adls2.utils import
|
|
23
|
-
|
|
24
|
-
|
|
24
|
+
from dagster_azure.adls2.utils import (
|
|
25
|
+
DataLakeLeaseClient,
|
|
26
|
+
DataLakeServiceClient,
|
|
27
|
+
ResourceNotFoundError,
|
|
28
|
+
)
|
|
29
|
+
from dagster_azure.blob.utils import BlobLeaseClient, BlobServiceClient
|
|
25
30
|
|
|
26
31
|
|
|
27
32
|
class PickledObjectADLS2IOManager(UPathIOManager):
|
|
28
33
|
def __init__(
|
|
29
34
|
self,
|
|
30
|
-
file_system:
|
|
31
|
-
adls2_client:
|
|
32
|
-
blob_client:
|
|
33
|
-
lease_client_constructor:
|
|
35
|
+
file_system: str,
|
|
36
|
+
adls2_client: DataLakeServiceClient,
|
|
37
|
+
blob_client: BlobServiceClient,
|
|
38
|
+
lease_client_constructor: Union[type[DataLakeLeaseClient], type[BlobLeaseClient]],
|
|
34
39
|
prefix: str = "dagster",
|
|
40
|
+
lease_duration: int = 60,
|
|
35
41
|
):
|
|
42
|
+
if lease_duration != -1 and (lease_duration < 15 or lease_duration > 60):
|
|
43
|
+
raise ValueError("lease_duration must be -1 (unlimited) or between 15 and 60")
|
|
44
|
+
|
|
36
45
|
self.adls2_client = adls2_client
|
|
37
46
|
self.file_system_client = self.adls2_client.get_file_system_client(file_system)
|
|
38
47
|
# We also need a blob client to handle copying as ADLS doesn't have a copy API yet
|
|
@@ -41,7 +50,7 @@ class PickledObjectADLS2IOManager(UPathIOManager):
|
|
|
41
50
|
self.prefix = check.str_param(prefix, "prefix")
|
|
42
51
|
|
|
43
52
|
self.lease_client_constructor = lease_client_constructor
|
|
44
|
-
self.lease_duration =
|
|
53
|
+
self.lease_duration = lease_duration
|
|
45
54
|
self.file_system_client.get_file_system_properties()
|
|
46
55
|
super().__init__(base_path=UPath(self.prefix))
|
|
47
56
|
|
|
@@ -74,17 +83,13 @@ class PickledObjectADLS2IOManager(UPathIOManager):
|
|
|
74
83
|
return True
|
|
75
84
|
|
|
76
85
|
def _uri_for_path(self, path: UPath, protocol: str = "abfss://") -> str:
|
|
77
|
-
return "{protocol}{
|
|
78
|
-
protocol=protocol,
|
|
79
|
-
filesystem=self.file_system_client.file_system_name,
|
|
80
|
-
account=self.file_system_client.account_name,
|
|
81
|
-
key=path.as_posix(),
|
|
82
|
-
)
|
|
86
|
+
return f"{protocol}{self.file_system_client.file_system_name}@{self.file_system_client.account_name}.dfs.core.windows.net/{path.as_posix()}"
|
|
83
87
|
|
|
84
88
|
@contextmanager
|
|
85
89
|
def _acquire_lease(self, client: Any, is_rm: bool = False) -> Iterator[str]:
|
|
86
90
|
lease_client = self.lease_client_constructor(client=client)
|
|
87
91
|
try:
|
|
92
|
+
# Unclear why this needs to be type-ignored
|
|
88
93
|
lease_client.acquire(lease_duration=self.lease_duration)
|
|
89
94
|
yield lease_client.id
|
|
90
95
|
finally:
|
|
@@ -134,7 +139,7 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
|
|
|
134
139
|
.. code-block:: python
|
|
135
140
|
|
|
136
141
|
from dagster import Definitions, asset
|
|
137
|
-
from dagster_azure.adls2 import ADLS2PickleIOManager,
|
|
142
|
+
from dagster_azure.adls2 import ADLS2PickleIOManager, ADLS2Resource, ADLS2SASToken
|
|
138
143
|
|
|
139
144
|
@asset
|
|
140
145
|
def asset1():
|
|
@@ -145,14 +150,17 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
|
|
|
145
150
|
def asset2(asset1):
|
|
146
151
|
return df[:5]
|
|
147
152
|
|
|
148
|
-
|
|
153
|
+
Definitions(
|
|
149
154
|
assets=[asset1, asset2],
|
|
150
155
|
resources={
|
|
151
156
|
"io_manager": ADLS2PickleIOManager(
|
|
152
157
|
adls2_file_system="my-cool-fs",
|
|
153
|
-
adls2_prefix="my-cool-prefix"
|
|
158
|
+
adls2_prefix="my-cool-prefix",
|
|
159
|
+
adls2=ADLS2Resource(
|
|
160
|
+
storage_account="my-storage-account",
|
|
161
|
+
credential=ADLS2SASToken(token="my-sas-token"),
|
|
162
|
+
),
|
|
154
163
|
),
|
|
155
|
-
"adls2": adls2_resource,
|
|
156
164
|
},
|
|
157
165
|
)
|
|
158
166
|
|
|
@@ -162,15 +170,18 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
|
|
|
162
170
|
.. code-block:: python
|
|
163
171
|
|
|
164
172
|
from dagster import job
|
|
165
|
-
from dagster_azure.adls2 import ADLS2PickleIOManager,
|
|
173
|
+
from dagster_azure.adls2 import ADLS2PickleIOManager, ADLS2Resource, ADLS2SASToken
|
|
166
174
|
|
|
167
175
|
@job(
|
|
168
176
|
resource_defs={
|
|
169
177
|
"io_manager": ADLS2PickleIOManager(
|
|
170
178
|
adls2_file_system="my-cool-fs",
|
|
171
|
-
adls2_prefix="my-cool-prefix"
|
|
179
|
+
adls2_prefix="my-cool-prefix",
|
|
180
|
+
adls2=ADLS2Resource(
|
|
181
|
+
storage_account="my-storage-account",
|
|
182
|
+
credential=ADLS2SASToken(token="my-sas-token"),
|
|
183
|
+
),
|
|
172
184
|
),
|
|
173
|
-
"adls2": adls2_resource,
|
|
174
185
|
},
|
|
175
186
|
)
|
|
176
187
|
def my_job():
|
|
@@ -182,6 +193,10 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
|
|
|
182
193
|
adls2_prefix: str = Field(
|
|
183
194
|
default="dagster", description="ADLS Gen2 file system prefix to write to."
|
|
184
195
|
)
|
|
196
|
+
lease_duration: int = Field(
|
|
197
|
+
default=60,
|
|
198
|
+
description="Lease duration in seconds. Must be between 15 and 60 seconds or -1 for infinite.",
|
|
199
|
+
)
|
|
185
200
|
|
|
186
201
|
@classmethod
|
|
187
202
|
def _is_dagster_maintained(cls) -> bool:
|
|
@@ -196,6 +211,7 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
|
|
|
196
211
|
self.adls2.blob_client,
|
|
197
212
|
self.adls2.lease_client_constructor,
|
|
198
213
|
self.adls2_prefix,
|
|
214
|
+
self.lease_duration,
|
|
199
215
|
)
|
|
200
216
|
|
|
201
217
|
def load_input(self, context: "InputContext") -> Any:
|
|
@@ -207,7 +223,7 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
|
|
|
207
223
|
|
|
208
224
|
@deprecated(
|
|
209
225
|
breaking_version="2.0",
|
|
210
|
-
additional_warn_text="Please use
|
|
226
|
+
additional_warn_text="Please use ADLS2PickleIOManager instead.",
|
|
211
227
|
)
|
|
212
228
|
class ConfigurablePickledObjectADLS2IOManager(ADLS2PickleIOManager):
|
|
213
229
|
"""Renamed to ADLS2PickleIOManager. See ADLS2PickleIOManager for documentation."""
|
|
@@ -220,7 +236,7 @@ class ConfigurablePickledObjectADLS2IOManager(ADLS2PickleIOManager):
|
|
|
220
236
|
config_schema=ADLS2PickleIOManager.to_config_schema(),
|
|
221
237
|
required_resource_keys={"adls2"},
|
|
222
238
|
)
|
|
223
|
-
def adls2_pickle_io_manager(init_context):
|
|
239
|
+
def adls2_pickle_io_manager(init_context: InitResourceContext) -> PickledObjectADLS2IOManager:
|
|
224
240
|
"""Persistent IO manager using Azure Data Lake Storage Gen2 for storage.
|
|
225
241
|
|
|
226
242
|
Serializes objects via pickling. Suitable for objects storage for distributed executors, so long
|
|
@@ -239,7 +255,7 @@ def adls2_pickle_io_manager(init_context):
|
|
|
239
255
|
|
|
240
256
|
Example usage:
|
|
241
257
|
|
|
242
|
-
|
|
258
|
+
Attach this IO manager to a set of assets.
|
|
243
259
|
|
|
244
260
|
.. code-block:: python
|
|
245
261
|
|
|
@@ -255,7 +271,7 @@ def adls2_pickle_io_manager(init_context):
|
|
|
255
271
|
def asset2(asset1):
|
|
256
272
|
return df[:5]
|
|
257
273
|
|
|
258
|
-
|
|
274
|
+
Definitions(
|
|
259
275
|
assets=[asset1, asset2],
|
|
260
276
|
resources={
|
|
261
277
|
"io_manager": adls2_pickle_io_manager.configured(
|
|
@@ -266,7 +282,7 @@ def adls2_pickle_io_manager(init_context):
|
|
|
266
282
|
)
|
|
267
283
|
|
|
268
284
|
|
|
269
|
-
|
|
285
|
+
Attach this IO manager to your job to make it available to your ops.
|
|
270
286
|
|
|
271
287
|
.. code-block:: python
|
|
272
288
|
|
|
@@ -288,11 +304,11 @@ def adls2_pickle_io_manager(init_context):
|
|
|
288
304
|
adls2_client = adls_resource.adls2_client
|
|
289
305
|
blob_client = adls_resource.blob_client
|
|
290
306
|
lease_client = adls_resource.lease_client_constructor
|
|
291
|
-
|
|
307
|
+
return PickledObjectADLS2IOManager(
|
|
292
308
|
init_context.resource_config["adls2_file_system"],
|
|
293
309
|
adls2_client,
|
|
294
310
|
blob_client,
|
|
295
311
|
lease_client,
|
|
296
312
|
init_context.resource_config.get("adls2_prefix"),
|
|
313
|
+
init_context.resource_config.get("lease_duration"),
|
|
297
314
|
)
|
|
298
|
-
return pickled_io_manager
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any,
|
|
1
|
+
from typing import Any, Union
|
|
2
2
|
|
|
3
3
|
from azure.identity import DefaultAzureCredential
|
|
4
4
|
from azure.storage.filedatalake import DataLakeLeaseClient
|
|
@@ -17,11 +17,11 @@ from dagster._utils.merger import merge_dicts
|
|
|
17
17
|
from pydantic import Field
|
|
18
18
|
from typing_extensions import Literal
|
|
19
19
|
|
|
20
|
+
from dagster_azure.adls2.file_manager import ADLS2FileManager
|
|
21
|
+
from dagster_azure.adls2.io_manager import InitResourceContext
|
|
22
|
+
from dagster_azure.adls2.utils import DataLakeServiceClient, create_adls2_client
|
|
20
23
|
from dagster_azure.blob.utils import BlobServiceClient, create_blob_client
|
|
21
24
|
|
|
22
|
-
from .file_manager import ADLS2FileManager
|
|
23
|
-
from .utils import DataLakeServiceClient, create_adls2_client
|
|
24
|
-
|
|
25
25
|
|
|
26
26
|
class ADLS2SASToken(Config):
|
|
27
27
|
credential_type: Literal["sas"] = "sas"
|
|
@@ -35,7 +35,7 @@ class ADLS2Key(Config):
|
|
|
35
35
|
|
|
36
36
|
class ADLS2DefaultAzureCredential(Config):
|
|
37
37
|
credential_type: Literal["default_azure_credential"] = "default_azure_credential"
|
|
38
|
-
kwargs:
|
|
38
|
+
kwargs: dict[str, Any]
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class ADLS2BaseResource(ConfigurableResource):
|
|
@@ -71,6 +71,64 @@ class ADLS2Resource(ADLS2BaseResource):
|
|
|
71
71
|
|
|
72
72
|
Contains a client for both the Data Lake and Blob APIs, to work around the limitations
|
|
73
73
|
of each.
|
|
74
|
+
|
|
75
|
+
Example usage:
|
|
76
|
+
|
|
77
|
+
Attach this resource to your Definitions to be used by assets and jobs.
|
|
78
|
+
|
|
79
|
+
.. code-block:: python
|
|
80
|
+
|
|
81
|
+
from dagster import Definitions, asset, job, op
|
|
82
|
+
from dagster_azure.adls2 import ADLS2Resource, ADLS2SASToken
|
|
83
|
+
|
|
84
|
+
@asset
|
|
85
|
+
def asset1(adls2: ADLS2Resource):
|
|
86
|
+
adls2.adls2_client.list_file_systems()
|
|
87
|
+
...
|
|
88
|
+
|
|
89
|
+
@op
|
|
90
|
+
def my_op(adls2: ADLS2Resource):
|
|
91
|
+
adls2.adls2_client.list_file_systems()
|
|
92
|
+
...
|
|
93
|
+
|
|
94
|
+
@job
|
|
95
|
+
def my_job():
|
|
96
|
+
my_op()
|
|
97
|
+
|
|
98
|
+
Definitions(
|
|
99
|
+
assets=[asset1],
|
|
100
|
+
jobs=[my_job],
|
|
101
|
+
resources={
|
|
102
|
+
"adls2": ADLS2Resource(
|
|
103
|
+
storage_account="my-storage-account",
|
|
104
|
+
credential=ADLS2SASToken(token="my-sas-token"),
|
|
105
|
+
)
|
|
106
|
+
},
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
Attach this resource to your job to make it available to your ops.
|
|
111
|
+
|
|
112
|
+
.. code-block:: python
|
|
113
|
+
|
|
114
|
+
from dagster import job, op
|
|
115
|
+
from dagster_azure.adls2 import ADLS2Resource, ADLS2SASToken
|
|
116
|
+
|
|
117
|
+
@op
|
|
118
|
+
def my_op(adls2: ADLS2Resource):
|
|
119
|
+
adls2.adls2_client.list_file_systems()
|
|
120
|
+
...
|
|
121
|
+
|
|
122
|
+
@job(
|
|
123
|
+
resource_defs={
|
|
124
|
+
"adls2": ADLS2Resource(
|
|
125
|
+
storage_account="my-storage-account",
|
|
126
|
+
credential=ADLS2SASToken(token="my-sas-token"),
|
|
127
|
+
)
|
|
128
|
+
},
|
|
129
|
+
)
|
|
130
|
+
def my_job():
|
|
131
|
+
my_op()
|
|
74
132
|
"""
|
|
75
133
|
|
|
76
134
|
@classmethod
|
|
@@ -107,7 +165,7 @@ class ADLS2Resource(ADLS2BaseResource):
|
|
|
107
165
|
# to construct the new config and then use that to construct the resource.
|
|
108
166
|
@dagster_maintained_resource
|
|
109
167
|
@resource(ADLS2_CLIENT_CONFIG)
|
|
110
|
-
def adls2_resource(context):
|
|
168
|
+
def adls2_resource(context: InitResourceContext) -> ADLS2Resource:
|
|
111
169
|
"""Resource that gives ops access to Azure Data Lake Storage Gen2.
|
|
112
170
|
|
|
113
171
|
The underlying client is a :py:class:`~azure.storage.filedatalake.DataLakeServiceClient`.
|
|
@@ -171,7 +229,7 @@ def adls2_resource(context):
|
|
|
171
229
|
},
|
|
172
230
|
)
|
|
173
231
|
)
|
|
174
|
-
def adls2_file_manager(context):
|
|
232
|
+
def adls2_file_manager(context: InitResourceContext) -> ADLS2FileManager:
|
|
175
233
|
"""FileManager that provides abstract access to ADLS2.
|
|
176
234
|
|
|
177
235
|
Implements the :py:class:`~dagster._core.storage.file_manager.FileManager` API.
|
|
@@ -3,7 +3,7 @@ import warnings
|
|
|
3
3
|
try:
|
|
4
4
|
# Centralise Azure imports here so we only need to warn in one place
|
|
5
5
|
from azure.core.exceptions import ResourceNotFoundError
|
|
6
|
-
from azure.storage.filedatalake import DataLakeServiceClient
|
|
6
|
+
from azure.storage.filedatalake import DataLakeLeaseClient, DataLakeServiceClient
|
|
7
7
|
except ImportError:
|
|
8
8
|
msg = (
|
|
9
9
|
"Could not import required Azure objects. This probably means you have an old version "
|
|
@@ -16,7 +16,7 @@ except ImportError:
|
|
|
16
16
|
raise
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def _create_url(storage_account, subdomain):
|
|
19
|
+
def _create_url(storage_account: str, subdomain: str) -> str:
|
|
20
20
|
return f"https://{storage_account}.{subdomain}.core.windows.net/"
|
|
21
21
|
|
|
22
22
|
|
|
@@ -26,4 +26,9 @@ def create_adls2_client(storage_account: str, credential) -> DataLakeServiceClie
|
|
|
26
26
|
return DataLakeServiceClient(account_url, credential)
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
__all__ = [
|
|
29
|
+
__all__ = [
|
|
30
|
+
"DataLakeLeaseClient",
|
|
31
|
+
"DataLakeServiceClient",
|
|
32
|
+
"ResourceNotFoundError",
|
|
33
|
+
"create_adls2_client",
|
|
34
|
+
]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from dagster_azure.blob.compute_log_manager import (
|
|
2
|
+
AzureBlobComputeLogManager as AzureBlobComputeLogManager,
|
|
3
|
+
)
|
|
4
|
+
from dagster_azure.blob.fake_blob_client import FakeBlobServiceClient as FakeBlobServiceClient
|
|
5
|
+
from dagster_azure.blob.resources import (
|
|
6
|
+
AzureBlobStorageAnonymousCredential as AzureBlobStorageAnonymousCredential,
|
|
7
|
+
AzureBlobStorageDefaultCredential as AzureBlobStorageDefaultCredential,
|
|
8
|
+
AzureBlobStorageKeyCredential as AzureBlobStorageKeyCredential,
|
|
9
|
+
AzureBlobStorageResource as AzureBlobStorageResource,
|
|
10
|
+
AzureBlobStorageSASTokenCredential as AzureBlobStorageSASTokenCredential,
|
|
11
|
+
)
|
|
12
|
+
from dagster_azure.blob.utils import create_blob_client as create_blob_client
|