dagster-azure 0.21.13__tar.gz → 0.28.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/LICENSE +1 -1
  2. dagster_azure-0.28.1/PKG-INFO +32 -0
  3. dagster_azure-0.28.1/README.md +4 -0
  4. dagster_azure-0.28.1/dagster_azure/__init__.py +5 -0
  5. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/__init__.py +4 -9
  6. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/file_manager.py +24 -18
  7. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/io_manager.py +45 -29
  8. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/resources.py +65 -7
  9. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/utils.py +8 -3
  10. dagster_azure-0.28.1/dagster_azure/blob/__init__.py +12 -0
  11. dagster_azure-0.28.1/dagster_azure/blob/compute_log_manager.py +391 -0
  12. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/blob/fake_blob_client.py +9 -1
  13. dagster_azure-0.28.1/dagster_azure/blob/resources.py +126 -0
  14. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/blob/utils.py +10 -4
  15. dagster_azure-0.28.1/dagster_azure/fakes/__init__.py +5 -0
  16. {dagster-azure-0.21.13/dagster_azure/adls2 → dagster_azure-0.28.1/dagster_azure/fakes}/fake_adls2_resource.py +3 -4
  17. dagster_azure-0.28.1/dagster_azure/pipes/__init__.py +9 -0
  18. dagster_azure-0.28.1/dagster_azure/pipes/clients/__init__.py +5 -0
  19. dagster_azure-0.28.1/dagster_azure/pipes/clients/azureml.py +140 -0
  20. dagster_azure-0.28.1/dagster_azure/pipes/context_injectors.py +47 -0
  21. dagster_azure-0.28.1/dagster_azure/pipes/message_readers.py +83 -0
  22. dagster_azure-0.28.1/dagster_azure/version.py +1 -0
  23. dagster_azure-0.28.1/dagster_azure.egg-info/PKG-INFO +32 -0
  24. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/SOURCES.txt +9 -2
  25. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/requires.txt +2 -1
  26. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/setup.py +6 -4
  27. dagster-azure-0.21.13/PKG-INFO +0 -15
  28. dagster-azure-0.21.13/README.md +0 -4
  29. dagster-azure-0.21.13/dagster_azure/__init__.py +0 -5
  30. dagster-azure-0.21.13/dagster_azure/blob/__init__.py +0 -3
  31. dagster-azure-0.21.13/dagster_azure/blob/compute_log_manager.py +0 -256
  32. dagster-azure-0.21.13/dagster_azure/version.py +0 -1
  33. dagster-azure-0.21.13/dagster_azure.egg-info/PKG-INFO +0 -15
  34. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/MANIFEST.in +0 -0
  35. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/py.typed +0 -0
  36. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/dependency_links.txt +0 -0
  37. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/entry_points.txt +0 -0
  38. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/not-zip-safe +0 -0
  39. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure.egg-info/top_level.txt +0 -0
  40. {dagster-azure-0.21.13 → dagster_azure-0.28.1}/setup.cfg +0 -0

{dagster-azure-0.21.13 → dagster_azure-0.28.1}/LICENSE
@@ -186,7 +186,7 @@
      same "printed page" as the copyright notice for easier
      identification within third-party archives.
 
-   Copyright 2023 Dagster Labs, Inc".
+   Copyright 2025 Dagster Labs, Inc.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

dagster_azure-0.28.1/PKG-INFO
@@ -0,0 +1,32 @@
+Metadata-Version: 2.4
+Name: dagster-azure
+Version: 0.28.1
+Summary: Package for Azure-specific Dagster framework op and resource components.
+Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-azure
+Author: Dagster Labs
+Author-email: hello@dagsterlabs.com
+License: Apache-2.0
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.9,<3.14
+License-File: LICENSE
+Requires-Dist: azure-core<2.0.0,>=1.7.0
+Requires-Dist: azure-identity<2.0.0,>=1.7.0
+Requires-Dist: azure-ai-ml<2.0.0,>=1.28.0
+Requires-Dist: azure-storage-blob<13.0.0,>=12.5.0
+Requires-Dist: azure-storage-file-datalake<13.0.0,>=12.5
+Requires-Dist: dagster==1.12.1
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary

dagster_azure-0.28.1/README.md
@@ -0,0 +1,4 @@
+# dagster-azure
+
+The docs for `dagster-azure` can be found
+[here](https://docs.dagster.io/api/python-api/libraries/dagster-azure).

dagster_azure-0.28.1/dagster_azure/__init__.py
@@ -0,0 +1,5 @@
+from dagster_shared.libraries import DagsterLibraryRegistry
+
+from dagster_azure.version import __version__
+
+DagsterLibraryRegistry.register("dagster-azure", __version__)

{dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/__init__.py
@@ -1,19 +1,14 @@
-from .fake_adls2_resource import (
-    FakeADLS2Resource as FakeADLS2Resource,
-    FakeADLS2ServiceClient as FakeADLS2ServiceClient,
-    fake_adls2_resource as fake_adls2_resource,
-)
-from .file_manager import (
+from dagster_azure.adls2.file_manager import (
     ADLS2FileHandle as ADLS2FileHandle,
     ADLS2FileManager as ADLS2FileManager,
 )
-from .io_manager import (
+from dagster_azure.adls2.io_manager import (
     ADLS2PickleIOManager as ADLS2PickleIOManager,
     ConfigurablePickledObjectADLS2IOManager as ConfigurablePickledObjectADLS2IOManager,
     PickledObjectADLS2IOManager as PickledObjectADLS2IOManager,
     adls2_pickle_io_manager as adls2_pickle_io_manager,
 )
-from .resources import (
+from dagster_azure.adls2.resources import (
     ADLS2DefaultAzureCredential as ADLS2DefaultAzureCredential,
     ADLS2Key as ADLS2Key,
     ADLS2Resource as ADLS2Resource,
@@ -21,4 +16,4 @@ from .resources import (
     adls2_file_manager as adls2_file_manager,
     adls2_resource as adls2_resource,
 )
-from .utils import create_adls2_client as create_adls2_client
+from dagster_azure.adls2.utils import create_adls2_client as create_adls2_client
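
Note that the fake ADLS2 helpers (FakeADLS2Resource, FakeADLS2ServiceClient, fake_adls2_resource) are no longer re-exported from dagster_azure.adls2; per file 16 in the list above, fake_adls2_resource.py now lives under dagster_azure/fakes/. A hedged migration sketch follows; the dagster_azure.fakes re-export path and the account_name parameter are assumptions, since the new fakes/__init__.py body is not shown in this diff.

    # Before (0.21.13), re-exported from the adls2 package:
    # from dagster_azure.adls2 import FakeADLS2Resource
    # After (0.28.1), assuming fakes/__init__.py re-exports the moved names:
    from dagster_azure.fakes import FakeADLS2Resource

    fake = FakeADLS2Resource(account_name="fake-account")  # parameter name is an assumption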

{dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/file_manager.py
@@ -1,6 +1,7 @@
 import io
 import uuid
 from contextlib import contextmanager
+from typing import Any, Optional
 
 import dagster._check as check
 from dagster._core.storage.file_manager import (
@@ -10,6 +11,8 @@ from dagster._core.storage.file_manager import (
     check_file_like_obj,
 )
 
+from dagster_azure.adls2.utils import DataLakeServiceClient
+
 
 class ADLS2FileHandle(FileHandle):
     """A reference to a file on ADLS2."""
@@ -20,44 +23,44 @@ class ADLS2FileHandle(FileHandle):
         self._key = check.str_param(key, "key")
 
     @property
-    def account(self):
+    def account(self) -> str:
         """str: The name of the ADLS2 account."""
         return self._account
 
     @property
-    def file_system(self):
+    def file_system(self) -> str:
         """str: The name of the ADLS2 file system."""
         return self._file_system
 
     @property
-    def key(self):
+    def key(self) -> str:
         """str: The ADLS2 key."""
         return self._key
 
     @property
-    def path_desc(self):
+    def path_desc(self) -> str:
         """str: The file's ADLS2 URL."""
         return self.adls2_path
 
     @property
-    def adls2_path(self):
+    def adls2_path(self) -> str:
         """str: The file's ADLS2 URL."""
         return f"adfss://{self.file_system}@{self.account}.dfs.core.windows.net/{self.key}"
 
 
 class ADLS2FileManager(FileManager):
-    def __init__(self, adls2_client, file_system, prefix):
+    def __init__(self, adls2_client: DataLakeServiceClient, file_system: str, prefix: str):
         self._client = adls2_client
         self._file_system = check.str_param(file_system, "file_system")
         self._prefix = check.str_param(prefix, "prefix")
-        self._local_handle_cache = {}
+        self._local_handle_cache: dict[str, str] = {}
         self._temp_file_manager = TempfileManager()
 
-    def copy_handle_to_local_temp(self, file_handle):
+    def copy_handle_to_local_temp(self, file_handle: ADLS2FileHandle):  # pyright: ignore[reportIncompatibleMethodOverride]
         self._download_if_not_cached(file_handle)
         return self._get_local_path(file_handle)
 
-    def _download_if_not_cached(self, file_handle):
+    def _download_if_not_cached(self, file_handle: ADLS2FileHandle):
         if not self._file_handle_cached(file_handle):
             # instigate download
             temp_file_obj = self._temp_file_manager.tempfile()
@@ -74,7 +77,7 @@ class ADLS2FileManager(FileManager):
         return file_handle
 
     @contextmanager
-    def read(self, file_handle, mode="rb"):
+    def read(self, file_handle: ADLS2FileHandle, mode: str = "rb"):  # pyright: ignore[reportIncompatibleMethodOverride]
         check.inst_param(file_handle, "file_handle", ADLS2FileHandle)
         check.str_param(mode, "mode")
         check.param_invariant(mode in {"r", "rb"}, "mode")
@@ -85,31 +88,34 @@ class ADLS2FileManager(FileManager):
         with open(self._get_local_path(file_handle), mode, encoding=encoding) as file_obj:
             yield file_obj
 
-    def _file_handle_cached(self, file_handle):
+    def _file_handle_cached(self, file_handle: ADLS2FileHandle) -> bool:
         return file_handle.adls2_path in self._local_handle_cache
 
-    def _get_local_path(self, file_handle):
+    def _get_local_path(self, file_handle: ADLS2FileHandle) -> str:
         return self._local_handle_cache[file_handle.adls2_path]
 
-    def read_data(self, file_handle):
+    def read_data(self, file_handle: ADLS2FileHandle) -> Any:  # pyright: ignore[reportIncompatibleMethodOverride]
         with self.read(file_handle, mode="rb") as file_obj:
             return file_obj.read()
 
-    def write_data(self, data, ext=None):
+    def write_data(self, data: bytes, ext: Optional[str] = None) -> ADLS2FileHandle:
         check.inst_param(data, "data", bytes)
         return self.write(io.BytesIO(data), mode="wb", ext=ext)
 
-    def write(self, file_obj, mode="wb", ext=None):
+    def write(  # pyright: ignore[reportIncompatibleMethodOverride]
+        self, file_obj: io.BytesIO, mode: str = "wb", ext: Optional[str] = None
+    ) -> ADLS2FileHandle:
         check_file_like_obj(file_obj)
         adls2_key = self.get_full_key(str(uuid.uuid4()) + (("." + ext) if ext is not None else ""))
         adls2_file = self._client.get_file_client(
             file_system=self._file_system, file_path=adls2_key
         )
         adls2_file.upload_data(file_obj, overwrite=True)
-        return ADLS2FileHandle(self._client.account_name, self._file_system, adls2_key)
+        account_name = check.not_none(self._client.account_name, "Expected account name to be set")
+        return ADLS2FileHandle(account_name, self._file_system, adls2_key)
 
-    def get_full_key(self, file_key):
+    def get_full_key(self, file_key: str) -> str:
         return f"{self._prefix}/{file_key}"
 
-    def delete_local_temp(self):
+    def delete_local_temp(self) -> None:
         self._temp_file_manager.close()
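
Read together, the newly annotated methods above compose as follows. A minimal usage sketch, assuming an authenticated DataLakeServiceClient; the account URL, file system, and SAS token are placeholders.

    from azure.storage.filedatalake import DataLakeServiceClient

    from dagster_azure.adls2 import ADLS2FileManager

    client = DataLakeServiceClient(
        "https://my-account.dfs.core.windows.net/", credential="my-sas-token"
    )
    manager = ADLS2FileManager(adls2_client=client, file_system="my-fs", prefix="dagster")

    handle = manager.write_data(b"hello")  # uploads to dagster/<uuid>, returns an ADLS2FileHandle
    print(handle.adls2_path)  # adfss://my-fs@my-account.dfs.core.windows.net/dagster/<uuid>
    local_path = manager.copy_handle_to_local_temp(handle)  # downloads once, then serves the cached copy
    manager.delete_local_temp()  # closes the TempfileManager and discards local copies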

{dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/io_manager.py
@@ -1,6 +1,7 @@
 import pickle
+from collections.abc import Iterator
 from contextlib import contextmanager
-from typing import Any, Iterator, Union
+from typing import Any, Union
 
 from dagster import (
     InputContext,
@@ -11,6 +12,7 @@ from dagster import (
 )
 from dagster._annotations import deprecated
 from dagster._config.pythonic_config import ConfigurableIOManager
+from dagster._core.execution.context.init import InitResourceContext
 from dagster._core.storage.io_manager import dagster_maintained_io_manager
 from dagster._core.storage.upath_io_manager import UPathIOManager
 from dagster._utils import PICKLE_PROTOCOL
@@ -19,20 +21,27 @@ from pydantic import Field
 from upath import UPath
 
 from dagster_azure.adls2.resources import ADLS2Resource
-from dagster_azure.adls2.utils import ResourceNotFoundError
-
-_LEASE_DURATION = 60  # One minute
+from dagster_azure.adls2.utils import (
+    DataLakeLeaseClient,
+    DataLakeServiceClient,
+    ResourceNotFoundError,
+)
+from dagster_azure.blob.utils import BlobLeaseClient, BlobServiceClient
 
 
 class PickledObjectADLS2IOManager(UPathIOManager):
     def __init__(
         self,
-        file_system: Any,
-        adls2_client: Any,
-        blob_client: Any,
-        lease_client_constructor: Any,
+        file_system: str,
+        adls2_client: DataLakeServiceClient,
+        blob_client: BlobServiceClient,
+        lease_client_constructor: Union[type[DataLakeLeaseClient], type[BlobLeaseClient]],
         prefix: str = "dagster",
+        lease_duration: int = 60,
     ):
+        if lease_duration != -1 and (lease_duration < 15 or lease_duration > 60):
+            raise ValueError("lease_duration must be -1 (unlimited) or between 15 and 60")
+
         self.adls2_client = adls2_client
         self.file_system_client = self.adls2_client.get_file_system_client(file_system)
         # We also need a blob client to handle copying as ADLS doesn't have a copy API yet
@@ -41,7 +50,7 @@ class PickledObjectADLS2IOManager(UPathIOManager):
         self.prefix = check.str_param(prefix, "prefix")
 
         self.lease_client_constructor = lease_client_constructor
-        self.lease_duration = _LEASE_DURATION
+        self.lease_duration = lease_duration
         self.file_system_client.get_file_system_properties()
         super().__init__(base_path=UPath(self.prefix))
 
@@ -74,17 +83,13 @@ class PickledObjectADLS2IOManager(UPathIOManager):
         return True
 
     def _uri_for_path(self, path: UPath, protocol: str = "abfss://") -> str:
-        return "{protocol}{filesystem}@{account}.dfs.core.windows.net/{key}".format(
-            protocol=protocol,
-            filesystem=self.file_system_client.file_system_name,
-            account=self.file_system_client.account_name,
-            key=path.as_posix(),
-        )
+        return f"{protocol}{self.file_system_client.file_system_name}@{self.file_system_client.account_name}.dfs.core.windows.net/{path.as_posix()}"
 
     @contextmanager
     def _acquire_lease(self, client: Any, is_rm: bool = False) -> Iterator[str]:
         lease_client = self.lease_client_constructor(client=client)
         try:
+            # Unclear why this needs to be type-ignored
             lease_client.acquire(lease_duration=self.lease_duration)
             yield lease_client.id
         finally:
@@ -134,7 +139,7 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
     .. code-block:: python
 
         from dagster import Definitions, asset
-        from dagster_azure.adls2 import ADLS2PickleIOManager, adls2_resource
+        from dagster_azure.adls2 import ADLS2PickleIOManager, ADLS2Resource, ADLS2SASToken
 
         @asset
         def asset1():
@@ -145,14 +150,17 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
         def asset2(asset1):
             return df[:5]
 
-        defs = Definitions(
+        Definitions(
            assets=[asset1, asset2],
            resources={
                "io_manager": ADLS2PickleIOManager(
                    adls2_file_system="my-cool-fs",
-                    adls2_prefix="my-cool-prefix"
+                    adls2_prefix="my-cool-prefix",
+                    adls2=ADLS2Resource(
+                        storage_account="my-storage-account",
+                        credential=ADLS2SASToken(token="my-sas-token"),
+                    ),
                ),
-                "adls2": adls2_resource,
            },
        )
@@ -162,15 +170,18 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
     .. code-block:: python
 
         from dagster import job
-        from dagster_azure.adls2 import ADLS2PickleIOManager, adls2_resource
+        from dagster_azure.adls2 import ADLS2PickleIOManager, ADLS2Resource, ADLS2SASToken
 
         @job(
             resource_defs={
                 "io_manager": ADLS2PickleIOManager(
                     adls2_file_system="my-cool-fs",
-                    adls2_prefix="my-cool-prefix"
+                    adls2_prefix="my-cool-prefix",
+                    adls2=ADLS2Resource(
+                        storage_account="my-storage-account",
+                        credential=ADLS2SASToken(token="my-sas-token"),
+                    ),
                 ),
-                "adls2": adls2_resource,
             },
         )
         def my_job():
@@ -182,6 +193,10 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
     adls2_prefix: str = Field(
         default="dagster", description="ADLS Gen2 file system prefix to write to."
     )
+    lease_duration: int = Field(
+        default=60,
+        description="Lease duration in seconds. Must be between 15 and 60 seconds or -1 for infinite.",
+    )
 
     @classmethod
     def _is_dagster_maintained(cls) -> bool:
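
The new lease_duration field feeds the constructor validation introduced above. A configuration sketch mirroring the docstring examples in this file; the account, token, and file-system values are placeholders.

    from dagster import Definitions, asset
    from dagster_azure.adls2 import ADLS2PickleIOManager, ADLS2Resource, ADLS2SASToken

    @asset
    def asset1():
        return [1, 2, 3]

    Definitions(
        assets=[asset1],
        resources={
            "io_manager": ADLS2PickleIOManager(
                adls2_file_system="my-cool-fs",
                adls2_prefix="my-cool-prefix",
                lease_duration=30,  # must be -1 (infinite) or 15-60; anything else raises ValueError
                adls2=ADLS2Resource(
                    storage_account="my-storage-account",
                    credential=ADLS2SASToken(token="my-sas-token"),
                ),
            ),
        },
    )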
@@ -196,6 +211,7 @@ class ADLS2PickleIOManager(ConfigurableIOManager):
             self.adls2.blob_client,
             self.adls2.lease_client_constructor,
             self.adls2_prefix,
+            self.lease_duration,
         )
 
     def load_input(self, context: "InputContext") -> Any:
@@ -207,7 +223,7 @@
 
 @deprecated(
     breaking_version="2.0",
-    additional_warn_text="Please use GCSPickleIOManager instead.",
+    additional_warn_text="Please use ADLS2PickleIOManager instead.",
 )
 class ConfigurablePickledObjectADLS2IOManager(ADLS2PickleIOManager):
     """Renamed to ADLS2PickleIOManager. See ADLS2PickleIOManager for documentation."""
@@ -220,7 +236,7 @@ class ConfigurablePickledObjectADLS2IOManager(ADLS2PickleIOManager):
     config_schema=ADLS2PickleIOManager.to_config_schema(),
     required_resource_keys={"adls2"},
 )
-def adls2_pickle_io_manager(init_context):
+def adls2_pickle_io_manager(init_context: InitResourceContext) -> PickledObjectADLS2IOManager:
     """Persistent IO manager using Azure Data Lake Storage Gen2 for storage.
 
     Serializes objects via pickling. Suitable for objects storage for distributed executors, so long
@@ -239,7 +255,7 @@ def adls2_pickle_io_manager(init_context):
 
     Example usage:
 
-    1. Attach this IO manager to a set of assets.
+    Attach this IO manager to a set of assets.
 
     .. code-block:: python
 
@@ -255,7 +271,7 @@ def adls2_pickle_io_manager(init_context):
         def asset2(asset1):
             return df[:5]
 
-        defs = Definitions(
+        Definitions(
             assets=[asset1, asset2],
             resources={
                 "io_manager": adls2_pickle_io_manager.configured(
@@ -266,7 +282,7 @@ def adls2_pickle_io_manager(init_context):
         )
 
 
-    2. Attach this IO manager to your job to make it available to your ops.
+    Attach this IO manager to your job to make it available to your ops.
 
     .. code-block:: python
 
@@ -288,11 +304,11 @@ def adls2_pickle_io_manager(init_context):
     adls2_client = adls_resource.adls2_client
     blob_client = adls_resource.blob_client
     lease_client = adls_resource.lease_client_constructor
-    pickled_io_manager = PickledObjectADLS2IOManager(
+    return PickledObjectADLS2IOManager(
         init_context.resource_config["adls2_file_system"],
         adls2_client,
         blob_client,
         lease_client,
         init_context.resource_config.get("adls2_prefix"),
+        init_context.resource_config.get("lease_duration"),
     )
-    return pickled_io_manager
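
Because the legacy resource's config schema is derived from ADLS2PickleIOManager.to_config_schema() and the function now reads lease_duration out of resource_config, the configured path should accept the new key as well. A sketch:

    from dagster_azure.adls2 import adls2_pickle_io_manager

    io_manager = adls2_pickle_io_manager.configured(
        {
            "adls2_file_system": "my-cool-fs",
            "adls2_prefix": "my-cool-prefix",
            "lease_duration": 30,  # same -1 or 15-60 second constraint applies
        }
    )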

{dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/resources.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Union
+from typing import Any, Union
 
 from azure.identity import DefaultAzureCredential
 from azure.storage.filedatalake import DataLakeLeaseClient
@@ -17,11 +17,11 @@ from dagster._utils.merger import merge_dicts
 from pydantic import Field
 from typing_extensions import Literal
 
+from dagster_azure.adls2.file_manager import ADLS2FileManager
+from dagster_azure.adls2.io_manager import InitResourceContext
+from dagster_azure.adls2.utils import DataLakeServiceClient, create_adls2_client
 from dagster_azure.blob.utils import BlobServiceClient, create_blob_client
 
-from .file_manager import ADLS2FileManager
-from .utils import DataLakeServiceClient, create_adls2_client
-
 
 class ADLS2SASToken(Config):
     credential_type: Literal["sas"] = "sas"
@@ -35,7 +35,7 @@ class ADLS2Key(Config):
 
 class ADLS2DefaultAzureCredential(Config):
     credential_type: Literal["default_azure_credential"] = "default_azure_credential"
-    kwargs: Dict[str, Any]
+    kwargs: dict[str, Any]
 
 
 class ADLS2BaseResource(ConfigurableResource):
@@ -71,6 +71,64 @@ class ADLS2Resource(ADLS2BaseResource):
 
     Contains a client for both the Data Lake and Blob APIs, to work around the limitations
     of each.
+
+    Example usage:
+
+    Attach this resource to your Definitions to be used by assets and jobs.
+
+    .. code-block:: python
+
+        from dagster import Definitions, asset, job, op
+        from dagster_azure.adls2 import ADLS2Resource, ADLS2SASToken
+
+        @asset
+        def asset1(adls2: ADLS2Resource):
+            adls2.adls2_client.list_file_systems()
+            ...
+
+        @op
+        def my_op(adls2: ADLS2Resource):
+            adls2.adls2_client.list_file_systems()
+            ...
+
+        @job
+        def my_job():
+            my_op()
+
+        Definitions(
+            assets=[asset1],
+            jobs=[my_job],
+            resources={
+                "adls2": ADLS2Resource(
+                    storage_account="my-storage-account",
+                    credential=ADLS2SASToken(token="my-sas-token"),
+                )
+            },
+        )
+
+
+    Attach this resource to your job to make it available to your ops.
+
+    .. code-block:: python
+
+        from dagster import job, op
+        from dagster_azure.adls2 import ADLS2Resource, ADLS2SASToken
+
+        @op
+        def my_op(adls2: ADLS2Resource):
+            adls2.adls2_client.list_file_systems()
+            ...
+
+        @job(
+            resource_defs={
+                "adls2": ADLS2Resource(
+                    storage_account="my-storage-account",
+                    credential=ADLS2SASToken(token="my-sas-token"),
+                )
+            },
+        )
+        def my_job():
+            my_op()
     """
 
     @classmethod
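
The new docstring only demonstrates ADLS2SASToken, but this module also defines ADLS2Key and ADLS2DefaultAzureCredential. A hedged sketch of the other variants; the key field name and the pass-through of kwargs to azure.identity.DefaultAzureCredential are assumptions, since those class bodies are not shown in this diff.

    from dagster_azure.adls2 import ADLS2DefaultAzureCredential, ADLS2Key, ADLS2Resource

    # Storage-key variant (field name assumed to be `key`).
    with_key = ADLS2Resource(
        storage_account="my-storage-account",
        credential=ADLS2Key(key="my-storage-key"),
    )

    # Azure AD variant; kwargs presumably forwarded to DefaultAzureCredential.
    with_default = ADLS2Resource(
        storage_account="my-storage-account",
        credential=ADLS2DefaultAzureCredential(kwargs={"exclude_cli_credential": True}),
    )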
@@ -107,7 +165,7 @@ class ADLS2Resource(ADLS2BaseResource):
 # to construct the new config and then use that to construct the resource.
 @dagster_maintained_resource
 @resource(ADLS2_CLIENT_CONFIG)
-def adls2_resource(context):
+def adls2_resource(context: InitResourceContext) -> ADLS2Resource:
     """Resource that gives ops access to Azure Data Lake Storage Gen2.
 
     The underlying client is a :py:class:`~azure.storage.filedatalake.DataLakeServiceClient`.
@@ -171,7 +229,7 @@
         },
     )
 )
-def adls2_file_manager(context):
+def adls2_file_manager(context: InitResourceContext) -> ADLS2FileManager:
     """FileManager that provides abstract access to ADLS2.
 
     Implements the :py:class:`~dagster._core.storage.file_manager.FileManager` API.

{dagster-azure-0.21.13 → dagster_azure-0.28.1}/dagster_azure/adls2/utils.py
@@ -3,7 +3,7 @@ import warnings
 try:
     # Centralise Azure imports here so we only need to warn in one place
     from azure.core.exceptions import ResourceNotFoundError
-    from azure.storage.filedatalake import DataLakeServiceClient
+    from azure.storage.filedatalake import DataLakeLeaseClient, DataLakeServiceClient
 except ImportError:
     msg = (
         "Could not import required Azure objects. This probably means you have an old version "
@@ -16,7 +16,7 @@ except ImportError:
     raise
 
 
-def _create_url(storage_account, subdomain):
+def _create_url(storage_account: str, subdomain: str) -> str:
     return f"https://{storage_account}.{subdomain}.core.windows.net/"
 
 
@@ -26,4 +26,9 @@ def create_adls2_client(storage_account: str, credential) -> DataLakeServiceClie
     return DataLakeServiceClient(account_url, credential)
 
 
-__all__ = ["create_adls2_client", "DataLakeServiceClient", "ResourceNotFoundError"]
+__all__ = [
+    "DataLakeLeaseClient",
+    "DataLakeServiceClient",
+    "ResourceNotFoundError",
+    "create_adls2_client",
+]
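
For reference, a short sketch of the helper this hunk re-exports; per _create_url, the account URL presumably resolves to the dfs subdomain, and the credential value is a placeholder.

    from dagster_azure.adls2 import create_adls2_client

    # Roughly DataLakeServiceClient("https://my-storage-account.dfs.core.windows.net/", credential)
    client = create_adls2_client("my-storage-account", credential="my-sas-token")
    for file_system in client.list_file_systems():
        print(file_system.name)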

dagster_azure-0.28.1/dagster_azure/blob/__init__.py
@@ -0,0 +1,12 @@
+from dagster_azure.blob.compute_log_manager import (
+    AzureBlobComputeLogManager as AzureBlobComputeLogManager,
+)
+from dagster_azure.blob.fake_blob_client import FakeBlobServiceClient as FakeBlobServiceClient
+from dagster_azure.blob.resources import (
+    AzureBlobStorageAnonymousCredential as AzureBlobStorageAnonymousCredential,
+    AzureBlobStorageDefaultCredential as AzureBlobStorageDefaultCredential,
+    AzureBlobStorageKeyCredential as AzureBlobStorageKeyCredential,
+    AzureBlobStorageResource as AzureBlobStorageResource,
+    AzureBlobStorageSASTokenCredential as AzureBlobStorageSASTokenCredential,
+)
+from dagster_azure.blob.utils import create_blob_client as create_blob_client
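
The hunks shown in this diff end here, so the bodies of blob/resources.py (+126 lines) and the new pipes modules are not visible. The sketch below is therefore hypothetical: the resource key and the get_client context manager are assumptions inferred from the exported names above, not a confirmed API.

    from dagster import asset

    from dagster_azure.blob import AzureBlobStorageResource

    @asset
    def blob_asset(blob_storage: AzureBlobStorageResource):
        # get_client() yielding a BlobServiceClient is an assumption, not shown in this diff
        with blob_storage.get_client() as client:
            for container in client.list_containers():
                print(container.name)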