dagster-azure 0.13.19.tar.gz → 0.28.1.tar.gz

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (53)
  1. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/LICENSE +1 -1
  2. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/MANIFEST.in +1 -0
  3. dagster_azure-0.28.1/PKG-INFO +32 -0
  4. dagster_azure-0.28.1/README.md +4 -0
  5. dagster_azure-0.28.1/dagster_azure/__init__.py +5 -0
  6. dagster_azure-0.28.1/dagster_azure/adls2/__init__.py +19 -0
  7. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/dagster_azure/adls2/file_manager.py +30 -28
  8. dagster_azure-0.28.1/dagster_azure/adls2/io_manager.py +314 -0
  9. dagster_azure-0.28.1/dagster_azure/adls2/resources.py +262 -0
  10. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/dagster_azure/adls2/utils.py +12 -9
  11. dagster_azure-0.28.1/dagster_azure/blob/__init__.py +12 -0
  12. dagster_azure-0.28.1/dagster_azure/blob/compute_log_manager.py +391 -0
  13. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/dagster_azure/blob/fake_blob_client.py +40 -21
  14. dagster_azure-0.28.1/dagster_azure/blob/resources.py +126 -0
  15. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/dagster_azure/blob/utils.py +12 -11
  16. dagster_azure-0.28.1/dagster_azure/fakes/__init__.py +5 -0
  17. {dagster-azure-0.13.19/dagster_azure/adls2 → dagster_azure-0.28.1/dagster_azure/fakes}/fake_adls2_resource.py +81 -28
  18. dagster_azure-0.28.1/dagster_azure/pipes/__init__.py +9 -0
  19. dagster_azure-0.28.1/dagster_azure/pipes/clients/__init__.py +5 -0
  20. dagster_azure-0.28.1/dagster_azure/pipes/clients/azureml.py +140 -0
  21. dagster_azure-0.28.1/dagster_azure/pipes/context_injectors.py +47 -0
  22. dagster_azure-0.28.1/dagster_azure/pipes/message_readers.py +83 -0
  23. dagster_azure-0.28.1/dagster_azure/py.typed +1 -0
  24. dagster_azure-0.28.1/dagster_azure/version.py +1 -0
  25. dagster_azure-0.28.1/dagster_azure.egg-info/PKG-INFO +32 -0
  26. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/dagster_azure.egg-info/SOURCES.txt +9 -9
  27. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/dagster_azure.egg-info/entry_points.txt +0 -1
  28. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/dagster_azure.egg-info/requires.txt +3 -1
  29. dagster_azure-0.28.1/dagster_azure.egg-info/top_level.txt +1 -0
  30. dagster_azure-0.28.1/setup.py +47 -0
  31. dagster-azure-0.13.19/PKG-INFO +0 -15
  32. dagster-azure-0.13.19/README.md +0 -4
  33. dagster-azure-0.13.19/dagster_azure/__init__.py +0 -5
  34. dagster-azure-0.13.19/dagster_azure/adls2/__init__.py +0 -6
  35. dagster-azure-0.13.19/dagster_azure/adls2/file_cache.py +0 -74
  36. dagster-azure-0.13.19/dagster_azure/adls2/io_manager.py +0 -133
  37. dagster-azure-0.13.19/dagster_azure/adls2/resources.py +0 -122
  38. dagster-azure-0.13.19/dagster_azure/blob/__init__.py +0 -3
  39. dagster-azure-0.13.19/dagster_azure/blob/compute_log_manager.py +0 -205
  40. dagster-azure-0.13.19/dagster_azure/version.py +0 -1
  41. dagster-azure-0.13.19/dagster_azure.egg-info/PKG-INFO +0 -15
  42. dagster-azure-0.13.19/dagster_azure.egg-info/top_level.txt +0 -2
  43. dagster-azure-0.13.19/dagster_azure_tests/__init__.py +0 -0
  44. dagster-azure-0.13.19/dagster_azure_tests/adls2_tests/__init__.py +0 -0
  45. dagster-azure-0.13.19/dagster_azure_tests/adls2_tests/conftest.py +0 -18
  46. dagster-azure-0.13.19/dagster_azure_tests/adls2_tests/test_adls2_file_cache.py +0 -60
  47. dagster-azure-0.13.19/dagster_azure_tests/adls2_tests/test_adls2_file_manager.py +0 -190
  48. dagster-azure-0.13.19/dagster_azure_tests/adls2_tests/test_io_manager.py +0 -142
  49. dagster-azure-0.13.19/dagster_azure_tests/test_version.py +0 -5
  50. dagster-azure-0.13.19/setup.py +0 -43
  51. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/dagster_azure.egg-info/dependency_links.txt +0 -0
  52. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/dagster_azure.egg-info/not-zip-safe +0 -0
  53. {dagster-azure-0.13.19 → dagster_azure-0.28.1}/setup.cfg +0 -0
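
Two structural changes are worth noting in the list above: `fake_adls2_resource.py` moved from `dagster_azure/adls2/` into a new `dagster_azure/fakes/` package (item 17), and a new `dagster_azure/pipes/` package appeared (items 18-22). A sketch of the resulting import-path change; the exported class name is inferred from the file name and `fakes/__init__.py`, so treat it as an assumption:

    # 0.13.19 layout (assumed): the fake lived under the adls2 package
    # from dagster_azure.adls2.fake_adls2_resource import FakeADLS2Resource
    # 0.28.1 layout: the fake is re-exported from the new fakes package
    from dagster_azure.fakes import FakeADLS2Resource  # name inferred, not shown in this diff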
--- dagster-azure-0.13.19/LICENSE
+++ dagster_azure-0.28.1/LICENSE
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright {yyyy} {name of copyright owner}
+   Copyright 2025 Dagster Labs, Inc.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
--- dagster-azure-0.13.19/MANIFEST.in
+++ dagster_azure-0.28.1/MANIFEST.in
@@ -3,3 +3,4 @@ recursive-include dagster_azure *.yaml
 recursive-include dagster_azure *.txt
 recursive-include dagster_azure *.template
 include LICENSE
+include dagster_azure/py.typed
--- /dev/null
+++ dagster_azure-0.28.1/PKG-INFO
@@ -0,0 +1,32 @@
+Metadata-Version: 2.4
+Name: dagster-azure
+Version: 0.28.1
+Summary: Package for Azure-specific Dagster framework op and resource components.
+Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-azure
+Author: Dagster Labs
+Author-email: hello@dagsterlabs.com
+License: Apache-2.0
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.9,<3.14
+License-File: LICENSE
+Requires-Dist: azure-core<2.0.0,>=1.7.0
+Requires-Dist: azure-identity<2.0.0,>=1.7.0
+Requires-Dist: azure-ai-ml<2.0.0,>=1.28.0
+Requires-Dist: azure-storage-blob<13.0.0,>=12.5.0
+Requires-Dist: azure-storage-file-datalake<13.0.0,>=12.5
+Requires-Dist: dagster==1.12.1
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
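
The metadata pins `dagster` to exactly 1.12.1 while holding the Azure SDKs to major-version ranges. A small sketch (not part of the diff, standard library only; assumes the package is installed) for checking what actually resolved in an environment:

    # Sketch: compare the installed distribution against the Requires-Dist pins above.
    from importlib.metadata import requires, version

    print(version("dagster-azure"))   # expected: 0.28.1
    print(version("dagster"))         # pinned exactly: 1.12.1
    print(requires("dagster-azure"))  # echoes the Requires-Dist entries above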
--- /dev/null
+++ dagster_azure-0.28.1/README.md
@@ -0,0 +1,4 @@
+# dagster-azure
+
+The docs for `dagster-azure` can be found
+[here](https://docs.dagster.io/api/python-api/libraries/dagster-azure).
--- /dev/null
+++ dagster_azure-0.28.1/dagster_azure/__init__.py
@@ -0,0 +1,5 @@
+from dagster_shared.libraries import DagsterLibraryRegistry
+
+from dagster_azure.version import __version__
+
+DagsterLibraryRegistry.register("dagster-azure", __version__)
--- /dev/null
+++ dagster_azure-0.28.1/dagster_azure/adls2/__init__.py
@@ -0,0 +1,19 @@
+from dagster_azure.adls2.file_manager import (
+    ADLS2FileHandle as ADLS2FileHandle,
+    ADLS2FileManager as ADLS2FileManager,
+)
+from dagster_azure.adls2.io_manager import (
+    ADLS2PickleIOManager as ADLS2PickleIOManager,
+    ConfigurablePickledObjectADLS2IOManager as ConfigurablePickledObjectADLS2IOManager,
+    PickledObjectADLS2IOManager as PickledObjectADLS2IOManager,
+    adls2_pickle_io_manager as adls2_pickle_io_manager,
+)
+from dagster_azure.adls2.resources import (
+    ADLS2DefaultAzureCredential as ADLS2DefaultAzureCredential,
+    ADLS2Key as ADLS2Key,
+    ADLS2Resource as ADLS2Resource,
+    ADLS2SASToken as ADLS2SASToken,
+    adls2_file_manager as adls2_file_manager,
+    adls2_resource as adls2_resource,
+)
+from dagster_azure.adls2.utils import create_adls2_client as create_adls2_client
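
The `name as name` spelling is the PEP 484 convention for marking re-exports as explicitly public, so type checkers treat `dagster_azure.adls2` as the supported flat namespace. A sketch of typical downstream imports, using only names re-exported above:

    # Sketch: everything resolves through the flat adls2 namespace.
    from dagster_azure.adls2 import (
        ADLS2PickleIOManager,
        ADLS2Resource,
        ADLS2SASToken,
        create_adls2_client,
    )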
--- dagster-azure-0.13.19/dagster_azure/adls2/file_manager.py
+++ dagster_azure-0.28.1/dagster_azure/adls2/file_manager.py
@@ -1,17 +1,19 @@
 import io
 import uuid
 from contextlib import contextmanager
+from typing import Any, Optional
 
-from dagster import check, usable_as_dagster_type
-from dagster.core.storage.file_manager import (
+import dagster._check as check
+from dagster._core.storage.file_manager import (
     FileHandle,
     FileManager,
     TempfileManager,
     check_file_like_obj,
 )
 
+from dagster_azure.adls2.utils import DataLakeServiceClient
+
 
-@usable_as_dagster_type
 class ADLS2FileHandle(FileHandle):
     """A reference to a file on ADLS2."""
 
@@ -21,48 +23,44 @@ class ADLS2FileHandle(FileHandle):
         self._key = check.str_param(key, "key")
 
     @property
-    def account(self):
+    def account(self) -> str:
         """str: The name of the ADLS2 account."""
         return self._account
 
     @property
-    def file_system(self):
+    def file_system(self) -> str:
         """str: The name of the ADLS2 file system."""
         return self._file_system
 
     @property
-    def key(self):
+    def key(self) -> str:
         """str: The ADLS2 key."""
         return self._key
 
     @property
-    def path_desc(self):
+    def path_desc(self) -> str:
         """str: The file's ADLS2 URL."""
         return self.adls2_path
 
     @property
-    def adls2_path(self):
+    def adls2_path(self) -> str:
         """str: The file's ADLS2 URL."""
-        return "adfss://{file_system}@{account}.dfs.core.windows.net/{key}".format(
-            file_system=self.file_system,
-            account=self.account,
-            key=self.key,
-        )
+        return f"adfss://{self.file_system}@{self.account}.dfs.core.windows.net/{self.key}"
 
 
 class ADLS2FileManager(FileManager):
-    def __init__(self, adls2_client, file_system, prefix):
+    def __init__(self, adls2_client: DataLakeServiceClient, file_system: str, prefix: str):
         self._client = adls2_client
         self._file_system = check.str_param(file_system, "file_system")
         self._prefix = check.str_param(prefix, "prefix")
-        self._local_handle_cache = {}
+        self._local_handle_cache: dict[str, str] = {}
         self._temp_file_manager = TempfileManager()
 
-    def copy_handle_to_local_temp(self, file_handle):
+    def copy_handle_to_local_temp(self, file_handle: ADLS2FileHandle):  # pyright: ignore[reportIncompatibleMethodOverride]
         self._download_if_not_cached(file_handle)
         return self._get_local_path(file_handle)
 
-    def _download_if_not_cached(self, file_handle):
+    def _download_if_not_cached(self, file_handle: ADLS2FileHandle):
         if not self._file_handle_cached(file_handle):
             # instigate download
             temp_file_obj = self._temp_file_manager.tempfile()
@@ -79,41 +77,45 @@ class ADLS2FileManager(FileManager):
             return file_handle
 
     @contextmanager
-    def read(self, file_handle, mode="rb"):
+    def read(self, file_handle: ADLS2FileHandle, mode: str = "rb"):  # pyright: ignore[reportIncompatibleMethodOverride]
         check.inst_param(file_handle, "file_handle", ADLS2FileHandle)
         check.str_param(mode, "mode")
         check.param_invariant(mode in {"r", "rb"}, "mode")
 
         self._download_if_not_cached(file_handle)
 
-        with open(self._get_local_path(file_handle), mode) as file_obj:
+        encoding = None if "b" in mode else "utf-8"
+        with open(self._get_local_path(file_handle), mode, encoding=encoding) as file_obj:
             yield file_obj
 
-    def _file_handle_cached(self, file_handle):
+    def _file_handle_cached(self, file_handle: ADLS2FileHandle) -> bool:
         return file_handle.adls2_path in self._local_handle_cache
 
-    def _get_local_path(self, file_handle):
+    def _get_local_path(self, file_handle: ADLS2FileHandle) -> str:
         return self._local_handle_cache[file_handle.adls2_path]
 
-    def read_data(self, file_handle):
+    def read_data(self, file_handle: ADLS2FileHandle) -> Any:  # pyright: ignore[reportIncompatibleMethodOverride]
         with self.read(file_handle, mode="rb") as file_obj:
             return file_obj.read()
 
-    def write_data(self, data, ext=None):
+    def write_data(self, data: bytes, ext: Optional[str] = None) -> ADLS2FileHandle:
         check.inst_param(data, "data", bytes)
         return self.write(io.BytesIO(data), mode="wb", ext=ext)
 
-    def write(self, file_obj, mode="wb", ext=None):  # pylint: disable=unused-argument
+    def write(  # pyright: ignore[reportIncompatibleMethodOverride]
+        self, file_obj: io.BytesIO, mode: str = "wb", ext: Optional[str] = None
+    ) -> ADLS2FileHandle:
         check_file_like_obj(file_obj)
         adls2_key = self.get_full_key(str(uuid.uuid4()) + (("." + ext) if ext is not None else ""))
         adls2_file = self._client.get_file_client(
             file_system=self._file_system, file_path=adls2_key
         )
         adls2_file.upload_data(file_obj, overwrite=True)
-        return ADLS2FileHandle(self._client.account_name, self._file_system, adls2_key)
+        account_name = check.not_none(self._client.account_name, "Expected account name to be set")
+        return ADLS2FileHandle(account_name, self._file_system, adls2_key)
 
-    def get_full_key(self, file_key):
-        return "{base_key}/{file_key}".format(base_key=self._prefix, file_key=file_key)
+    def get_full_key(self, file_key: str) -> str:
+        return f"{self._prefix}/{file_key}"
 
-    def delete_local_temp(self):
+    def delete_local_temp(self) -> None:
         self._temp_file_manager.close()
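
With the annotations in place, the file manager's round-trip contract is explicit: `write_data` accepts `bytes` and returns an `ADLS2FileHandle`, which `read_data` then consumes via a locally cached download. A minimal sketch under assumed credentials (the account name and key are placeholders, and the `create_adls2_client` argument shapes are an assumption):

    # Sketch: round-tripping bytes through ADLS2 with the file manager above.
    from dagster_azure.adls2 import ADLS2FileManager, create_adls2_client

    client = create_adls2_client("my-storage-account", "my-shared-key")  # placeholders
    manager = ADLS2FileManager(adls2_client=client, file_system="my-fs", prefix="dagster")

    handle = manager.write_data(b"hello")         # uploads to <prefix>/<uuid>
    assert manager.read_data(handle) == b"hello"  # downloads to a local temp file first
    manager.delete_local_temp()                   # releases the cached temp files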
--- /dev/null
+++ dagster_azure-0.28.1/dagster_azure/adls2/io_manager.py
@@ -0,0 +1,314 @@
+import pickle
+from collections.abc import Iterator
+from contextlib import contextmanager
+from typing import Any, Union
+
+from dagster import (
+    InputContext,
+    OutputContext,
+    ResourceDependency,
+    _check as check,
+    io_manager,
+)
+from dagster._annotations import deprecated
+from dagster._config.pythonic_config import ConfigurableIOManager
+from dagster._core.execution.context.init import InitResourceContext
+from dagster._core.storage.io_manager import dagster_maintained_io_manager
+from dagster._core.storage.upath_io_manager import UPathIOManager
+from dagster._utils import PICKLE_PROTOCOL
+from dagster._utils.cached_method import cached_method
+from pydantic import Field
+from upath import UPath
+
+from dagster_azure.adls2.resources import ADLS2Resource
+from dagster_azure.adls2.utils import (
+    DataLakeLeaseClient,
+    DataLakeServiceClient,
+    ResourceNotFoundError,
+)
+from dagster_azure.blob.utils import BlobLeaseClient, BlobServiceClient
+
+
+class PickledObjectADLS2IOManager(UPathIOManager):
+    def __init__(
+        self,
+        file_system: str,
+        adls2_client: DataLakeServiceClient,
+        blob_client: BlobServiceClient,
+        lease_client_constructor: Union[type[DataLakeLeaseClient], type[BlobLeaseClient]],
+        prefix: str = "dagster",
+        lease_duration: int = 60,
+    ):
+        if lease_duration != -1 and (lease_duration < 15 or lease_duration > 60):
+            raise ValueError("lease_duration must be -1 (unlimited) or between 15 and 60")
+
+        self.adls2_client = adls2_client
+        self.file_system_client = self.adls2_client.get_file_system_client(file_system)
+        # We also need a blob client to handle copying as ADLS doesn't have a copy API yet
+        self.blob_client = blob_client
+        self.blob_container_client = self.blob_client.get_container_client(file_system)
+        self.prefix = check.str_param(prefix, "prefix")
+
+        self.lease_client_constructor = lease_client_constructor
+        self.lease_duration = lease_duration
+        self.file_system_client.get_file_system_properties()
+        super().__init__(base_path=UPath(self.prefix))
+
+    def get_op_output_relative_path(self, context: Union[InputContext, OutputContext]) -> UPath:
+        parts = context.get_identifier()
+        run_id = parts[0]
+        output_parts = parts[1:]
+        return UPath("storage", run_id, "files", *output_parts)
+
+    def get_loading_input_log_message(self, path: UPath) -> str:
+        return f"Loading ADLS2 object from: {self._uri_for_path(path)}"
+
+    def get_writing_output_log_message(self, path: UPath) -> str:
+        return f"Writing ADLS2 object at: {self._uri_for_path(path)}"
+
+    def unlink(self, path: UPath) -> None:
+        file_client = self.file_system_client.get_file_client(path.as_posix())
+        with self._acquire_lease(file_client, is_rm=True) as lease:
+            file_client.delete_file(lease=lease, recursive=True)
+
+    def make_directory(self, path: UPath) -> None:
+        # It is not necessary to create directories in ADLS2
+        return None
+
+    def path_exists(self, path: UPath) -> bool:
+        try:
+            self.file_system_client.get_file_client(path.as_posix()).get_file_properties()
+        except ResourceNotFoundError:
+            return False
+        return True
+
+    def _uri_for_path(self, path: UPath, protocol: str = "abfss://") -> str:
+        return f"{protocol}{self.file_system_client.file_system_name}@{self.file_system_client.account_name}.dfs.core.windows.net/{path.as_posix()}"
+
+    @contextmanager
+    def _acquire_lease(self, client: Any, is_rm: bool = False) -> Iterator[str]:
+        lease_client = self.lease_client_constructor(client=client)
+        try:
+            # Unclear why this needs to be type-ignored
+            lease_client.acquire(lease_duration=self.lease_duration)
+            yield lease_client.id
+        finally:
+            # cannot release a lease on a file that no longer exists, so need to check
+            if not is_rm:
+                lease_client.release()
+
+    def load_from_path(self, context: InputContext, path: UPath) -> Any:
+        if context.dagster_type.typing_type == type(None):
+            return None
+        file = self.file_system_client.get_file_client(path.as_posix())
+        stream = file.download_file()
+        return pickle.loads(stream.readall())
+
+    def dump_to_path(self, context: OutputContext, obj: Any, path: UPath) -> None:
+        if self.path_exists(path):
+            context.log.warning(f"Removing existing ADLS2 key: {path}")
+            self.unlink(path)
+
+        pickled_obj = pickle.dumps(obj, PICKLE_PROTOCOL)
+        file = self.file_system_client.create_file(path.as_posix())
+        with self._acquire_lease(file) as lease:
+            file.upload_data(pickled_obj, lease=lease, overwrite=True)
+
+
+class ADLS2PickleIOManager(ConfigurableIOManager):
+    """Persistent IO manager using Azure Data Lake Storage Gen2 for storage.
+
+    Serializes objects via pickling. Suitable for objects storage for distributed executors, so long
+    as each execution node has network connectivity and credentials for ADLS and the backing
+    container.
+
+    Assigns each op output to a unique filepath containing run ID, step key, and output name.
+    Assigns each asset to a single filesystem path, at "<base_dir>/<asset_key>". If the asset key
+    has multiple components, the final component is used as the name of the file, and the preceding
+    components as parent directories under the base_dir.
+
+    Subsequent materializations of an asset will overwrite previous materializations of that asset.
+    With a base directory of "/my/base/path", an asset with key
+    `AssetKey(["one", "two", "three"])` would be stored in a file called "three" in a directory
+    with path "/my/base/path/one/two/".
+
+    Example usage:
+
+    1. Attach this IO manager to a set of assets.
+
+    .. code-block:: python
+
+        from dagster import Definitions, asset
+        from dagster_azure.adls2 import ADLS2PickleIOManager, ADLS2Resource, ADLS2SASToken
+
+        @asset
+        def asset1():
+            # create df ...
+            return df
+
+        @asset
+        def asset2(asset1):
+            return df[:5]
+
+        Definitions(
+            assets=[asset1, asset2],
+            resources={
+                "io_manager": ADLS2PickleIOManager(
+                    adls2_file_system="my-cool-fs",
+                    adls2_prefix="my-cool-prefix",
+                    adls2=ADLS2Resource(
+                        storage_account="my-storage-account",
+                        credential=ADLS2SASToken(token="my-sas-token"),
+                    ),
+                ),
+            },
+        )
+
+
+    2. Attach this IO manager to your job to make it available to your ops.
+
+    .. code-block:: python
+
+        from dagster import job
+        from dagster_azure.adls2 import ADLS2PickleIOManager, ADLS2Resource, ADLS2SASToken
+
+        @job(
+            resource_defs={
+                "io_manager": ADLS2PickleIOManager(
+                    adls2_file_system="my-cool-fs",
+                    adls2_prefix="my-cool-prefix",
+                    adls2=ADLS2Resource(
+                        storage_account="my-storage-account",
+                        credential=ADLS2SASToken(token="my-sas-token"),
+                    ),
+                ),
+            },
+        )
+        def my_job():
+            ...
+    """
+
+    adls2: ResourceDependency[ADLS2Resource]
+    adls2_file_system: str = Field(description="ADLS Gen2 file system name.")
+    adls2_prefix: str = Field(
+        default="dagster", description="ADLS Gen2 file system prefix to write to."
+    )
+    lease_duration: int = Field(
+        default=60,
+        description="Lease duration in seconds. Must be between 15 and 60 seconds or -1 for infinite.",
+    )
+
+    @classmethod
+    def _is_dagster_maintained(cls) -> bool:
+        return True
+
+    @property
+    @cached_method
+    def _internal_io_manager(self) -> PickledObjectADLS2IOManager:
+        return PickledObjectADLS2IOManager(
+            self.adls2_file_system,
+            self.adls2.adls2_client,
+            self.adls2.blob_client,
+            self.adls2.lease_client_constructor,
+            self.adls2_prefix,
+            self.lease_duration,
+        )
+
+    def load_input(self, context: "InputContext") -> Any:
+        return self._internal_io_manager.load_input(context)
+
+    def handle_output(self, context: "OutputContext", obj: Any) -> None:
+        self._internal_io_manager.handle_output(context, obj)
+
+
+@deprecated(
+    breaking_version="2.0",
+    additional_warn_text="Please use ADLS2PickleIOManager instead.",
+)
+class ConfigurablePickledObjectADLS2IOManager(ADLS2PickleIOManager):
+    """Renamed to ADLS2PickleIOManager. See ADLS2PickleIOManager for documentation."""
+
+    pass
+
+
+@dagster_maintained_io_manager
+@io_manager(
+    config_schema=ADLS2PickleIOManager.to_config_schema(),
+    required_resource_keys={"adls2"},
+)
+def adls2_pickle_io_manager(init_context: InitResourceContext) -> PickledObjectADLS2IOManager:
+    """Persistent IO manager using Azure Data Lake Storage Gen2 for storage.
+
+    Serializes objects via pickling. Suitable for objects storage for distributed executors, so long
+    as each execution node has network connectivity and credentials for ADLS and the backing
+    container.
+
+    Assigns each op output to a unique filepath containing run ID, step key, and output name.
+    Assigns each asset to a single filesystem path, at "<base_dir>/<asset_key>". If the asset key
+    has multiple components, the final component is used as the name of the file, and the preceding
+    components as parent directories under the base_dir.
+
+    Subsequent materializations of an asset will overwrite previous materializations of that asset.
+    With a base directory of "/my/base/path", an asset with key
+    `AssetKey(["one", "two", "three"])` would be stored in a file called "three" in a directory
+    with path "/my/base/path/one/two/".
+
+    Example usage:
+
+    Attach this IO manager to a set of assets.
+
+    .. code-block:: python
+
+        from dagster import Definitions, asset
+        from dagster_azure.adls2 import adls2_pickle_io_manager, adls2_resource
+
+        @asset
+        def asset1():
+            # create df ...
+            return df
+
+        @asset
+        def asset2(asset1):
+            return df[:5]
+
+        Definitions(
+            assets=[asset1, asset2],
+            resources={
+                "io_manager": adls2_pickle_io_manager.configured(
+                    {"adls2_file_system": "my-cool-fs", "adls2_prefix": "my-cool-prefix"}
+                ),
+                "adls2": adls2_resource,
+            },
+        )
+
+
+    Attach this IO manager to your job to make it available to your ops.
+
+    .. code-block:: python
+
+        from dagster import job
+        from dagster_azure.adls2 import adls2_pickle_io_manager, adls2_resource
+
+        @job(
+            resource_defs={
+                "io_manager": adls2_pickle_io_manager.configured(
+                    {"adls2_file_system": "my-cool-fs", "adls2_prefix": "my-cool-prefix"}
+                ),
+                "adls2": adls2_resource,
+            },
+        )
+        def my_job():
+            ...
+    """
+    adls_resource = init_context.resources.adls2
+    adls2_client = adls_resource.adls2_client
+    blob_client = adls_resource.blob_client
+    lease_client = adls_resource.lease_client_constructor
+    return PickledObjectADLS2IOManager(
+        init_context.resource_config["adls2_file_system"],
+        adls2_client,
+        blob_client,
+        lease_client,
+        init_context.resource_config.get("adls2_prefix"),
+        init_context.resource_config.get("lease_duration"),
+    )