flyte 2.0.0b32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of flyte might be problematic.
- flyte/__init__.py +108 -0
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/debug.py +38 -0
- flyte/_bin/runtime.py +195 -0
- flyte/_bin/serve.py +178 -0
- flyte/_build.py +26 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +147 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/local_cache.py +216 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +121 -0
- flyte/_code_bundle/_packaging.py +218 -0
- flyte/_code_bundle/_utils.py +347 -0
- flyte/_code_bundle/bundle.py +266 -0
- flyte/_constants.py +1 -0
- flyte/_context.py +155 -0
- flyte/_custom_context.py +73 -0
- flyte/_debug/__init__.py +0 -0
- flyte/_debug/constants.py +38 -0
- flyte/_debug/utils.py +17 -0
- flyte/_debug/vscode.py +307 -0
- flyte/_deploy.py +408 -0
- flyte/_deployer.py +109 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +122 -0
- flyte/_excepthook.py +37 -0
- flyte/_group.py +32 -0
- flyte/_hash.py +8 -0
- flyte/_image.py +1055 -0
- flyte/_initialize.py +628 -0
- flyte/_interface.py +119 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +129 -0
- flyte/_internal/controllers/_local_controller.py +239 -0
- flyte/_internal/controllers/_trace.py +48 -0
- flyte/_internal/controllers/remote/__init__.py +58 -0
- flyte/_internal/controllers/remote/_action.py +211 -0
- flyte/_internal/controllers/remote/_client.py +47 -0
- flyte/_internal/controllers/remote/_controller.py +583 -0
- flyte/_internal/controllers/remote/_core.py +465 -0
- flyte/_internal/controllers/remote/_informer.py +381 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +3 -0
- flyte/_internal/imagebuild/docker_builder.py +706 -0
- flyte/_internal/imagebuild/image_builder.py +277 -0
- flyte/_internal/imagebuild/remote_builder.py +386 -0
- flyte/_internal/imagebuild/utils.py +78 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +21 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +486 -0
- flyte/_internal/runtime/entrypoints.py +204 -0
- flyte/_internal/runtime/io.py +188 -0
- flyte/_internal/runtime/resources_serde.py +152 -0
- flyte/_internal/runtime/reuse.py +125 -0
- flyte/_internal/runtime/rusty.py +193 -0
- flyte/_internal/runtime/task_serde.py +362 -0
- flyte/_internal/runtime/taskrunner.py +209 -0
- flyte/_internal/runtime/trigger_serde.py +160 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_keyring/__init__.py +0 -0
- flyte/_keyring/file.py +115 -0
- flyte/_logging.py +300 -0
- flyte/_map.py +312 -0
- flyte/_module.py +72 -0
- flyte/_pod.py +30 -0
- flyte/_resources.py +473 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +102 -0
- flyte/_run.py +724 -0
- flyte/_secret.py +96 -0
- flyte/_task.py +550 -0
- flyte/_task_environment.py +316 -0
- flyte/_task_plugins.py +47 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +119 -0
- flyte/_trigger.py +1000 -0
- flyte/_utils/__init__.py +30 -0
- flyte/_utils/asyn.py +121 -0
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +27 -0
- flyte/_utils/docker_credentials.py +173 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +134 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/module_loader.py +104 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +34 -0
- flyte/app/__init__.py +22 -0
- flyte/app/_app_environment.py +157 -0
- flyte/app/_deploy.py +125 -0
- flyte/app/_input.py +160 -0
- flyte/app/_runtime/__init__.py +3 -0
- flyte/app/_runtime/app_serde.py +347 -0
- flyte/app/_types.py +101 -0
- flyte/app/extras/__init__.py +3 -0
- flyte/app/extras/_fastapi.py +151 -0
- flyte/cli/__init__.py +12 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_build.py +114 -0
- flyte/cli/_common.py +468 -0
- flyte/cli/_create.py +371 -0
- flyte/cli/_delete.py +45 -0
- flyte/cli/_deploy.py +293 -0
- flyte/cli/_gen.py +176 -0
- flyte/cli/_get.py +370 -0
- flyte/cli/_option.py +33 -0
- flyte/cli/_params.py +554 -0
- flyte/cli/_plugins.py +209 -0
- flyte/cli/_run.py +597 -0
- flyte/cli/_serve.py +64 -0
- flyte/cli/_update.py +37 -0
- flyte/cli/_user.py +17 -0
- flyte/cli/main.py +221 -0
- flyte/config/__init__.py +3 -0
- flyte/config/_config.py +248 -0
- flyte/config/_internal.py +73 -0
- flyte/config/_reader.py +225 -0
- flyte/connectors/__init__.py +11 -0
- flyte/connectors/_connector.py +270 -0
- flyte/connectors/_server.py +197 -0
- flyte/connectors/utils.py +135 -0
- flyte/errors.py +243 -0
- flyte/extend.py +19 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +286 -0
- flyte/git/__init__.py +3 -0
- flyte/git/_config.py +21 -0
- flyte/io/__init__.py +29 -0
- flyte/io/_dataframe/__init__.py +131 -0
- flyte/io/_dataframe/basic_dfs.py +223 -0
- flyte/io/_dataframe/dataframe.py +1026 -0
- flyte/io/_dir.py +910 -0
- flyte/io/_file.py +914 -0
- flyte/io/_hashing_io.py +342 -0
- flyte/models.py +479 -0
- flyte/py.typed +0 -0
- flyte/remote/__init__.py +35 -0
- flyte/remote/_action.py +738 -0
- flyte/remote/_app.py +57 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +189 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_auth_utils.py +14 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +403 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +117 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +213 -0
- flyte/remote/_client/auth/_client_config.py +85 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +152 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +128 -0
- flyte/remote/_common.py +30 -0
- flyte/remote/_console.py +19 -0
- flyte/remote/_data.py +161 -0
- flyte/remote/_logs.py +185 -0
- flyte/remote/_project.py +88 -0
- flyte/remote/_run.py +386 -0
- flyte/remote/_secret.py +142 -0
- flyte/remote/_task.py +527 -0
- flyte/remote/_trigger.py +306 -0
- flyte/remote/_user.py +33 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +182 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +36 -0
- flyte/storage/_config.py +237 -0
- flyte/storage/_parallel_reader.py +274 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +456 -0
- flyte/storage/_utils.py +5 -0
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +375 -0
- flyte/types/__init__.py +52 -0
- flyte/types/_interface.py +40 -0
- flyte/types/_pickle.py +145 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +119 -0
- flyte/types/_type_engine.py +2254 -0
- flyte/types/_utils.py +80 -0
- flyte-2.0.0b32.data/scripts/debug.py +38 -0
- flyte-2.0.0b32.data/scripts/runtime.py +195 -0
- flyte-2.0.0b32.dist-info/METADATA +351 -0
- flyte-2.0.0b32.dist-info/RECORD +204 -0
- flyte-2.0.0b32.dist-info/WHEEL +5 -0
- flyte-2.0.0b32.dist-info/entry_points.txt +7 -0
- flyte-2.0.0b32.dist-info/licenses/LICENSE +201 -0
- flyte-2.0.0b32.dist-info/top_level.txt +1 -0
flyte/extras/_container.py ADDED

@@ -0,0 +1,286 @@
import os
import pathlib
from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union

from flyteidl2.core import tasks_pb2

from flyte import Image, storage
from flyte._logging import logger
from flyte._task import TaskTemplate
from flyte.io import Dir, File
from flyte.models import NativeInterface, SerializationContext


def _extract_command_key(cmd: str, **kwargs) -> List[Any] | None:
    """
    Extract the key from the command using regex.
    """
    import re

    input_regex = r"\{\{\.inputs\.([a-zA-Z0-9_]+)\}\}"
    return re.findall(input_regex, cmd)


def _extract_path_command_key(cmd: str, input_data_dir: Optional[str]) -> Optional[str]:
    """
    Extract the key from the path-like command using regex.
    """
    import re

    input_data_dir = input_data_dir or ""
    input_regex = rf"{re.escape(input_data_dir)}/([\w\-.]+)"  # captures file or dir names

    match = re.search(input_regex, cmd)
    if match:
        return match.group(1)
    return None


class ContainerTask(TaskTemplate):
    """
    This is an intermediate class that represents Flyte Tasks that run a container at execution time. This covers the
    vast majority of tasks - the typical ``@task``-decorated tasks, for instance, all run a container. An example of
    something that doesn't run a container is the Athena SQL task.

    :param name: Name of the task
    :param image: The container image to use for the task. This can be a string or an Image object.
    :param command: The command to run in the container, as a list of strings.
    :param inputs: The inputs to the task. This is a dictionary of input names to types.
    :param arguments: The arguments to pass to the command. This is a list of strings.
    :param outputs: The outputs of the task. This is a dictionary of output names to types.
    :param input_data_dir: The directory where the input data is stored. This is a string or a Path object.
    :param output_data_dir: The directory where the output data is stored. This is a string or a Path object.
    :param metadata_format: The format of the output file. This can be "JSON", "YAML", or "PROTO".
    :param local_logs: If True, logs will be printed to the console in the local execution.
    """

    MetadataFormat = Literal["JSON", "YAML", "PROTO"]

    def __init__(
        self,
        name: str,
        image: Union[str, Image],
        command: List[str],
        inputs: Optional[Dict[str, Type]] = None,
        arguments: Optional[List[str]] = None,
        outputs: Optional[Dict[str, Type]] = None,
        input_data_dir: str | pathlib.Path = "/var/inputs",
        output_data_dir: str | pathlib.Path = "/var/outputs",
        metadata_format: MetadataFormat = "JSON",
        local_logs: bool = True,
        **kwargs,
    ):
        super().__init__(
            task_type="raw-container",
            name=name,
            image=image,
            interface=NativeInterface({k: (v, None) for k, v in inputs.items()} if inputs else {}, outputs or {}),
            **kwargs,
        )
        self._image = image
        if isinstance(image, str):
            if image == "auto":
                self._image = Image.from_debian_base()
            else:
                self._image = Image.from_base(image)

        if command and any(not isinstance(c, str) for c in command):
            raise ValueError("All elements in the command list must be strings.")
        if arguments and any(not isinstance(a, str) for a in arguments):
            raise ValueError("All elements in the arguments list must be strings.")
        self._cmd = command
        self._args = arguments
        self._input_data_dir = input_data_dir
        if isinstance(input_data_dir, str):
            self._input_data_dir = pathlib.Path(input_data_dir)
        self._output_data_dir = output_data_dir
        if isinstance(output_data_dir, str):
            self._output_data_dir = pathlib.Path(output_data_dir)
        self._metadata_format = metadata_format
        self._inputs = inputs
        self._outputs = outputs
        self.local_logs = local_logs

    def _render_command_and_volume_binding(self, cmd: str, **kwargs) -> Tuple[str, Dict[str, Dict[str, str]]]:
        """
        We support template-style references to inputs, e.g., "{{.inputs.infile}}".

        For File and Dir commands, e.g., "/var/inputs/infile", we extract the key from strings that
        begin with the specified `input_data_dir`.
        """
        from flyte.io import Dir, File

        volume_binding: Dict[str, Dict[str, str]] = {}
        path_k = _extract_path_command_key(cmd, str(self._input_data_dir))
        keys = [path_k] if path_k else _extract_command_key(cmd)

        command = cmd

        if keys:
            for k in keys:
                input_val = kwargs.get(k)
                # TODO: Add support for the file and directory transformers first
                if input_val and type(input_val) in [File, Dir]:
                    if not path_k:
                        raise AssertionError(
                            "File and Directory commands should not use the template syntax "
                            "like this: {{.inputs.infile}}\n"
                            "Please use a path-like syntax, such as: /var/inputs/infile.\n"
                            "This requirement is due to how Flyte Propeller processes template syntax inputs."
                        )
                    local_flyte_file_or_dir_path = input_val.path
                    remote_flyte_file_or_dir_path = os.path.join(self._input_data_dir, k)  # type: ignore
                    volume_binding[local_flyte_file_or_dir_path] = {
                        "bind": remote_flyte_file_or_dir_path,
                        "mode": "rw",
                    }
                else:
                    command = command.replace(f"{{{{.inputs.{k}}}}}", str(input_val))
        else:
            command = cmd

        return command, volume_binding

    def _prepare_command_and_volumes(
        self, cmd_and_args: List[str], **kwargs
    ) -> Tuple[List[str], Dict[str, Dict[str, str]]]:
        """
        Prepares the command and volume bindings for the container based on input arguments and command templates.

        Parameters:
        - cmd_and_args (List[str]): The command and arguments to prepare.
        - **kwargs: Keyword arguments representing task inputs.

        Returns:
        - Tuple[List[str], Dict[str, Dict[str, str]]]: A tuple containing the prepared commands and volume bindings.
        """

        commands = []
        volume_bindings = {}

        for cmd in cmd_and_args:
            command, volume_binding = self._render_command_and_volume_binding(cmd, **kwargs)
            commands.append(command)
            volume_bindings.update(volume_binding)

        return commands, volume_bindings

    def _pull_image_if_not_exists(self, client, image: str):
        try:
            if not client.images.list(filters={"reference": image}):
                logger.info(f"Pulling image: {image} for container task: {self.name}")
                client.images.pull(image)
        except Exception as e:
            logger.error(f"Failed to pull image {image}: {e!s}")
            raise

    def _string_to_timedelta(self, s: str):
        # Parses the str() form of a datetime.timedelta, e.g. "1 day, 2:03:04.000005".
        import datetime
        import re

        regex = r"(?:(\d+) days?, )?(?:(\d+):)?(\d+):(\d+)(?:\.(\d+))?"
        parts = re.match(regex, s)
        if not parts:
            raise ValueError("Invalid timedelta string format")

        days = int(parts.group(1)) if parts.group(1) else 0
        hours = int(parts.group(2)) if parts.group(2) else 0
        minutes = int(parts.group(3)) if parts.group(3) else 0
        seconds = int(parts.group(4)) if parts.group(4) else 0
        microseconds = int(parts.group(5)) if parts.group(5) else 0

        return datetime.timedelta(
            days=days,
            hours=hours,
            minutes=minutes,
            seconds=seconds,
            microseconds=microseconds,
        )

    async def _convert_output_val_to_correct_type(
        self, output_path: pathlib.Path, output_val: Any, output_type: Type
    ) -> Any:
        import datetime

        if issubclass(output_type, bool):
            return output_val.lower() != "false"
        elif issubclass(output_type, datetime.datetime):
            return datetime.datetime.fromisoformat(output_val)
        elif issubclass(output_type, datetime.timedelta):
            return self._string_to_timedelta(output_val)
        elif issubclass(output_type, File):
            return await File.from_local(output_path)
        elif issubclass(output_type, Dir):
            return await Dir.from_local(output_path)
        else:
            return output_type(output_val)

    async def _get_output(self, output_directory: pathlib.Path) -> Tuple[Any]:
        output_items = []
        if self._outputs:
            for k, output_type in self._outputs.items():
                output_path = output_directory / k
                if os.path.isfile(output_path):
                    with output_path.open("r") as f:
                        output_val = f.read()
                else:
                    output_val = None
                parsed = await self._convert_output_val_to_correct_type(output_path, output_val, output_type)
                output_items.append(parsed)
        # return a tuple so that each element is treated as a separate output.
        # this allows flyte to map the user-defined output types (dict) to individual values.
        # if we returned a list instead, it would be treated as a single output.
        return tuple(output_items)

    async def execute(self, **kwargs) -> Any:
        try:
            import docker
        except ImportError:
            raise ImportError("The Docker SDK for Python is not installed. Please install it with `pip install docker`.")

        # Normalize the input and output directories
        self._input_data_dir = os.path.normpath(self._input_data_dir) if self._input_data_dir else ""
        self._output_data_dir = os.path.normpath(self._output_data_dir) if self._output_data_dir else ""

        output_directory = storage.get_random_local_directory()
        cmd_and_args = (self._cmd or []) + (self._args or [])
        commands, volume_bindings = self._prepare_command_and_volumes(cmd_and_args, **kwargs)
        volume_bindings[str(output_directory)] = {"bind": self._output_data_dir, "mode": "rw"}

        client = docker.from_env()
        if isinstance(self._image, str):
            raise AssertionError(f"Only Image objects are supported, not strings. Got {self._image} instead.")
        uri = self._image.uri
        self._pull_image_if_not_exists(client, uri)
        print(f"Command: {commands!r}")

        container = client.containers.run(uri, command=commands, remove=True, volumes=volume_bindings, detach=True)

        # Wait for the container to finish the task
        # TODO: Add a 'timeout' parameter to control the max wait time for the container to finish the task.

        if self.local_logs:
            for log in container.logs(stream=True):
                print(f"[Local Container] {log.strip()!r}")

        container.wait()

        output = await self._get_output(output_directory)
        return output

    def data_loading_config(self, sctx: SerializationContext) -> tasks_pb2.DataLoadingConfig:
        literal_to_protobuf = {
            "JSON": tasks_pb2.DataLoadingConfig.JSON,
            "YAML": tasks_pb2.DataLoadingConfig.YAML,
            "PROTO": tasks_pb2.DataLoadingConfig.PROTO,
        }

        return tasks_pb2.DataLoadingConfig(
            input_path=str(self._input_data_dir) if self._input_data_dir else None,
            output_path=str(self._output_data_dir) if self._output_data_dir else None,
            enabled=True,
            format=literal_to_protobuf.get(self._metadata_format, tasks_pb2.DataLoadingConfig.JSON),
        )

    def container_args(self, sctx: SerializationContext) -> List[str]:
        return self._cmd + (self._args if self._args else [])
flyte/git/__init__.py ADDED
flyte/git/_config.py ADDED

@@ -0,0 +1,21 @@
import pathlib
import subprocess

import flyte.config


def config_from_root(path: pathlib.Path | str = ".flyte/config.yaml") -> flyte.config.Config | None:
    """Get the config file from the git root directory.

    By default, the config file is expected to be in `.flyte/config.yaml` in the git root directory.
    """
    try:
        result = subprocess.run(["git", "rev-parse", "--show-toplevel"], check=False, capture_output=True, text=True)
        if result.returncode != 0:
            return None
        root = pathlib.Path(result.stdout.strip())
        if not (root / path).exists():
            return None
        return flyte.config.auto(root / path)
    except Exception:
        return None
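
A sketch of the intended lookup, assuming config_from_root is re-exported from flyte.git (the three-line flyte/git/__init__.py above suggests so):

from flyte.git import config_from_root

# Returns None unless run inside a git checkout whose root contains .flyte/config.yaml.
cfg = config_from_root()
if cfg is None:
    print("no repo-level config found; falling back to other config sources")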
flyte/io/__init__.py ADDED

@@ -0,0 +1,29 @@
"""
## IO data types

This package contains additional data types beyond the primitive data types in Python to abstract data flow
of large datasets in Union.
"""

__all__ = [
    "PARQUET",
    "DataFrame",
    "DataFrameDecoder",
    "DataFrameEncoder",
    "DataFrameTransformerEngine",
    "Dir",
    "File",
    "lazy_import_dataframe_handler",
]

from ._dataframe import (
    PARQUET,
    DataFrame,
    DataFrameDecoder,
    DataFrameEncoder,
    DataFrameTransformerEngine,
    lazy_import_dataframe_handler,
)
from ._dir import Dir
from ._file import File
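
These exports are the user-facing IO types. A hypothetical signature showing how they typically appear (the function is illustrative; translating these annotations to and from literals is the job of the type engine, flyte/types/_type_engine.py in the file list above):

from flyte.io import DataFrame, Dir, File


async def summarize(raw: File, assets: Dir) -> DataFrame:
    """Hypothetical task body: inputs arrive as File/Dir handles, not bare local paths."""
    ...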
flyte/io/_dataframe/__init__.py ADDED

@@ -0,0 +1,131 @@
"""
Flytekit DataFrame
==========================================================
.. currentmodule:: flyte.io._dataframe

.. autosummary::
   :template: custom.rst
   :toctree: generated/

   DataFrame
   DataFrameDecoder
   DataFrameEncoder
"""

import functools

from flyte._logging import logger
from flyte._utils.lazy_module import is_imported

from .dataframe import (
    PARQUET,
    DataFrame,
    DataFrameDecoder,
    DataFrameEncoder,
    DataFrameTransformerEngine,
    DuplicateHandlerError,
)


@functools.lru_cache(maxsize=None)
def register_csv_handlers():
    from .basic_dfs import CSVToPandasDecodingHandler, PandasToCSVEncodingHandler

    DataFrameTransformerEngine.register(PandasToCSVEncodingHandler(), default_format_for_type=True)
    DataFrameTransformerEngine.register(CSVToPandasDecodingHandler(), default_format_for_type=True)


@functools.lru_cache(maxsize=None)
def register_pandas_handlers():
    import pandas as pd

    from flyte.types._renderer import TopFrameRenderer

    from .basic_dfs import PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler

    DataFrameTransformerEngine.register(PandasToParquetEncodingHandler(), default_format_for_type=True)
    DataFrameTransformerEngine.register(ParquetToPandasDecodingHandler(), default_format_for_type=True)
    DataFrameTransformerEngine.register_renderer(pd.DataFrame, TopFrameRenderer())


@functools.lru_cache(maxsize=None)
def register_arrow_handlers():
    import pyarrow as pa

    from flyte.types._renderer import ArrowRenderer

    from .basic_dfs import ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler

    DataFrameTransformerEngine.register(ArrowToParquetEncodingHandler(), default_format_for_type=True)
    DataFrameTransformerEngine.register(ParquetToArrowDecodingHandler(), default_format_for_type=True)
    DataFrameTransformerEngine.register_renderer(pa.Table, ArrowRenderer())


@functools.lru_cache(maxsize=None)
def register_bigquery_handlers():
    try:
        from .bigquery import (
            ArrowToBQEncodingHandlers,
            BQToArrowDecodingHandler,
            BQToPandasDecodingHandler,
            PandasToBQEncodingHandlers,
        )

        DataFrameTransformerEngine.register(PandasToBQEncodingHandlers())
        DataFrameTransformerEngine.register(BQToPandasDecodingHandler())
        DataFrameTransformerEngine.register(ArrowToBQEncodingHandlers())
        DataFrameTransformerEngine.register(BQToArrowDecodingHandler())
    except ImportError:
        logger.info(
            "We won't register bigquery handler for structured dataset because "
            "we can't find the packages google-cloud-bigquery-storage and google-cloud-bigquery"
        )


@functools.lru_cache(maxsize=None)
def register_snowflake_handlers():
    try:
        from .snowflake import PandasToSnowflakeEncodingHandlers, SnowflakeToPandasDecodingHandler

        DataFrameTransformerEngine.register(SnowflakeToPandasDecodingHandler())
        DataFrameTransformerEngine.register(PandasToSnowflakeEncodingHandlers())

    except ImportError:
        logger.info(
            "We won't register snowflake handler for structured dataset because "
            "we can't find package snowflake-connector-python"
        )


def lazy_import_dataframe_handler():
    if is_imported("pandas"):
        try:
            register_pandas_handlers()
            register_csv_handlers()
        except DuplicateHandlerError:
            logger.debug("Transformer for pandas is already registered.")
    if is_imported("pyarrow"):
        try:
            register_arrow_handlers()
        except DuplicateHandlerError:
            logger.debug("Transformer for arrow is already registered.")
    if is_imported("google.cloud.bigquery"):
        try:
            register_bigquery_handlers()
        except DuplicateHandlerError:
            logger.debug("Transformer for bigquery is already registered.")
    if is_imported("snowflake.connector"):
        try:
            register_snowflake_handlers()
        except DuplicateHandlerError:
            logger.debug("Transformer for snowflake is already registered.")


__all__ = [
    "PARQUET",
    "DataFrame",
    "DataFrameDecoder",
    "DataFrameEncoder",
    "DataFrameTransformerEngine",
    "lazy_import_dataframe_handler",
]
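
Because every register_* function is wrapped in functools.lru_cache and lazy_import_dataframe_handler additionally swallows DuplicateHandlerError, registration is idempotent and can be triggered from any encode/decode site. A sketch of the call pattern (the call site is illustrative):

import pandas as pd  # once pandas is in sys.modules, is_imported("pandas") should hold

from flyte.io import lazy_import_dataframe_handler

lazy_import_dataframe_handler()  # first call registers the pandas + CSV handlers
lazy_import_dataframe_handler()  # subsequent calls are no-ops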
flyte/io/_dataframe/basic_dfs.py ADDED

@@ -0,0 +1,223 @@
import os
import typing
from pathlib import Path
from typing import TypeVar

from flyteidl2.core import literals_pb2, types_pb2
from fsspec.core import split_protocol, strip_protocol

import flyte.storage as storage
from flyte._logging import logger
from flyte._utils import lazy_module
from flyte.io._dataframe.dataframe import (
    CSV,
    PARQUET,
    DataFrame,
    DataFrameDecoder,
    DataFrameEncoder,
)

if typing.TYPE_CHECKING:
    import pandas as pd
    import pyarrow as pa
else:
    pd = lazy_module("pandas")
    pa = lazy_module("pyarrow")

T = TypeVar("T")


def get_pandas_storage_options(uri: str, anonymous: bool = False) -> typing.Optional[typing.Dict]:
    from pandas.io.common import is_fsspec_url  # type: ignore

    if is_fsspec_url(uri):
        if uri.startswith("s3"):
            return storage.get_configured_fsspec_kwargs("s3", anonymous=anonymous)
        return {}

    # Pandas does not allow storage_options for non-fsspec paths, e.g. local.
    return None


class PandasToCSVEncodingHandler(DataFrameEncoder):
    def __init__(self):
        super().__init__(pd.DataFrame, None, CSV)

    async def encode(
        self,
        dataframe: DataFrame,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        if not dataframe.uri:
            from flyte._context import internal_ctx

            ctx = internal_ctx()
            uri = ctx.raw_data.get_random_remote_path()
        else:
            uri = typing.cast(str, dataframe.uri)

        if not storage.is_remote(uri):
            Path(uri).mkdir(parents=True, exist_ok=True)
        csv_file = storage.join(uri, "data.csv")
        df = typing.cast(pd.DataFrame, dataframe.val)
        df.to_csv(
            csv_file,
            index=False,
            storage_options=get_pandas_storage_options(uri=csv_file),
        )
        structured_dataset_type.format = CSV
        return literals_pb2.StructuredDataset(
            uri=uri, metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type)
        )


class CSVToPandasDecodingHandler(DataFrameDecoder):
    def __init__(self):
        super().__init__(pd.DataFrame, None, CSV)

    async def decode(
        self,
        proto_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pd.DataFrame":
        uri = proto_value.uri
        columns = None
        kwargs = get_pandas_storage_options(uri=uri)
        csv_file = storage.join(uri, "data.csv")
        if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
            columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
        try:
            import io

            # The pattern used here is a bit wonky because of obstore issues with csv, getting early eof error.
            buf = io.BytesIO()
            async for chunk in storage.get_stream(csv_file):
                buf.write(chunk)
            buf.seek(0)
            df = pd.read_csv(buf)
            return df

        except Exception as exc:
            if exc.__class__.__name__ == "NoCredentialsError":
                logger.debug("S3 source detected, attempting anonymous S3 access")
                kwargs = get_pandas_storage_options(uri=uri, anonymous=True)
                return pd.read_csv(csv_file, usecols=columns, storage_options=kwargs)
            else:
                raise


class PandasToParquetEncodingHandler(DataFrameEncoder):
    def __init__(self):
        super().__init__(pd.DataFrame, None, PARQUET)

    async def encode(
        self,
        dataframe: DataFrame,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        if not dataframe.uri:
            from flyte._context import internal_ctx

            ctx = internal_ctx()
            uri = str(ctx.raw_data.get_random_remote_path())
        else:
            uri = typing.cast(str, dataframe.uri)

        if not storage.is_remote(uri):
            Path(uri).mkdir(parents=True, exist_ok=True)
        path = os.path.join(uri, f"{0:05}")
        df = typing.cast(pd.DataFrame, dataframe.val)
        df.to_parquet(
            path,
            coerce_timestamps="us",
            allow_truncated_timestamps=False,
            storage_options=get_pandas_storage_options(uri=path),
        )
        structured_dataset_type.format = PARQUET
        return literals_pb2.StructuredDataset(
            uri=uri, metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type)
        )


class ParquetToPandasDecodingHandler(DataFrameDecoder):
    def __init__(self):
        super().__init__(pd.DataFrame, None, PARQUET)

    async def decode(
        self,
        flyte_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pd.DataFrame":
        uri = flyte_value.uri
        columns = None
        kwargs = get_pandas_storage_options(uri=uri)
        if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
            columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
        try:
            return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
        except Exception as exc:
            if exc.__class__.__name__ == "NoCredentialsError":
                logger.debug("S3 source detected, attempting anonymous S3 access")
                kwargs = get_pandas_storage_options(uri=uri, anonymous=True)
                return pd.read_parquet(uri, columns=columns, storage_options=kwargs)
            else:
                raise


class ArrowToParquetEncodingHandler(DataFrameEncoder):
    def __init__(self):
        super().__init__(pa.Table, None, PARQUET)

    async def encode(
        self,
        dataframe: DataFrame,
        dataframe_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        import pyarrow.parquet as pq

        if not dataframe.uri:
            from flyte._context import internal_ctx

            ctx = internal_ctx()
            uri = ctx.raw_data.get_random_remote_path()
        else:
            uri = typing.cast(str, dataframe.uri)

        if not storage.is_remote(uri):
            Path(uri).mkdir(parents=True, exist_ok=True)
        path = os.path.join(uri, f"{0:05}")
        filesystem = storage.get_underlying_filesystem(path=path)
        pq.write_table(dataframe.val, strip_protocol(path), filesystem=filesystem)
        return literals_pb2.StructuredDataset(uri=uri, metadata=literals_pb2.StructuredDatasetMetadata(dataframe_type))


class ParquetToArrowDecodingHandler(DataFrameDecoder):
    def __init__(self):
        super().__init__(pa.Table, None, PARQUET)

    async def decode(
        self,
        proto_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pa.Table":
        import pyarrow.parquet as pq

        uri = proto_value.uri
        if not storage.is_remote(uri):
            Path(uri).parent.mkdir(parents=True, exist_ok=True)
        _, path = split_protocol(uri)

        columns = None
        if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
            columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
        try:
            return pq.read_table(path, columns=columns)
        except Exception as exc:
            if exc.__class__.__name__ == "NoCredentialsError":
                logger.debug("S3 source detected, attempting anonymous S3 access")
                fs = storage.get_underlying_filesystem(path=uri, anonymous=True)
                if fs is not None:
                    return pq.read_table(path, filesystem=fs, columns=columns)
                return None
            else:
                raise
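
Note that the parquet encoders write a single part file named f"{0:05}" (i.e. 00000) under the dataset URI, and the decoders read the URI back as a directory. A local round-trip sketch that mirrors that layout (paths illustrative; pandas and pyarrow assumed installed):

from pathlib import Path

import pandas as pd

uri = "/tmp/flyte-demo-dataset"  # illustrative local URI
Path(uri).mkdir(parents=True, exist_ok=True)
pd.DataFrame({"a": [1, 2, 3]}).to_parquet(f"{uri}/{0:05}")  # writes <uri>/00000
print(pd.read_parquet(uri))  # reads the whole directory back, as the decoder does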