flyte 0.0.1b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +62 -0
- flyte/_api_commons.py +3 -0
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/runtime.py +126 -0
- flyte/_build.py +25 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +146 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_cli/__init__.py +0 -0
- flyte/_cli/_common.py +287 -0
- flyte/_cli/_create.py +42 -0
- flyte/_cli/_delete.py +23 -0
- flyte/_cli/_deploy.py +140 -0
- flyte/_cli/_get.py +235 -0
- flyte/_cli/_run.py +152 -0
- flyte/_cli/main.py +72 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +113 -0
- flyte/_code_bundle/_packaging.py +187 -0
- flyte/_code_bundle/_utils.py +339 -0
- flyte/_code_bundle/bundle.py +178 -0
- flyte/_context.py +146 -0
- flyte/_datastructures.py +342 -0
- flyte/_deploy.py +202 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +43 -0
- flyte/_group.py +31 -0
- flyte/_hash.py +23 -0
- flyte/_image.py +760 -0
- flyte/_initialize.py +634 -0
- flyte/_interface.py +84 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +115 -0
- flyte/_internal/controllers/_local_controller.py +118 -0
- flyte/_internal/controllers/_trace.py +40 -0
- flyte/_internal/controllers/pbhash.py +39 -0
- flyte/_internal/controllers/remote/__init__.py +40 -0
- flyte/_internal/controllers/remote/_action.py +141 -0
- flyte/_internal/controllers/remote/_client.py +43 -0
- flyte/_internal/controllers/remote/_controller.py +361 -0
- flyte/_internal/controllers/remote/_core.py +402 -0
- flyte/_internal/controllers/remote/_informer.py +361 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +11 -0
- flyte/_internal/imagebuild/docker_builder.py +416 -0
- flyte/_internal/imagebuild/image_builder.py +241 -0
- flyte/_internal/imagebuild/remote_builder.py +0 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +54 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +199 -0
- flyte/_internal/runtime/entrypoints.py +135 -0
- flyte/_internal/runtime/io.py +136 -0
- flyte/_internal/runtime/resources_serde.py +138 -0
- flyte/_internal/runtime/task_serde.py +210 -0
- flyte/_internal/runtime/taskrunner.py +190 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_logging.py +124 -0
- flyte/_protos/__init__.py +0 -0
- flyte/_protos/common/authorization_pb2.py +66 -0
- flyte/_protos/common/authorization_pb2.pyi +108 -0
- flyte/_protos/common/authorization_pb2_grpc.py +4 -0
- flyte/_protos/common/identifier_pb2.py +71 -0
- flyte/_protos/common/identifier_pb2.pyi +82 -0
- flyte/_protos/common/identifier_pb2_grpc.py +4 -0
- flyte/_protos/common/identity_pb2.py +48 -0
- flyte/_protos/common/identity_pb2.pyi +72 -0
- flyte/_protos/common/identity_pb2_grpc.py +4 -0
- flyte/_protos/common/list_pb2.py +36 -0
- flyte/_protos/common/list_pb2.pyi +69 -0
- flyte/_protos/common/list_pb2_grpc.py +4 -0
- flyte/_protos/common/policy_pb2.py +37 -0
- flyte/_protos/common/policy_pb2.pyi +27 -0
- flyte/_protos/common/policy_pb2_grpc.py +4 -0
- flyte/_protos/common/role_pb2.py +37 -0
- flyte/_protos/common/role_pb2.pyi +53 -0
- flyte/_protos/common/role_pb2_grpc.py +4 -0
- flyte/_protos/common/runtime_version_pb2.py +28 -0
- flyte/_protos/common/runtime_version_pb2.pyi +24 -0
- flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
- flyte/_protos/logs/dataplane/payload_pb2.py +96 -0
- flyte/_protos/logs/dataplane/payload_pb2.pyi +168 -0
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/definition_pb2.py +49 -0
- flyte/_protos/secret/definition_pb2.pyi +93 -0
- flyte/_protos/secret/definition_pb2_grpc.py +4 -0
- flyte/_protos/secret/payload_pb2.py +62 -0
- flyte/_protos/secret/payload_pb2.pyi +94 -0
- flyte/_protos/secret/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/secret_pb2.py +38 -0
- flyte/_protos/secret/secret_pb2.pyi +6 -0
- flyte/_protos/secret/secret_pb2_grpc.py +198 -0
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
- flyte/_protos/validate/validate/validate_pb2.py +76 -0
- flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
- flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- flyte/_protos/workflow/queue_service_pb2.py +106 -0
- flyte/_protos/workflow/queue_service_pb2.pyi +141 -0
- flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- flyte/_protos/workflow/run_definition_pb2.py +128 -0
- flyte/_protos/workflow/run_definition_pb2.pyi +310 -0
- flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
- flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- flyte/_protos/workflow/run_service_pb2.py +133 -0
- flyte/_protos/workflow/run_service_pb2.pyi +175 -0
- flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
- flyte/_protos/workflow/state_service_pb2.py +58 -0
- flyte/_protos/workflow/state_service_pb2.pyi +71 -0
- flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
- flyte/_protos/workflow/task_definition_pb2.py +72 -0
- flyte/_protos/workflow/task_definition_pb2.pyi +65 -0
- flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/task_service_pb2.py +44 -0
- flyte/_protos/workflow/task_service_pb2.pyi +31 -0
- flyte/_protos/workflow/task_service_pb2_grpc.py +104 -0
- flyte/_resources.py +226 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +25 -0
- flyte/_run.py +411 -0
- flyte/_secret.py +61 -0
- flyte/_task.py +367 -0
- flyte/_task_environment.py +200 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +128 -0
- flyte/_utils/__init__.py +20 -0
- flyte/_utils/asyn.py +119 -0
- flyte/_utils/coro_management.py +25 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +108 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +21 -0
- flyte/connectors/__init__.py +0 -0
- flyte/errors.py +143 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +273 -0
- flyte/io/__init__.py +11 -0
- flyte/io/_dataframe.py +0 -0
- flyte/io/_dir.py +448 -0
- flyte/io/_file.py +468 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte/io/pickle/transformer.py +117 -0
- flyte/io/structured_dataset/__init__.py +129 -0
- flyte/io/structured_dataset/basic_dfs.py +219 -0
- flyte/io/structured_dataset/structured_dataset.py +1061 -0
- flyte/py.typed +0 -0
- flyte/remote/__init__.py +25 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +131 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +397 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +184 -0
- flyte/remote/_client/auth/_client_config.py +83 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +143 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +95 -0
- flyte/remote/_console.py +18 -0
- flyte/remote/_data.py +155 -0
- flyte/remote/_logs.py +116 -0
- flyte/remote/_project.py +86 -0
- flyte/remote/_run.py +873 -0
- flyte/remote/_secret.py +132 -0
- flyte/remote/_task.py +227 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +178 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +24 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +251 -0
- flyte/storage/_utils.py +5 -0
- flyte/types/__init__.py +13 -0
- flyte/types/_interface.py +25 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +120 -0
- flyte/types/_type_engine.py +2210 -0
- flyte/types/_utils.py +80 -0
- flyte-0.0.1b0.dist-info/METADATA +179 -0
- flyte-0.0.1b0.dist-info/RECORD +390 -0
- flyte-0.0.1b0.dist-info/WHEEL +5 -0
- flyte-0.0.1b0.dist-info/entry_points.txt +3 -0
- flyte-0.0.1b0.dist-info/top_level.txt +1 -0
- union/__init__.py +54 -0
- union/_api_commons.py +3 -0
- union/_bin/__init__.py +0 -0
- union/_bin/runtime.py +113 -0
- union/_build.py +25 -0
- union/_cache/__init__.py +12 -0
- union/_cache/cache.py +141 -0
- union/_cache/defaults.py +9 -0
- union/_cache/policy_function_body.py +42 -0
- union/_cli/__init__.py +0 -0
- union/_cli/_common.py +263 -0
- union/_cli/_create.py +40 -0
- union/_cli/_delete.py +23 -0
- union/_cli/_deploy.py +120 -0
- union/_cli/_get.py +162 -0
- union/_cli/_params.py +579 -0
- union/_cli/_run.py +150 -0
- union/_cli/main.py +72 -0
- union/_code_bundle/__init__.py +8 -0
- union/_code_bundle/_ignore.py +113 -0
- union/_code_bundle/_packaging.py +187 -0
- union/_code_bundle/_utils.py +342 -0
- union/_code_bundle/bundle.py +176 -0
- union/_context.py +146 -0
- union/_datastructures.py +295 -0
- union/_deploy.py +185 -0
- union/_doc.py +29 -0
- union/_docstring.py +26 -0
- union/_environment.py +43 -0
- union/_group.py +31 -0
- union/_hash.py +23 -0
- union/_image.py +760 -0
- union/_initialize.py +585 -0
- union/_interface.py +84 -0
- union/_internal/__init__.py +3 -0
- union/_internal/controllers/__init__.py +77 -0
- union/_internal/controllers/_local_controller.py +77 -0
- union/_internal/controllers/pbhash.py +39 -0
- union/_internal/controllers/remote/__init__.py +40 -0
- union/_internal/controllers/remote/_action.py +131 -0
- union/_internal/controllers/remote/_client.py +43 -0
- union/_internal/controllers/remote/_controller.py +169 -0
- union/_internal/controllers/remote/_core.py +341 -0
- union/_internal/controllers/remote/_informer.py +260 -0
- union/_internal/controllers/remote/_service_protocol.py +44 -0
- union/_internal/imagebuild/__init__.py +11 -0
- union/_internal/imagebuild/docker_builder.py +416 -0
- union/_internal/imagebuild/image_builder.py +243 -0
- union/_internal/imagebuild/remote_builder.py +0 -0
- union/_internal/resolvers/__init__.py +0 -0
- union/_internal/resolvers/_task_module.py +31 -0
- union/_internal/resolvers/common.py +24 -0
- union/_internal/resolvers/default.py +27 -0
- union/_internal/runtime/__init__.py +0 -0
- union/_internal/runtime/convert.py +163 -0
- union/_internal/runtime/entrypoints.py +121 -0
- union/_internal/runtime/io.py +136 -0
- union/_internal/runtime/resources_serde.py +134 -0
- union/_internal/runtime/task_serde.py +202 -0
- union/_internal/runtime/taskrunner.py +179 -0
- union/_internal/runtime/types_serde.py +53 -0
- union/_logging.py +124 -0
- union/_protos/__init__.py +0 -0
- union/_protos/common/authorization_pb2.py +66 -0
- union/_protos/common/authorization_pb2.pyi +106 -0
- union/_protos/common/authorization_pb2_grpc.py +4 -0
- union/_protos/common/identifier_pb2.py +71 -0
- union/_protos/common/identifier_pb2.pyi +82 -0
- union/_protos/common/identifier_pb2_grpc.py +4 -0
- union/_protos/common/identity_pb2.py +48 -0
- union/_protos/common/identity_pb2.pyi +72 -0
- union/_protos/common/identity_pb2_grpc.py +4 -0
- union/_protos/common/list_pb2.py +36 -0
- union/_protos/common/list_pb2.pyi +69 -0
- union/_protos/common/list_pb2_grpc.py +4 -0
- union/_protos/common/policy_pb2.py +37 -0
- union/_protos/common/policy_pb2.pyi +27 -0
- union/_protos/common/policy_pb2_grpc.py +4 -0
- union/_protos/common/role_pb2.py +37 -0
- union/_protos/common/role_pb2.pyi +51 -0
- union/_protos/common/role_pb2_grpc.py +4 -0
- union/_protos/common/runtime_version_pb2.py +28 -0
- union/_protos/common/runtime_version_pb2.pyi +24 -0
- union/_protos/common/runtime_version_pb2_grpc.py +4 -0
- union/_protos/logs/dataplane/payload_pb2.py +96 -0
- union/_protos/logs/dataplane/payload_pb2.pyi +168 -0
- union/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- union/_protos/secret/definition_pb2.py +49 -0
- union/_protos/secret/definition_pb2.pyi +93 -0
- union/_protos/secret/definition_pb2_grpc.py +4 -0
- union/_protos/secret/payload_pb2.py +62 -0
- union/_protos/secret/payload_pb2.pyi +94 -0
- union/_protos/secret/payload_pb2_grpc.py +4 -0
- union/_protos/secret/secret_pb2.py +38 -0
- union/_protos/secret/secret_pb2.pyi +6 -0
- union/_protos/secret/secret_pb2_grpc.py +198 -0
- union/_protos/validate/validate/validate_pb2.py +76 -0
- union/_protos/workflow/node_execution_service_pb2.py +26 -0
- union/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- union/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- union/_protos/workflow/queue_service_pb2.py +75 -0
- union/_protos/workflow/queue_service_pb2.pyi +103 -0
- union/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- union/_protos/workflow/run_definition_pb2.py +100 -0
- union/_protos/workflow/run_definition_pb2.pyi +256 -0
- union/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- union/_protos/workflow/run_logs_service_pb2.py +41 -0
- union/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- union/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- union/_protos/workflow/run_service_pb2.py +133 -0
- union/_protos/workflow/run_service_pb2.pyi +173 -0
- union/_protos/workflow/run_service_pb2_grpc.py +412 -0
- union/_protos/workflow/state_service_pb2.py +58 -0
- union/_protos/workflow/state_service_pb2.pyi +69 -0
- union/_protos/workflow/state_service_pb2_grpc.py +138 -0
- union/_protos/workflow/task_definition_pb2.py +72 -0
- union/_protos/workflow/task_definition_pb2.pyi +65 -0
- union/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- union/_protos/workflow/task_service_pb2.py +44 -0
- union/_protos/workflow/task_service_pb2.pyi +31 -0
- union/_protos/workflow/task_service_pb2_grpc.py +104 -0
- union/_resources.py +226 -0
- union/_retry.py +32 -0
- union/_reusable_environment.py +25 -0
- union/_run.py +374 -0
- union/_secret.py +61 -0
- union/_task.py +354 -0
- union/_task_environment.py +186 -0
- union/_timeout.py +47 -0
- union/_tools.py +27 -0
- union/_utils/__init__.py +11 -0
- union/_utils/asyn.py +119 -0
- union/_utils/file_handling.py +71 -0
- union/_utils/helpers.py +46 -0
- union/_utils/lazy_module.py +54 -0
- union/_utils/uv_script_parser.py +49 -0
- union/_version.py +21 -0
- union/connectors/__init__.py +0 -0
- union/errors.py +128 -0
- union/extras/__init__.py +5 -0
- union/extras/_container.py +263 -0
- union/io/__init__.py +11 -0
- union/io/_dataframe.py +0 -0
- union/io/_dir.py +425 -0
- union/io/_file.py +418 -0
- union/io/pickle/__init__.py +0 -0
- union/io/pickle/transformer.py +117 -0
- union/io/structured_dataset/__init__.py +122 -0
- union/io/structured_dataset/basic_dfs.py +219 -0
- union/io/structured_dataset/structured_dataset.py +1057 -0
- union/py.typed +0 -0
- union/remote/__init__.py +23 -0
- union/remote/_client/__init__.py +0 -0
- union/remote/_client/_protocols.py +129 -0
- union/remote/_client/auth/__init__.py +12 -0
- union/remote/_client/auth/_authenticators/__init__.py +0 -0
- union/remote/_client/auth/_authenticators/base.py +391 -0
- union/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- union/remote/_client/auth/_authenticators/device_code.py +120 -0
- union/remote/_client/auth/_authenticators/external_command.py +77 -0
- union/remote/_client/auth/_authenticators/factory.py +200 -0
- union/remote/_client/auth/_authenticators/pkce.py +515 -0
- union/remote/_client/auth/_channel.py +184 -0
- union/remote/_client/auth/_client_config.py +83 -0
- union/remote/_client/auth/_default_html.py +32 -0
- union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- union/remote/_client/auth/_grpc_utils/auth_interceptor.py +204 -0
- union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +144 -0
- union/remote/_client/auth/_keyring.py +154 -0
- union/remote/_client/auth/_token_client.py +258 -0
- union/remote/_client/auth/errors.py +16 -0
- union/remote/_client/controlplane.py +86 -0
- union/remote/_data.py +149 -0
- union/remote/_logs.py +74 -0
- union/remote/_project.py +86 -0
- union/remote/_run.py +820 -0
- union/remote/_secret.py +132 -0
- union/remote/_task.py +193 -0
- union/report/__init__.py +3 -0
- union/report/_report.py +178 -0
- union/report/_template.html +124 -0
- union/storage/__init__.py +24 -0
- union/storage/_remote_fs.py +34 -0
- union/storage/_storage.py +247 -0
- union/storage/_utils.py +5 -0
- union/types/__init__.py +11 -0
- union/types/_renderer.py +162 -0
- union/types/_string_literals.py +120 -0
- union/types/_type_engine.py +2131 -0
- union/types/_utils.py +80 -0
flyte/io/_dir.py
ADDED
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import AsyncIterator, Dict, Generic, Iterator, List, Optional, Type, TypeVar, Union
|
|
6
|
+
|
|
7
|
+
from flyteidl.core import literals_pb2, types_pb2
|
|
8
|
+
from fsspec.asyn import AsyncFileSystem
|
|
9
|
+
from mashumaro.types import SerializableType
|
|
10
|
+
from pydantic import BaseModel, model_validator
|
|
11
|
+
|
|
12
|
+
import flyte.storage as storage
|
|
13
|
+
from flyte.io._file import File
|
|
14
|
+
from flyte.types import TypeEngine, TypeTransformer, TypeTransformerFailedError
|
|
15
|
+
|
|
16
|
+
# Type variable for the directory format
|
|
17
|
+
T = TypeVar("T")  # Unconstrained type parameter used by Dir[T] and by the File[T] objects that Dir yields.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Dir(BaseModel, Generic[T], SerializableType):
    """
    A generic directory class representing a directory with files of a specified format.
    Provides both async and sync interfaces for directory operations.
    Users are responsible for handling all I/O - the type transformer for Dir does not do any automatic uploading
    or downloading of files.

    The generic type T represents the format of the files in the directory.

    Example:
    ```python
    # Async usage
    from pandas import DataFrame
    data_dir = Dir[DataFrame](path="s3://my-bucket/data/")

    # Walk through files
    async for file in data_dir.walk():
        async with file.open() as f:
            content = await f.read()

    # Sync alternative
    for file in data_dir.walk_sync():
        with file.open_sync() as f:
            content = f.read()
    ```
    """

    # Represents either a local or remote path.
    path: str
    # Display name; derived from the last component of ``path`` when not supplied (see ``pre_init``).
    name: Optional[str] = None
    # Format string carried into the blob type by the type transformer; empty means unspecified.
    format: str = ""

    class Config:
        arbitrary_types_allowed = True

    @model_validator(mode="before")
    @classmethod
    def pre_init(cls, data):
        """
        Fill in ``name`` from the last component of ``path`` when it was not supplied.

        Non-dict input and input without a ``path`` are passed through untouched so that
        regular validation reports the problem. The caller's dict is never mutated.
        """
        if not isinstance(data, dict):
            return data
        if data.get("name") is None and data.get("path") is not None:
            # Build a new dict instead of writing into the one the caller handed us.
            data = {**data, "name": Path(data["path"]).name}
        return data

    def _serialize(self) -> Dict[str, Optional[str]]:
        """Return the model's fields as a plain dict (``model_dump``)."""
        return self.model_dump()

    @classmethod
    def _deserialize(cls, file_dump: Dict[str, Optional[str]]) -> Dir:
        """Build an instance from a dict previously produced by ``_serialize``."""
        return cls.model_validate(file_dump)

    @classmethod
    def schema_match(cls, incoming: dict):
        """
        Check whether a JSON schema dict looks like this model's schema.

        Args:
            incoming: A JSON schema dict to compare against ``cls.model_json_schema()``.

        Returns:
            True when both schemas declare required fields and agree on ``type``, ``title``
            and the set of required fields; False otherwise.
        """
        this_schema = cls.model_json_schema()
        current_required = this_schema.get("required")
        incoming_required = incoming.get("required")
        # bool(...) so a failed match is an explicit False rather than None or an empty list.
        return bool(
            current_required
            and incoming_required
            and incoming.get("type") == this_schema.get("type")
            and incoming.get("title") == this_schema.get("title")
            and set(current_required) == set(incoming_required)
        )

    def _full_path(self, fs, parent: str, file_name: str) -> str:
        """Join a walked ``parent`` and ``file_name`` into the path stored on the yielded File."""
        if "file" in fs.protocol:
            # Local filesystem: keep a plain OS path without a protocol prefix.
            return os.path.join(parent, file_name)
        return fs.unstrip_protocol(parent + fs.sep + file_name)

    async def walk(self, recursive: bool = True, max_depth: Optional[int] = None) -> AsyncIterator[File[T]]:
        """
        Asynchronously walk through the directory and yield File objects.

        Args:
            recursive: If True, recursively walk subdirectories. If False, only the files directly
                inside the directory are yielded and ``max_depth`` is ignored.
            max_depth: Maximum depth for recursive walking (passed to fsspec as ``maxdepth``)

        Yields:
            File objects for each file found in the directory

        Example:
        ```python
        async for file in directory.walk():
            local_path = await file.download()
            # Process the file
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)
        if recursive is False:
            # fsspec's maxdepth=1 lists only the top-level directory.
            max_depth = 1

        # Note if the path is actually just a file, no walking is done.
        if isinstance(fs, AsyncFileSystem):
            async for parent, _, files in fs._walk(self.path, maxdepth=max_depth):
                for file in files:
                    full_file = fs.unstrip_protocol(parent + fs.sep + file)
                    yield File[T](path=full_file)
        else:
            for parent, _, files in fs.walk(self.path, maxdepth=max_depth):
                for file in files:
                    yield File[T](path=self._full_path(fs, parent, file))

    def walk_sync(
        self, recursive: bool = True, file_pattern: str = "*", max_depth: Optional[int] = None
    ) -> Iterator[File[T]]:
        """
        Synchronously walk through the directory and yield File objects.

        Args:
            recursive: If True, recursively walk subdirectories. If False, only the files directly
                inside the directory are yielded and ``max_depth`` is ignored.
            file_pattern: Glob pattern matched against each file name; the default ``"*"`` keeps every file
            max_depth: Maximum depth for recursive walking (passed to fsspec as ``maxdepth``)

        Yields:
            File objects for each file found in the directory

        Example:
        ```python
        for file in directory.walk_sync():
            local_path = file.download_sync()
            # Process the file
        ```
        """
        import fnmatch

        fs = storage.get_underlying_filesystem(path=self.path)
        if recursive is False:
            # fsspec's maxdepth=1 lists only the top-level directory.
            max_depth = 1

        for parent, _, files in fs.walk(self.path, maxdepth=max_depth):
            for file in files:
                if not fnmatch.fnmatch(file, file_pattern):
                    continue
                yield File[T](path=self._full_path(fs, parent, file))

    async def list_files(self) -> List[File[T]]:
        """
        Asynchronously get a list of all files in the directory (non-recursive).

        Returns:
            A list of File objects

        Example:
        ```python
        files = await directory.list_files()
        for file in files:
            # Process the file
        ```
        """
        # todo: this should probably also just defer to fsspec.find()
        return [file async for file in self.walk(recursive=False)]

    def list_files_sync(self) -> List[File[T]]:
        """
        Synchronously get a list of all files in the directory (non-recursive).

        Returns:
            A list of File objects

        Example:
        ```python
        files = directory.list_files_sync()
        for file in files:
            # Process the file
        ```
        """
        return list(self.walk_sync(recursive=False))

    async def download(self, local_path: Optional[Union[str, Path]] = None) -> str:
        """
        Asynchronously download the entire directory to a local path.

        Args:
            local_path: The local path to download the directory to. If None, a temporary
                        directory will be used.

        Returns:
            The path to the downloaded directory. When this Dir is already local and no distinct
            ``local_path`` is given, the Dir's own path is returned and nothing is copied.

        Example:
        ```python
        local_dir = await directory.download('/tmp/my_data/')
        ```
        """
        if not storage.is_remote(self.path):
            # Compare as strings so a Path argument equal to self.path also skips the copy.
            if not local_path or str(local_path) == self.path:
                return self.path

            import asyncio
            import shutil

            local_dest = str(local_path)
            # copytree blocks, so run it in the default executor to keep the event loop free.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, lambda: shutil.copytree(self.path, local_dest, dirs_exist_ok=True))
            # The copy is complete; do not fall through to the remote download below.
            return local_dest

        local_dest = str(local_path) if local_path else str(storage.get_random_local_path())
        return await storage.get(self.path, local_dest, recursive=True)

    def download_sync(self, local_path: Optional[Union[str, Path]] = None) -> str:
        """
        Synchronously download the entire directory to a local path.

        Args:
            local_path: The local path to download the directory to. If None, a temporary
                        directory will be used.

        Returns:
            The path to the downloaded directory. When this Dir is already local and no distinct
            ``local_path`` is given, the Dir's own path is returned and nothing is copied.

        Raises:
            NotImplementedError: If this Dir points at a remote path.

        Example:
        ```python
        local_dir = directory.download_sync('/tmp/my_data/')
        ```
        """
        if not storage.is_remote(self.path):
            # Compare as strings so a Path argument equal to self.path also skips the copy.
            if not local_path or str(local_path) == self.path:
                return self.path

            import shutil

            local_dest = str(local_path)
            shutil.copytree(self.path, local_dest, dirs_exist_ok=True)
            # Local copy succeeded; return instead of reaching the remote-only error below.
            return local_dest

        # Figure this out when we figure out the final synchronicity story
        raise NotImplementedError("Sync download is not implemented for remote paths")

    @classmethod
    async def from_local(cls, local_path: Union[str, Path], remote_path: Optional[str] = None) -> Dir[T]:
        """
        Asynchronously create a new Dir by uploading a local directory to the configured remote store.

        Args:
            local_path: Path to the local directory
            remote_path: Optional path to store the directory remotely. If None, a path will be generated.

        Returns:
            A new Dir instance pointing to the uploaded directory

        Example:
        ```python
        remote_dir = await Dir[DataFrame].from_local('/tmp/data_dir/', 's3://bucket/data/')
        ```
        """
        local_path_str = str(local_path)
        # normpath drops a trailing separator so basename returns the directory's own name.
        dirname = os.path.basename(os.path.normpath(local_path_str))

        output_path = await storage.put(from_path=local_path_str, to_path=remote_path, recursive=True)
        return cls(path=output_path, name=dirname)

    @classmethod
    def from_local_sync(cls, local_path: Union[str, Path], remote_path: Optional[str] = None) -> Dir[T]:
        """
        Synchronously create a new Dir by uploading a local directory to the configured remote store.

        Args:
            local_path: Path to the local directory
            remote_path: Optional path to store the directory remotely. If None, a path will be generated.

        Returns:
            A new Dir instance pointing to the uploaded directory

        Raises:
            NotImplementedError: Always; the synchronous upload is not implemented yet.

        Example:
        ```python
        remote_dir = Dir[DataFrame].from_local_sync('/tmp/data_dir/', 's3://bucket/data/')
        ```
        """
        # Implement this after we figure out the final synchronicity story
        raise NotImplementedError("Sync upload is not implemented for remote paths")

    async def exists(self) -> bool:
        """
        Asynchronously check if the directory exists.

        Returns:
            True if the directory exists, False otherwise

        Example:
        ```python
        if await directory.exists():
            # Process the directory
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)
        if isinstance(fs, AsyncFileSystem):
            return await fs._exists(self.path)
        return fs.exists(self.path)

    def exists_sync(self) -> bool:
        """
        Synchronously check if the directory exists.

        Returns:
            True if the directory exists, False otherwise

        Example:
        ```python
        if directory.exists_sync():
            # Process the directory
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)
        return fs.exists(self.path)

    async def get_file(self, file_name: str) -> Optional[File[T]]:
        """
        Asynchronously get a specific file from the directory.

        Args:
            file_name: The name of the file to get

        Returns:
            A File instance if the file exists, None otherwise

        Example:
        ```python
        file = await directory.get_file("data.csv")
        if file:
            # Process the file
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)
        # Avoid a doubled separator when the directory path already ends with one.
        base = self.path if self.path.endswith(fs.sep) else self.path + fs.sep
        file_path = base + file_name

        # Use the coroutine variant on async filesystems so the event loop is not blocked.
        if isinstance(fs, AsyncFileSystem):
            found = await fs._exists(file_path)
        else:
            found = fs.exists(file_path)

        return File[T](path=file_path) if found else None

    def get_file_sync(self, file_name: str) -> Optional[File[T]]:
        """
        Synchronously get a specific file from the directory.

        Args:
            file_name: The name of the file to get

        Returns:
            A File instance if the file exists, None otherwise

        Example:
        ```python
        file = directory.get_file_sync("data.csv")
        if file:
            # Process the file
        ```
        """
        file_path = os.path.join(self.path, file_name)
        file = File[T](path=file_path)

        if file.exists_sync():
            return file
        return None
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
class DirTransformer(TypeTransformer[Dir]):
    """
    Converts Dir objects to and from multipart blob literals.

    Only the path and format are carried across; this transformer performs no I/O, which is
    left to the user.
    """

    def __init__(self):
        super().__init__(name="Dir", t=Dir)

    def get_literal_type(self, t: Type[Dir]) -> types_pb2.LiteralType:
        """Return the literal type used for Dir: a multipart blob with an empty format."""
        # todo: set format from generic
        multipart_blob = types_pb2.BlobType(
            format="",  # Format is determined by the generic type T
            dimensionality=types_pb2.BlobType.BlobDimensionality.MULTIPART,
        )
        return types_pb2.LiteralType(blob=multipart_blob)

    async def to_literal(
        self,
        python_val: Dir,
        python_type: Type[Dir],
        expected: types_pb2.LiteralType,
    ) -> literals_pb2.Literal:
        """
        Wrap a Dir in a blob literal whose URI is the Dir's path.

        Raises:
            TypeTransformerFailedError: If ``python_val`` is not a Dir.
        """
        if not isinstance(python_val, Dir):
            raise TypeTransformerFailedError(f"Expected Dir object, received {type(python_val)}")

        blob_type = types_pb2.BlobType(
            format=python_val.format,
            dimensionality=types_pb2.BlobType.BlobDimensionality.MULTIPART,
        )
        blob = literals_pb2.Blob(
            metadata=literals_pb2.BlobMetadata(type=blob_type),
            uri=python_val.path,
        )
        return literals_pb2.Literal(scalar=literals_pb2.Scalar(blob=blob))

    async def to_python_value(
        self,
        lv: literals_pb2.Literal,
        expected_python_type: Type[Dir],
    ) -> Dir:
        """
        Build a Dir from a multipart blob literal.

        Raises:
            TypeTransformerFailedError: If the literal is not a blob or is not multipart.
        """
        scalar = lv.scalar
        if not scalar.HasField("blob"):
            raise TypeTransformerFailedError(f"Expected blob literal, received {lv}")

        blob = scalar.blob
        if blob.metadata.type.dimensionality != types_pb2.BlobType.BlobDimensionality.MULTIPART:
            raise TypeTransformerFailedError(
                f"Expected multipart, received {lv.scalar.blob.metadata.type.dimensionality}"
            )

        location = blob.uri
        # The Dir's name is the last component of the URI.
        return Dir(path=location, name=Path(location).name, format=blob.metadata.type.format)

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[Dir]:
        """
        Return Dir for a multipart blob literal type.

        Raises:
            ValueError: If the literal type is not a multipart blob.
        """
        is_multipart_blob = (
            literal_type.HasField("blob")
            and literal_type.blob.dimensionality == types_pb2.BlobType.BlobDimensionality.MULTIPART
        )
        if not is_multipart_blob:
            raise ValueError(f"Cannot guess python type from {literal_type}")
        return Dir
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
# Register a DirTransformer instance with the TypeEngine when this module is imported.
TypeEngine.register(DirTransformer())
|