flyte 0.0.1b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +62 -0
- flyte/_api_commons.py +3 -0
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/runtime.py +126 -0
- flyte/_build.py +25 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +146 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_cli/__init__.py +0 -0
- flyte/_cli/_common.py +287 -0
- flyte/_cli/_create.py +42 -0
- flyte/_cli/_delete.py +23 -0
- flyte/_cli/_deploy.py +140 -0
- flyte/_cli/_get.py +235 -0
- flyte/_cli/_run.py +152 -0
- flyte/_cli/main.py +72 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +113 -0
- flyte/_code_bundle/_packaging.py +187 -0
- flyte/_code_bundle/_utils.py +339 -0
- flyte/_code_bundle/bundle.py +178 -0
- flyte/_context.py +146 -0
- flyte/_datastructures.py +342 -0
- flyte/_deploy.py +202 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +43 -0
- flyte/_group.py +31 -0
- flyte/_hash.py +23 -0
- flyte/_image.py +760 -0
- flyte/_initialize.py +634 -0
- flyte/_interface.py +84 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +115 -0
- flyte/_internal/controllers/_local_controller.py +118 -0
- flyte/_internal/controllers/_trace.py +40 -0
- flyte/_internal/controllers/pbhash.py +39 -0
- flyte/_internal/controllers/remote/__init__.py +40 -0
- flyte/_internal/controllers/remote/_action.py +141 -0
- flyte/_internal/controllers/remote/_client.py +43 -0
- flyte/_internal/controllers/remote/_controller.py +361 -0
- flyte/_internal/controllers/remote/_core.py +402 -0
- flyte/_internal/controllers/remote/_informer.py +361 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +11 -0
- flyte/_internal/imagebuild/docker_builder.py +416 -0
- flyte/_internal/imagebuild/image_builder.py +241 -0
- flyte/_internal/imagebuild/remote_builder.py +0 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +54 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +199 -0
- flyte/_internal/runtime/entrypoints.py +135 -0
- flyte/_internal/runtime/io.py +136 -0
- flyte/_internal/runtime/resources_serde.py +138 -0
- flyte/_internal/runtime/task_serde.py +210 -0
- flyte/_internal/runtime/taskrunner.py +190 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_logging.py +124 -0
- flyte/_protos/__init__.py +0 -0
- flyte/_protos/common/authorization_pb2.py +66 -0
- flyte/_protos/common/authorization_pb2.pyi +108 -0
- flyte/_protos/common/authorization_pb2_grpc.py +4 -0
- flyte/_protos/common/identifier_pb2.py +71 -0
- flyte/_protos/common/identifier_pb2.pyi +82 -0
- flyte/_protos/common/identifier_pb2_grpc.py +4 -0
- flyte/_protos/common/identity_pb2.py +48 -0
- flyte/_protos/common/identity_pb2.pyi +72 -0
- flyte/_protos/common/identity_pb2_grpc.py +4 -0
- flyte/_protos/common/list_pb2.py +36 -0
- flyte/_protos/common/list_pb2.pyi +69 -0
- flyte/_protos/common/list_pb2_grpc.py +4 -0
- flyte/_protos/common/policy_pb2.py +37 -0
- flyte/_protos/common/policy_pb2.pyi +27 -0
- flyte/_protos/common/policy_pb2_grpc.py +4 -0
- flyte/_protos/common/role_pb2.py +37 -0
- flyte/_protos/common/role_pb2.pyi +53 -0
- flyte/_protos/common/role_pb2_grpc.py +4 -0
- flyte/_protos/common/runtime_version_pb2.py +28 -0
- flyte/_protos/common/runtime_version_pb2.pyi +24 -0
- flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
- flyte/_protos/logs/dataplane/payload_pb2.py +96 -0
- flyte/_protos/logs/dataplane/payload_pb2.pyi +168 -0
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/definition_pb2.py +49 -0
- flyte/_protos/secret/definition_pb2.pyi +93 -0
- flyte/_protos/secret/definition_pb2_grpc.py +4 -0
- flyte/_protos/secret/payload_pb2.py +62 -0
- flyte/_protos/secret/payload_pb2.pyi +94 -0
- flyte/_protos/secret/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/secret_pb2.py +38 -0
- flyte/_protos/secret/secret_pb2.pyi +6 -0
- flyte/_protos/secret/secret_pb2_grpc.py +198 -0
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
- flyte/_protos/validate/validate/validate_pb2.py +76 -0
- flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
- flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- flyte/_protos/workflow/queue_service_pb2.py +106 -0
- flyte/_protos/workflow/queue_service_pb2.pyi +141 -0
- flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- flyte/_protos/workflow/run_definition_pb2.py +128 -0
- flyte/_protos/workflow/run_definition_pb2.pyi +310 -0
- flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
- flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- flyte/_protos/workflow/run_service_pb2.py +133 -0
- flyte/_protos/workflow/run_service_pb2.pyi +175 -0
- flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
- flyte/_protos/workflow/state_service_pb2.py +58 -0
- flyte/_protos/workflow/state_service_pb2.pyi +71 -0
- flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
- flyte/_protos/workflow/task_definition_pb2.py +72 -0
- flyte/_protos/workflow/task_definition_pb2.pyi +65 -0
- flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/task_service_pb2.py +44 -0
- flyte/_protos/workflow/task_service_pb2.pyi +31 -0
- flyte/_protos/workflow/task_service_pb2_grpc.py +104 -0
- flyte/_resources.py +226 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +25 -0
- flyte/_run.py +411 -0
- flyte/_secret.py +61 -0
- flyte/_task.py +367 -0
- flyte/_task_environment.py +200 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +128 -0
- flyte/_utils/__init__.py +20 -0
- flyte/_utils/asyn.py +119 -0
- flyte/_utils/coro_management.py +25 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +108 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +21 -0
- flyte/connectors/__init__.py +0 -0
- flyte/errors.py +143 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +273 -0
- flyte/io/__init__.py +11 -0
- flyte/io/_dataframe.py +0 -0
- flyte/io/_dir.py +448 -0
- flyte/io/_file.py +468 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte/io/pickle/transformer.py +117 -0
- flyte/io/structured_dataset/__init__.py +129 -0
- flyte/io/structured_dataset/basic_dfs.py +219 -0
- flyte/io/structured_dataset/structured_dataset.py +1061 -0
- flyte/py.typed +0 -0
- flyte/remote/__init__.py +25 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +131 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +397 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +184 -0
- flyte/remote/_client/auth/_client_config.py +83 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +143 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +95 -0
- flyte/remote/_console.py +18 -0
- flyte/remote/_data.py +155 -0
- flyte/remote/_logs.py +116 -0
- flyte/remote/_project.py +86 -0
- flyte/remote/_run.py +873 -0
- flyte/remote/_secret.py +132 -0
- flyte/remote/_task.py +227 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +178 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +24 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +251 -0
- flyte/storage/_utils.py +5 -0
- flyte/types/__init__.py +13 -0
- flyte/types/_interface.py +25 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +120 -0
- flyte/types/_type_engine.py +2210 -0
- flyte/types/_utils.py +80 -0
- flyte-0.0.1b0.dist-info/METADATA +179 -0
- flyte-0.0.1b0.dist-info/RECORD +390 -0
- flyte-0.0.1b0.dist-info/WHEEL +5 -0
- flyte-0.0.1b0.dist-info/entry_points.txt +3 -0
- flyte-0.0.1b0.dist-info/top_level.txt +1 -0
- union/__init__.py +54 -0
- union/_api_commons.py +3 -0
- union/_bin/__init__.py +0 -0
- union/_bin/runtime.py +113 -0
- union/_build.py +25 -0
- union/_cache/__init__.py +12 -0
- union/_cache/cache.py +141 -0
- union/_cache/defaults.py +9 -0
- union/_cache/policy_function_body.py +42 -0
- union/_cli/__init__.py +0 -0
- union/_cli/_common.py +263 -0
- union/_cli/_create.py +40 -0
- union/_cli/_delete.py +23 -0
- union/_cli/_deploy.py +120 -0
- union/_cli/_get.py +162 -0
- union/_cli/_params.py +579 -0
- union/_cli/_run.py +150 -0
- union/_cli/main.py +72 -0
- union/_code_bundle/__init__.py +8 -0
- union/_code_bundle/_ignore.py +113 -0
- union/_code_bundle/_packaging.py +187 -0
- union/_code_bundle/_utils.py +342 -0
- union/_code_bundle/bundle.py +176 -0
- union/_context.py +146 -0
- union/_datastructures.py +295 -0
- union/_deploy.py +185 -0
- union/_doc.py +29 -0
- union/_docstring.py +26 -0
- union/_environment.py +43 -0
- union/_group.py +31 -0
- union/_hash.py +23 -0
- union/_image.py +760 -0
- union/_initialize.py +585 -0
- union/_interface.py +84 -0
- union/_internal/__init__.py +3 -0
- union/_internal/controllers/__init__.py +77 -0
- union/_internal/controllers/_local_controller.py +77 -0
- union/_internal/controllers/pbhash.py +39 -0
- union/_internal/controllers/remote/__init__.py +40 -0
- union/_internal/controllers/remote/_action.py +131 -0
- union/_internal/controllers/remote/_client.py +43 -0
- union/_internal/controllers/remote/_controller.py +169 -0
- union/_internal/controllers/remote/_core.py +341 -0
- union/_internal/controllers/remote/_informer.py +260 -0
- union/_internal/controllers/remote/_service_protocol.py +44 -0
- union/_internal/imagebuild/__init__.py +11 -0
- union/_internal/imagebuild/docker_builder.py +416 -0
- union/_internal/imagebuild/image_builder.py +243 -0
- union/_internal/imagebuild/remote_builder.py +0 -0
- union/_internal/resolvers/__init__.py +0 -0
- union/_internal/resolvers/_task_module.py +31 -0
- union/_internal/resolvers/common.py +24 -0
- union/_internal/resolvers/default.py +27 -0
- union/_internal/runtime/__init__.py +0 -0
- union/_internal/runtime/convert.py +163 -0
- union/_internal/runtime/entrypoints.py +121 -0
- union/_internal/runtime/io.py +136 -0
- union/_internal/runtime/resources_serde.py +134 -0
- union/_internal/runtime/task_serde.py +202 -0
- union/_internal/runtime/taskrunner.py +179 -0
- union/_internal/runtime/types_serde.py +53 -0
- union/_logging.py +124 -0
- union/_protos/__init__.py +0 -0
- union/_protos/common/authorization_pb2.py +66 -0
- union/_protos/common/authorization_pb2.pyi +106 -0
- union/_protos/common/authorization_pb2_grpc.py +4 -0
- union/_protos/common/identifier_pb2.py +71 -0
- union/_protos/common/identifier_pb2.pyi +82 -0
- union/_protos/common/identifier_pb2_grpc.py +4 -0
- union/_protos/common/identity_pb2.py +48 -0
- union/_protos/common/identity_pb2.pyi +72 -0
- union/_protos/common/identity_pb2_grpc.py +4 -0
- union/_protos/common/list_pb2.py +36 -0
- union/_protos/common/list_pb2.pyi +69 -0
- union/_protos/common/list_pb2_grpc.py +4 -0
- union/_protos/common/policy_pb2.py +37 -0
- union/_protos/common/policy_pb2.pyi +27 -0
- union/_protos/common/policy_pb2_grpc.py +4 -0
- union/_protos/common/role_pb2.py +37 -0
- union/_protos/common/role_pb2.pyi +51 -0
- union/_protos/common/role_pb2_grpc.py +4 -0
- union/_protos/common/runtime_version_pb2.py +28 -0
- union/_protos/common/runtime_version_pb2.pyi +24 -0
- union/_protos/common/runtime_version_pb2_grpc.py +4 -0
- union/_protos/logs/dataplane/payload_pb2.py +96 -0
- union/_protos/logs/dataplane/payload_pb2.pyi +168 -0
- union/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- union/_protos/secret/definition_pb2.py +49 -0
- union/_protos/secret/definition_pb2.pyi +93 -0
- union/_protos/secret/definition_pb2_grpc.py +4 -0
- union/_protos/secret/payload_pb2.py +62 -0
- union/_protos/secret/payload_pb2.pyi +94 -0
- union/_protos/secret/payload_pb2_grpc.py +4 -0
- union/_protos/secret/secret_pb2.py +38 -0
- union/_protos/secret/secret_pb2.pyi +6 -0
- union/_protos/secret/secret_pb2_grpc.py +198 -0
- union/_protos/validate/validate/validate_pb2.py +76 -0
- union/_protos/workflow/node_execution_service_pb2.py +26 -0
- union/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- union/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- union/_protos/workflow/queue_service_pb2.py +75 -0
- union/_protos/workflow/queue_service_pb2.pyi +103 -0
- union/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- union/_protos/workflow/run_definition_pb2.py +100 -0
- union/_protos/workflow/run_definition_pb2.pyi +256 -0
- union/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- union/_protos/workflow/run_logs_service_pb2.py +41 -0
- union/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- union/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- union/_protos/workflow/run_service_pb2.py +133 -0
- union/_protos/workflow/run_service_pb2.pyi +173 -0
- union/_protos/workflow/run_service_pb2_grpc.py +412 -0
- union/_protos/workflow/state_service_pb2.py +58 -0
- union/_protos/workflow/state_service_pb2.pyi +69 -0
- union/_protos/workflow/state_service_pb2_grpc.py +138 -0
- union/_protos/workflow/task_definition_pb2.py +72 -0
- union/_protos/workflow/task_definition_pb2.pyi +65 -0
- union/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- union/_protos/workflow/task_service_pb2.py +44 -0
- union/_protos/workflow/task_service_pb2.pyi +31 -0
- union/_protos/workflow/task_service_pb2_grpc.py +104 -0
- union/_resources.py +226 -0
- union/_retry.py +32 -0
- union/_reusable_environment.py +25 -0
- union/_run.py +374 -0
- union/_secret.py +61 -0
- union/_task.py +354 -0
- union/_task_environment.py +186 -0
- union/_timeout.py +47 -0
- union/_tools.py +27 -0
- union/_utils/__init__.py +11 -0
- union/_utils/asyn.py +119 -0
- union/_utils/file_handling.py +71 -0
- union/_utils/helpers.py +46 -0
- union/_utils/lazy_module.py +54 -0
- union/_utils/uv_script_parser.py +49 -0
- union/_version.py +21 -0
- union/connectors/__init__.py +0 -0
- union/errors.py +128 -0
- union/extras/__init__.py +5 -0
- union/extras/_container.py +263 -0
- union/io/__init__.py +11 -0
- union/io/_dataframe.py +0 -0
- union/io/_dir.py +425 -0
- union/io/_file.py +418 -0
- union/io/pickle/__init__.py +0 -0
- union/io/pickle/transformer.py +117 -0
- union/io/structured_dataset/__init__.py +122 -0
- union/io/structured_dataset/basic_dfs.py +219 -0
- union/io/structured_dataset/structured_dataset.py +1057 -0
- union/py.typed +0 -0
- union/remote/__init__.py +23 -0
- union/remote/_client/__init__.py +0 -0
- union/remote/_client/_protocols.py +129 -0
- union/remote/_client/auth/__init__.py +12 -0
- union/remote/_client/auth/_authenticators/__init__.py +0 -0
- union/remote/_client/auth/_authenticators/base.py +391 -0
- union/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- union/remote/_client/auth/_authenticators/device_code.py +120 -0
- union/remote/_client/auth/_authenticators/external_command.py +77 -0
- union/remote/_client/auth/_authenticators/factory.py +200 -0
- union/remote/_client/auth/_authenticators/pkce.py +515 -0
- union/remote/_client/auth/_channel.py +184 -0
- union/remote/_client/auth/_client_config.py +83 -0
- union/remote/_client/auth/_default_html.py +32 -0
- union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- union/remote/_client/auth/_grpc_utils/auth_interceptor.py +204 -0
- union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +144 -0
- union/remote/_client/auth/_keyring.py +154 -0
- union/remote/_client/auth/_token_client.py +258 -0
- union/remote/_client/auth/errors.py +16 -0
- union/remote/_client/controlplane.py +86 -0
- union/remote/_data.py +149 -0
- union/remote/_logs.py +74 -0
- union/remote/_project.py +86 -0
- union/remote/_run.py +820 -0
- union/remote/_secret.py +132 -0
- union/remote/_task.py +193 -0
- union/report/__init__.py +3 -0
- union/report/_report.py +178 -0
- union/report/_template.html +124 -0
- union/storage/__init__.py +24 -0
- union/storage/_remote_fs.py +34 -0
- union/storage/_storage.py +247 -0
- union/storage/_utils.py +5 -0
- union/types/__init__.py +11 -0
- union/types/_renderer.py +162 -0
- union/types/_string_literals.py +120 -0
- union/types/_type_engine.py +2131 -0
- union/types/_utils.py +80 -0
union/io/_file.py
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from contextlib import asynccontextmanager, contextmanager
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import IO, Any, AsyncContextManager, Generic, Optional, Type, TypeVar, Union
|
|
7
|
+
|
|
8
|
+
import aiofiles
|
|
9
|
+
from flyteidl.core import literals_pb2, types_pb2
|
|
10
|
+
from fsspec.asyn import AsyncFileSystem
|
|
11
|
+
from fsspec.utils import get_protocol
|
|
12
|
+
from pydantic import BaseModel, model_validator
|
|
13
|
+
from synchronicity import Synchronizer
|
|
14
|
+
|
|
15
|
+
import union.storage as storage
|
|
16
|
+
from union._context import internal_ctx
|
|
17
|
+
from union._initialize import requires_initialization
|
|
18
|
+
from union._logging import logger
|
|
19
|
+
from union.types import TypeEngine, TypeTransformer, TypeTransformerFailedError
|
|
20
|
+
|
|
21
|
+
# Type variable for the file format
|
|
22
|
+
T = TypeVar("T")
|
|
23
|
+
|
|
24
|
+
synced = Synchronizer()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class File(BaseModel, Generic[T]):
    """
    A generic file class representing a file with a specified format.
    Provides both async and sync interfaces for file operations.
    Users must handle all I/O operations themselves by instantiating this class with the appropriate class methods.

    The generic type T represents the format of the file.

    Example:
    ```python
    # Async usage
    from pandas import DataFrame
    csv_file = File[DataFrame](path="s3://my-bucket/data.csv")

    async with csv_file.open() as f:
        content = await f.read()

    # Sync alternative
    with csv_file.open_sync() as f:
        content = f.read()
    ```

    Example: Read a file input in a Task.
    ```
    @env.task
    async def my_task(file: File[DataFrame]):
        async with file.open() as f:
            df = pd.read_csv(f)
    ```

    Example: Write a file by streaming it directly to blob storage
    ```
    @env.task
    async def my_task() -> File[DataFrame]:
        df = pd.DataFrame(...)
        file = File.new_remote()
        async with file.open("wb") as f:
            df.to_csv(f)
        # No additional uploading will be done here.
        return file
    ```
    Example: Write a file by writing it locally first, and then uploading it.
    ```
    @env.task
    async def my_task() -> File[DataFrame]:
        # write to /tmp/data.csv
        return File.from_local("/tmp/data.csv", remote_destination="s3://my-bucket/data.csv")
    ```

    Example: From an existing remote file
    ```
    @env.task
    async def my_task() -> File[DataFrame]:
        return File.from_existing_remote("s3://my-bucket/data.csv")
    ```

    Example: Take a remote file as input and return the same one, should not do any copy
    ```
    @env.task
    async def my_task(file: File[DataFrame]) -> File[DataFrame]:
        return file
    ```

    Args:
        path: The path to the file (can be local or remote)
        name: Optional name for the file (defaults to basename of path)
    """

    path: str
    # Defaults to the basename of `path`; filled in by the `pre_init` validator.
    name: Optional[str] = None
    # Serialization format tag carried through the Flyte blob metadata.
    format: str = ""

    class Config:
        arbitrary_types_allowed = True

    @model_validator(mode="before")
    @classmethod
    def pre_init(cls, data):
        """Default `name` to the basename of `path` before field validation runs."""
        if data.get("name") is None:
            data["name"] = Path(data["path"]).name
        return data

    @classmethod
    @requires_initialization
    def new_remote(cls) -> File[T]:
        """
        Create a new File reference for a remote file that will be written to.

        Example:
        ```
        @env.task
        async def my_task() -> File[DataFrame]:
            df = pd.DataFrame(...)
            file = File.new_remote()
            async with file.open("wb") as f:
                df.to_csv(f)
            return file
        ```
        """
        ctx = internal_ctx()

        # The raw-data prefix is only available after flyte initialization,
        # hence the @requires_initialization guard above.
        return cls(path=ctx.raw_data.get_random_remote_path())

    @classmethod
    def from_existing_remote(cls, remote_path: str) -> File[T]:
        """
        Create a File reference from an existing remote file.

        Example:
        ```python
        @env.task
        async def my_task() -> File[DataFrame]:
            return File.from_existing_remote("s3://my-bucket/data.csv")
        ```

        Args:
            remote_path: The remote path to the existing file
        """
        return cls(path=remote_path)

    @asynccontextmanager
    async def open(
        self,
        mode: str = "rb",
        block_size: Optional[int] = None,
        cache_type: str = "readahead",
        cache_options: Optional[dict] = None,
        compression: Optional[str] = None,
        **kwargs,
    ) -> AsyncContextManager[IO[Any]]:
        """
        Asynchronously open the file and return a file-like object.

        Args:
            mode: The mode to open the file in (default: 'rb')
            block_size: Size of blocks for reading (bytes)
            cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
            cache_options: Dictionary of options for the cache
            compression: Compression format or None for auto-detection
            **kwargs: Additional arguments passed to fsspec's open method

        Returns:
            An async file-like object

        Example:
        ```python
        async with file.open('rb') as f:
            data = await f.read()
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)

        # Set up cache options if provided
        if cache_options is None:
            cache_options = {}

        # Configure the open parameters
        open_kwargs = {"mode": mode, **kwargs}
        if compression:
            open_kwargs["compression"] = compression

        if block_size:
            open_kwargs["block_size"] = block_size

        # Apply caching strategy
        if cache_type != "none":
            open_kwargs["cache_type"] = cache_type
            open_kwargs["cache_options"] = cache_options

        # Use aiofiles for local files. fsspec protocols may be tuples
        # (e.g. ("file", "local")), so membership is used, matching download()
        # and from_local().
        # NOTE: aiofiles does not understand block_size/cache_* options, so
        # only mode and the raw **kwargs are forwarded on this path.
        if "file" in fs.protocol:
            async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
                yield f
        else:
            # This code is broadly similar to what storage.get_stream does, but without actually reading from the stream
            file_handle = None
            try:
                if isinstance(fs, AsyncFileSystem):
                    file_handle = await fs.open_async(self.path, mode)
                    yield file_handle
                    return
            except NotImplementedError:
                logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
            finally:
                if file_handle is not None:
                    file_handle.close()

            # Sync fallback: forward the assembled open parameters, mirroring
            # open_sync(). (Previously open_kwargs was built but never used.)
            with fs.open(self.path, **open_kwargs) as file_handle:
                yield file_handle

    @contextmanager
    def open_sync(
        self,
        mode: str = "rb",
        block_size: Optional[int] = None,
        cache_type: str = "readahead",
        cache_options: Optional[dict] = None,
        compression: Optional[str] = None,
        **kwargs,
    ) -> IO[Any]:
        """
        Synchronously open the file and return a file-like object.

        Args:
            mode: The mode to open the file in (default: 'rb')
            block_size: Size of blocks for reading (bytes)
            cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
            cache_options: Dictionary of options for the cache
            compression: Compression format or None for auto-detection
            **kwargs: Additional arguments passed to fsspec's open method

        Returns:
            A file-like object

        Example:
        ```python
        with file.open_sync('rb') as f:
            data = f.read()
        ```
        """
        fs = storage.get_underlying_filesystem(path=self.path)

        # Set up cache options if provided
        if cache_options is None:
            cache_options = {}

        # Configure the open parameters
        open_kwargs = {"mode": mode, "compression": compression, **kwargs}

        if block_size:
            open_kwargs["block_size"] = block_size

        # Apply caching strategy
        if cache_type != "none":
            open_kwargs["cache_type"] = cache_type
            open_kwargs["cache_options"] = cache_options

        with fs.open(self.path, **open_kwargs) as f:
            yield f

    # @synced.wrap - enabling this did not work - synchronicity/pydantic issue
    async def download(self, local_path: Optional[Union[str, Path]] = None) -> str:
        """
        Asynchronously download the file to a local path.

        Args:
            local_path: The local path to download the file to. If None, a temporary
                directory will be used.

        Returns:
            The path to the downloaded file

        Example:
        ```python
        local_file = await file.download('/tmp/myfile.csv')
        ```
        """
        if local_path is None:
            # NOTE(review): local_path is None here, so the helper receives no
            # filename hint and the extension is presumably lost — consider
            # passing self.path so the random local path keeps the suffix.
            local_path = storage.get_random_local_path(file_path_or_file_name=local_path)
        else:
            local_path = str(Path(local_path).absolute())

        fs = storage.get_underlying_filesystem(path=self.path)

        # If it's already a local file, just copy it
        if "file" in fs.protocol:
            # Use aiofiles for async copy
            async with aiofiles.open(self.path, "rb") as src:
                async with aiofiles.open(local_path, "wb") as dst:
                    await dst.write(await src.read())
            return local_path

        # Otherwise download from remote using async functionality
        await storage.get(self.path, local_path)
        return local_path

    @classmethod
    @requires_initialization
    async def from_local(cls, local_path: Union[str, Path], remote_destination: Optional[str] = None) -> File[T]:
        """
        Create a new File object from a local file that will be uploaded to the configured remote store.

        Args:
            local_path: Path to the local file
            remote_destination: Optional path to store the file remotely. If None, a path will be generated.

        Returns:
            A new File instance pointing to the uploaded file

        Raises:
            ValueError: If `local_path` does not exist.

        Example:
        ```python
        remote_file = await File[DataFrame].from_local('/tmp/data.csv', 's3://bucket/data.csv')
        ```
        """
        if not os.path.exists(local_path):
            raise ValueError(f"File not found: {local_path}")

        remote_path = remote_destination or internal_ctx().raw_data.get_random_remote_path()
        protocol = get_protocol(remote_path)
        filename = Path(local_path).name

        # If remote_destination was not set by the user, and the configured raw data path is also local,
        # then let's optimize by not uploading.
        if "file" in protocol:
            if remote_destination is None:
                path = str(Path(local_path).absolute())
            else:
                # Otherwise, actually make a copy of the file.
                # BUGFIX: read from the local source and write to the
                # destination — the original had src/dst swapped, which read
                # from the (nonexistent) destination and truncated the source.
                async with aiofiles.open(local_path, "rb") as src:
                    async with aiofiles.open(remote_path, "wb") as dst:
                        await dst.write(await src.read())
                path = str(Path(remote_path).absolute())
        else:
            # Otherwise upload to remote using async storage layer
            path = await storage.put(local_path, remote_path)

        f = cls(path=path, name=filename)
        return f
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
class FileTransformer(TypeTransformer[File]):
    """
    Transformer for File objects. This type transformer does not handle any i/o. That is now the responsibility of the
    user.
    """

    def __init__(self):
        super().__init__(name="File", t=File)

    def get_literal_type(self, t: Type[File]) -> types_pb2.LiteralType:
        """Get the Flyte literal type for a File type."""
        blob_type = types_pb2.BlobType(
            # todo: set format from generic
            format="",  # Format is determined by the generic type T
            dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE,
        )
        return types_pb2.LiteralType(blob=blob_type)

    async def to_literal(
        self,
        python_val: File,
        python_type: Type[File],
        expected: types_pb2.LiteralType,
    ) -> literals_pb2.Literal:
        """Convert a File object to a Flyte literal."""
        if not isinstance(python_val, File):
            raise TypeTransformerFailedError(f"Expected File object, received {type(python_val)}")

        # Build the blob bottom-up: metadata carries the format tag, the uri is
        # the file's (possibly remote) path.
        metadata = literals_pb2.BlobMetadata(
            type=types_pb2.BlobType(
                format=python_val.format, dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE
            )
        )
        blob = literals_pb2.Blob(metadata=metadata, uri=python_val.path)
        return literals_pb2.Literal(scalar=literals_pb2.Scalar(blob=blob))

    async def to_python_value(
        self,
        lv: literals_pb2.Literal,
        expected_python_type: Type[File],
    ) -> File:
        """Convert a Flyte literal to a File object."""
        if not lv.scalar.HasField("blob"):
            raise TypeTransformerFailedError(f"Expected blob literal, received {lv}")
        blob = lv.scalar.blob
        if blob.metadata.type.dimensionality != types_pb2.BlobType.BlobDimensionality.SINGLE:
            raise TypeTransformerFailedError(
                f"Expected single part blob, received {lv.scalar.blob.metadata.type.dimensionality}"
            )

        # Recover the display name from the uri's basename.
        uri = blob.uri
        return File(path=uri, name=Path(uri).name, format=blob.metadata.type.format)

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[File]:
        """Guess the Python type from a Flyte literal type."""
        is_single_blob = (
            literal_type.HasField("blob")
            and literal_type.blob.dimensionality == types_pb2.BlobType.BlobDimensionality.SINGLE
        )
        if is_single_blob and literal_type.blob.format != "PythonPickle":  # see pickle transformer
            return File
        raise ValueError(f"Cannot guess python type from {literal_type}")
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
# Register the File transformer with the global type engine at import time.
TypeEngine.register(FileTransformer())
|
|
File without changes
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import os
|
|
3
|
+
import typing
|
|
4
|
+
from typing import Type
|
|
5
|
+
|
|
6
|
+
import aiofiles
|
|
7
|
+
import cloudpickle
|
|
8
|
+
from flyteidl.core import literals_pb2, types_pb2
|
|
9
|
+
|
|
10
|
+
import union.storage as storage
|
|
11
|
+
from union.types import TypeEngine, TypeTransformer
|
|
12
|
+
|
|
13
|
+
# Type variable parameterizing FlytePickle's generic payload type.
T = typing.TypeVar("T")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class FlytePickle(typing.Generic[T]):
    """
    This type is only used by flytekit internally. User should not use this type.
    Any type that flyte can't recognize will become FlytePickle
    """

    @classmethod
    def python_type(cls) -> typing.Type:
        return type(None)

    @classmethod
    def __class_getitem__(cls, python_type: typing.Type) -> typing.Type:
        if python_type is None:
            return cls

        class _SpecificFormatClass(FlytePickle):
            # Get the type engine to see this as kind of a generic
            __origin__ = FlytePickle

            @classmethod
            def python_type(cls) -> typing.Type:
                return python_type

        return _SpecificFormatClass

    @classmethod
    async def to_pickle(cls, python_val: typing.Any) -> str:
        """Pickle ``python_val`` to a content-addressed local file, upload it, and return the remote uri."""
        payload = cloudpickle.dumps(python_val)
        digest = hashlib.md5(payload).hexdigest()

        local_path = storage.get_random_local_path(file_path_or_file_name=digest)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        async with aiofiles.open(local_path, "w+b") as sink:
            await sink.write(payload)

        return await storage.put(str(local_path))

    @classmethod
    async def from_pickle(cls, uri: str) -> typing.Any:
        """Deserialize the pickle at ``uri`` and return its contents.

        Downloads the pickle file to the local filesystem first when the uri is remote.
        """
        if storage.is_remote(uri):
            target = storage.get_random_local_path()
            await storage.get(uri, str(target), False)
            uri = target
        async with aiofiles.open(uri, "rb") as source:
            return cloudpickle.loads(await source.read())
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class FlytePickleTransformer(TypeTransformer[FlytePickle]):
    """Fallback transformer that (de)serializes arbitrary Python objects as cloudpickle blobs."""

    PYTHON_PICKLE_FORMAT = "PythonPickle"

    def __init__(self):
        super().__init__(name="FlytePickle", t=FlytePickle)

    def assert_type(self, t: Type[T], v: T):
        # Every type can serialize to pickle, so we don't need to check the type here.
        ...

    async def to_python_value(self, lv: literals_pb2.Literal, expected_python_type: Type[T]) -> T:
        """Download (if remote) and unpickle the blob referenced by the literal."""
        uri = lv.scalar.blob.uri
        return await FlytePickle.from_pickle(uri)

    async def to_literal(
        self,
        python_val: T,
        python_type: Type[T],
        expected: types_pb2.LiteralType,
    ) -> literals_pb2.Literal:
        """Pickle ``python_val``, upload it, and return a blob literal pointing at it.

        Raises:
            AssertionError: if ``python_val`` is None (None is never pickled).
        """
        if python_val is None:
            raise AssertionError("Cannot pickle None Value.")
        meta = literals_pb2.BlobMetadata(
            type=types_pb2.BlobType(
                format=self.PYTHON_PICKLE_FORMAT, dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE
            )
        )
        remote_path = await FlytePickle.to_pickle(python_val)
        return literals_pb2.Literal(scalar=literals_pb2.Scalar(blob=literals_pb2.Blob(metadata=meta, uri=remote_path)))

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> typing.Type[FlytePickle[typing.Any]]:
        """Reverse-map a single-part PythonPickle blob type back to FlytePickle."""
        if (
            # Fix: a proto3 sub-message field is never None, so `blob is not None` was
            # always true; presence must be tested with HasField.
            literal_type.HasField("blob")
            and literal_type.blob.dimensionality == types_pb2.BlobType.BlobDimensionality.SINGLE
            and literal_type.blob.format == FlytePickleTransformer.PYTHON_PICKLE_FORMAT
        ):
            return FlytePickle

        raise ValueError(f"Transformer {self} cannot reverse {literal_type}")

    def get_literal_type(self, t: Type[T]) -> types_pb2.LiteralType:
        """Return the blob literal type for pickled values, recording the Python class name in metadata."""
        lt = types_pb2.LiteralType(
            blob=types_pb2.BlobType(
                format=self.PYTHON_PICKLE_FORMAT, dimensionality=types_pb2.BlobType.BlobDimensionality.SINGLE
            )
        )
        # Fix: protobuf message fields cannot be assigned directly (`lt.metadata = {...}`
        # raises AttributeError); `metadata` is a google.protobuf.Struct, so populate it
        # via the well-known type's update() method.
        lt.metadata.update({"python_class_name": str(t)})
        return lt
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Register the pickle fallback transformer with the global type engine at import time.
TypeEngine.register(FlytePickleTransformer())
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Flytekit StructuredDataset
|
|
3
|
+
==========================================================
|
|
4
|
+
.. currentmodule:: flytekit.types.structured
|
|
5
|
+
|
|
6
|
+
.. autosummary::
|
|
7
|
+
:template: custom.rst
|
|
8
|
+
:toctree: generated/
|
|
9
|
+
|
|
10
|
+
StructuredDataset
|
|
11
|
+
StructuredDatasetDecoder
|
|
12
|
+
StructuredDatasetEncoder
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from union._logging import logger
|
|
16
|
+
from union._utils.lazy_module import is_imported
|
|
17
|
+
|
|
18
|
+
from .structured_dataset import (
|
|
19
|
+
DuplicateHandlerError,
|
|
20
|
+
StructuredDataset,
|
|
21
|
+
StructuredDatasetDecoder,
|
|
22
|
+
StructuredDatasetEncoder,
|
|
23
|
+
StructuredDatasetTransformerEngine,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def register_csv_handlers():
    """Register the pandas<->CSV encoder/decoder pair as the default CSV handlers."""
    from .basic_dfs import CSVToPandasDecodingHandler, PandasToCSVEncodingHandler

    for handler in (PandasToCSVEncodingHandler(), CSVToPandasDecodingHandler()):
        StructuredDatasetTransformerEngine.register(handler, default_format_for_type=True)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def register_pandas_handlers():
    """Register the pandas<->Parquet handlers and the DataFrame renderer."""
    import pandas as pd

    from union.types._renderer import TopFrameRenderer

    from .basic_dfs import PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler

    for handler in (PandasToParquetEncodingHandler(), ParquetToPandasDecodingHandler()):
        StructuredDatasetTransformerEngine.register(handler, default_format_for_type=True)
    StructuredDatasetTransformerEngine.register_renderer(pd.DataFrame, TopFrameRenderer())
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def register_arrow_handlers():
    """Register the pyarrow<->Parquet handlers and the Arrow table renderer."""
    import pyarrow as pa

    from union.types._renderer import ArrowRenderer

    from .basic_dfs import ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler

    for handler in (ArrowToParquetEncodingHandler(), ParquetToArrowDecodingHandler()):
        StructuredDatasetTransformerEngine.register(handler, default_format_for_type=True)
    StructuredDatasetTransformerEngine.register_renderer(pa.Table, ArrowRenderer())
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def register_bigquery_handlers():
    """Register BigQuery handlers; logs and skips when the BigQuery client packages are absent."""
    try:
        from .bigquery import (
            ArrowToBQEncodingHandlers,
            BQToArrowDecodingHandler,
            BQToPandasDecodingHandler,
            PandasToBQEncodingHandlers,
        )

        for handler in (
            PandasToBQEncodingHandlers(),
            BQToPandasDecodingHandler(),
            ArrowToBQEncodingHandlers(),
            BQToArrowDecodingHandler(),
        ):
            StructuredDatasetTransformerEngine.register(handler)
    except ImportError:
        logger.info(
            "We won't register bigquery handler for structured dataset because "
            "we can't find the packages google-cloud-bigquery-storage and google-cloud-bigquery"
        )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def register_snowflake_handlers():
    """Register Snowflake handlers; logs and skips when snowflake-connector-python is absent."""
    try:
        from .snowflake import PandasToSnowflakeEncodingHandlers, SnowflakeToPandasDecodingHandler

        for handler in (SnowflakeToPandasDecodingHandler(), PandasToSnowflakeEncodingHandlers()):
            StructuredDatasetTransformerEngine.register(handler)

    except ImportError:
        logger.info(
            "We won't register snowflake handler for structured dataset because "
            "we can't find package snowflake-connector-python"
        )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def lazy_import_structured_dataset_handler():
    """Register structured-dataset handlers for each supported dataframe library
    the user has already imported, skipping any that are already registered."""
    # (module to probe, registration callables, label used in the debug message)
    candidates = (
        ("pandas", (register_pandas_handlers, register_csv_handlers), "pandas"),
        ("pyarrow", (register_arrow_handlers,), "arrow"),
        ("google.cloud.bigquery", (register_bigquery_handlers,), "bigquery"),
        ("snowflake.connector", (register_snowflake_handlers,), "snowflake"),
    )
    for module_name, registrars, label in candidates:
        if not is_imported(module_name):
            continue
        try:
            for registrar in registrars:
                registrar()
        except DuplicateHandlerError:
            logger.debug(f"Transformer for {label} is already registered.")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Public API of the structured-dataset package.
__all__ = [
    "StructuredDataset",
    "StructuredDatasetDecoder",
    "StructuredDatasetEncoder",
    "StructuredDatasetTransformerEngine",
    "lazy_import_structured_dataset_handler",
]
|