flyte 0.0.1b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +62 -0
- flyte/_api_commons.py +3 -0
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/runtime.py +126 -0
- flyte/_build.py +25 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +146 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_cli/__init__.py +0 -0
- flyte/_cli/_common.py +287 -0
- flyte/_cli/_create.py +42 -0
- flyte/_cli/_delete.py +23 -0
- flyte/_cli/_deploy.py +140 -0
- flyte/_cli/_get.py +235 -0
- flyte/_cli/_run.py +152 -0
- flyte/_cli/main.py +72 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +113 -0
- flyte/_code_bundle/_packaging.py +187 -0
- flyte/_code_bundle/_utils.py +339 -0
- flyte/_code_bundle/bundle.py +178 -0
- flyte/_context.py +146 -0
- flyte/_datastructures.py +342 -0
- flyte/_deploy.py +202 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +43 -0
- flyte/_group.py +31 -0
- flyte/_hash.py +23 -0
- flyte/_image.py +760 -0
- flyte/_initialize.py +634 -0
- flyte/_interface.py +84 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +115 -0
- flyte/_internal/controllers/_local_controller.py +118 -0
- flyte/_internal/controllers/_trace.py +40 -0
- flyte/_internal/controllers/pbhash.py +39 -0
- flyte/_internal/controllers/remote/__init__.py +40 -0
- flyte/_internal/controllers/remote/_action.py +141 -0
- flyte/_internal/controllers/remote/_client.py +43 -0
- flyte/_internal/controllers/remote/_controller.py +361 -0
- flyte/_internal/controllers/remote/_core.py +402 -0
- flyte/_internal/controllers/remote/_informer.py +361 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +11 -0
- flyte/_internal/imagebuild/docker_builder.py +416 -0
- flyte/_internal/imagebuild/image_builder.py +241 -0
- flyte/_internal/imagebuild/remote_builder.py +0 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +54 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +199 -0
- flyte/_internal/runtime/entrypoints.py +135 -0
- flyte/_internal/runtime/io.py +136 -0
- flyte/_internal/runtime/resources_serde.py +138 -0
- flyte/_internal/runtime/task_serde.py +210 -0
- flyte/_internal/runtime/taskrunner.py +190 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_logging.py +124 -0
- flyte/_protos/__init__.py +0 -0
- flyte/_protos/common/authorization_pb2.py +66 -0
- flyte/_protos/common/authorization_pb2.pyi +108 -0
- flyte/_protos/common/authorization_pb2_grpc.py +4 -0
- flyte/_protos/common/identifier_pb2.py +71 -0
- flyte/_protos/common/identifier_pb2.pyi +82 -0
- flyte/_protos/common/identifier_pb2_grpc.py +4 -0
- flyte/_protos/common/identity_pb2.py +48 -0
- flyte/_protos/common/identity_pb2.pyi +72 -0
- flyte/_protos/common/identity_pb2_grpc.py +4 -0
- flyte/_protos/common/list_pb2.py +36 -0
- flyte/_protos/common/list_pb2.pyi +69 -0
- flyte/_protos/common/list_pb2_grpc.py +4 -0
- flyte/_protos/common/policy_pb2.py +37 -0
- flyte/_protos/common/policy_pb2.pyi +27 -0
- flyte/_protos/common/policy_pb2_grpc.py +4 -0
- flyte/_protos/common/role_pb2.py +37 -0
- flyte/_protos/common/role_pb2.pyi +53 -0
- flyte/_protos/common/role_pb2_grpc.py +4 -0
- flyte/_protos/common/runtime_version_pb2.py +28 -0
- flyte/_protos/common/runtime_version_pb2.pyi +24 -0
- flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
- flyte/_protos/logs/dataplane/payload_pb2.py +96 -0
- flyte/_protos/logs/dataplane/payload_pb2.pyi +168 -0
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/definition_pb2.py +49 -0
- flyte/_protos/secret/definition_pb2.pyi +93 -0
- flyte/_protos/secret/definition_pb2_grpc.py +4 -0
- flyte/_protos/secret/payload_pb2.py +62 -0
- flyte/_protos/secret/payload_pb2.pyi +94 -0
- flyte/_protos/secret/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/secret_pb2.py +38 -0
- flyte/_protos/secret/secret_pb2.pyi +6 -0
- flyte/_protos/secret/secret_pb2_grpc.py +198 -0
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
- flyte/_protos/validate/validate/validate_pb2.py +76 -0
- flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
- flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- flyte/_protos/workflow/queue_service_pb2.py +106 -0
- flyte/_protos/workflow/queue_service_pb2.pyi +141 -0
- flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- flyte/_protos/workflow/run_definition_pb2.py +128 -0
- flyte/_protos/workflow/run_definition_pb2.pyi +310 -0
- flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
- flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- flyte/_protos/workflow/run_service_pb2.py +133 -0
- flyte/_protos/workflow/run_service_pb2.pyi +175 -0
- flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
- flyte/_protos/workflow/state_service_pb2.py +58 -0
- flyte/_protos/workflow/state_service_pb2.pyi +71 -0
- flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
- flyte/_protos/workflow/task_definition_pb2.py +72 -0
- flyte/_protos/workflow/task_definition_pb2.pyi +65 -0
- flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/task_service_pb2.py +44 -0
- flyte/_protos/workflow/task_service_pb2.pyi +31 -0
- flyte/_protos/workflow/task_service_pb2_grpc.py +104 -0
- flyte/_resources.py +226 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +25 -0
- flyte/_run.py +411 -0
- flyte/_secret.py +61 -0
- flyte/_task.py +367 -0
- flyte/_task_environment.py +200 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +128 -0
- flyte/_utils/__init__.py +20 -0
- flyte/_utils/asyn.py +119 -0
- flyte/_utils/coro_management.py +25 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +108 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +21 -0
- flyte/connectors/__init__.py +0 -0
- flyte/errors.py +143 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +273 -0
- flyte/io/__init__.py +11 -0
- flyte/io/_dataframe.py +0 -0
- flyte/io/_dir.py +448 -0
- flyte/io/_file.py +468 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte/io/pickle/transformer.py +117 -0
- flyte/io/structured_dataset/__init__.py +129 -0
- flyte/io/structured_dataset/basic_dfs.py +219 -0
- flyte/io/structured_dataset/structured_dataset.py +1061 -0
- flyte/py.typed +0 -0
- flyte/remote/__init__.py +25 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +131 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +397 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +184 -0
- flyte/remote/_client/auth/_client_config.py +83 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +143 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +95 -0
- flyte/remote/_console.py +18 -0
- flyte/remote/_data.py +155 -0
- flyte/remote/_logs.py +116 -0
- flyte/remote/_project.py +86 -0
- flyte/remote/_run.py +873 -0
- flyte/remote/_secret.py +132 -0
- flyte/remote/_task.py +227 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +178 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +24 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +251 -0
- flyte/storage/_utils.py +5 -0
- flyte/types/__init__.py +13 -0
- flyte/types/_interface.py +25 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +120 -0
- flyte/types/_type_engine.py +2210 -0
- flyte/types/_utils.py +80 -0
- flyte-0.0.1b0.dist-info/METADATA +179 -0
- flyte-0.0.1b0.dist-info/RECORD +390 -0
- flyte-0.0.1b0.dist-info/WHEEL +5 -0
- flyte-0.0.1b0.dist-info/entry_points.txt +3 -0
- flyte-0.0.1b0.dist-info/top_level.txt +1 -0
- union/__init__.py +54 -0
- union/_api_commons.py +3 -0
- union/_bin/__init__.py +0 -0
- union/_bin/runtime.py +113 -0
- union/_build.py +25 -0
- union/_cache/__init__.py +12 -0
- union/_cache/cache.py +141 -0
- union/_cache/defaults.py +9 -0
- union/_cache/policy_function_body.py +42 -0
- union/_cli/__init__.py +0 -0
- union/_cli/_common.py +263 -0
- union/_cli/_create.py +40 -0
- union/_cli/_delete.py +23 -0
- union/_cli/_deploy.py +120 -0
- union/_cli/_get.py +162 -0
- union/_cli/_params.py +579 -0
- union/_cli/_run.py +150 -0
- union/_cli/main.py +72 -0
- union/_code_bundle/__init__.py +8 -0
- union/_code_bundle/_ignore.py +113 -0
- union/_code_bundle/_packaging.py +187 -0
- union/_code_bundle/_utils.py +342 -0
- union/_code_bundle/bundle.py +176 -0
- union/_context.py +146 -0
- union/_datastructures.py +295 -0
- union/_deploy.py +185 -0
- union/_doc.py +29 -0
- union/_docstring.py +26 -0
- union/_environment.py +43 -0
- union/_group.py +31 -0
- union/_hash.py +23 -0
- union/_image.py +760 -0
- union/_initialize.py +585 -0
- union/_interface.py +84 -0
- union/_internal/__init__.py +3 -0
- union/_internal/controllers/__init__.py +77 -0
- union/_internal/controllers/_local_controller.py +77 -0
- union/_internal/controllers/pbhash.py +39 -0
- union/_internal/controllers/remote/__init__.py +40 -0
- union/_internal/controllers/remote/_action.py +131 -0
- union/_internal/controllers/remote/_client.py +43 -0
- union/_internal/controllers/remote/_controller.py +169 -0
- union/_internal/controllers/remote/_core.py +341 -0
- union/_internal/controllers/remote/_informer.py +260 -0
- union/_internal/controllers/remote/_service_protocol.py +44 -0
- union/_internal/imagebuild/__init__.py +11 -0
- union/_internal/imagebuild/docker_builder.py +416 -0
- union/_internal/imagebuild/image_builder.py +243 -0
- union/_internal/imagebuild/remote_builder.py +0 -0
- union/_internal/resolvers/__init__.py +0 -0
- union/_internal/resolvers/_task_module.py +31 -0
- union/_internal/resolvers/common.py +24 -0
- union/_internal/resolvers/default.py +27 -0
- union/_internal/runtime/__init__.py +0 -0
- union/_internal/runtime/convert.py +163 -0
- union/_internal/runtime/entrypoints.py +121 -0
- union/_internal/runtime/io.py +136 -0
- union/_internal/runtime/resources_serde.py +134 -0
- union/_internal/runtime/task_serde.py +202 -0
- union/_internal/runtime/taskrunner.py +179 -0
- union/_internal/runtime/types_serde.py +53 -0
- union/_logging.py +124 -0
- union/_protos/__init__.py +0 -0
- union/_protos/common/authorization_pb2.py +66 -0
- union/_protos/common/authorization_pb2.pyi +106 -0
- union/_protos/common/authorization_pb2_grpc.py +4 -0
- union/_protos/common/identifier_pb2.py +71 -0
- union/_protos/common/identifier_pb2.pyi +82 -0
- union/_protos/common/identifier_pb2_grpc.py +4 -0
- union/_protos/common/identity_pb2.py +48 -0
- union/_protos/common/identity_pb2.pyi +72 -0
- union/_protos/common/identity_pb2_grpc.py +4 -0
- union/_protos/common/list_pb2.py +36 -0
- union/_protos/common/list_pb2.pyi +69 -0
- union/_protos/common/list_pb2_grpc.py +4 -0
- union/_protos/common/policy_pb2.py +37 -0
- union/_protos/common/policy_pb2.pyi +27 -0
- union/_protos/common/policy_pb2_grpc.py +4 -0
- union/_protos/common/role_pb2.py +37 -0
- union/_protos/common/role_pb2.pyi +51 -0
- union/_protos/common/role_pb2_grpc.py +4 -0
- union/_protos/common/runtime_version_pb2.py +28 -0
- union/_protos/common/runtime_version_pb2.pyi +24 -0
- union/_protos/common/runtime_version_pb2_grpc.py +4 -0
- union/_protos/logs/dataplane/payload_pb2.py +96 -0
- union/_protos/logs/dataplane/payload_pb2.pyi +168 -0
- union/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- union/_protos/secret/definition_pb2.py +49 -0
- union/_protos/secret/definition_pb2.pyi +93 -0
- union/_protos/secret/definition_pb2_grpc.py +4 -0
- union/_protos/secret/payload_pb2.py +62 -0
- union/_protos/secret/payload_pb2.pyi +94 -0
- union/_protos/secret/payload_pb2_grpc.py +4 -0
- union/_protos/secret/secret_pb2.py +38 -0
- union/_protos/secret/secret_pb2.pyi +6 -0
- union/_protos/secret/secret_pb2_grpc.py +198 -0
- union/_protos/validate/validate/validate_pb2.py +76 -0
- union/_protos/workflow/node_execution_service_pb2.py +26 -0
- union/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- union/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- union/_protos/workflow/queue_service_pb2.py +75 -0
- union/_protos/workflow/queue_service_pb2.pyi +103 -0
- union/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- union/_protos/workflow/run_definition_pb2.py +100 -0
- union/_protos/workflow/run_definition_pb2.pyi +256 -0
- union/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- union/_protos/workflow/run_logs_service_pb2.py +41 -0
- union/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- union/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- union/_protos/workflow/run_service_pb2.py +133 -0
- union/_protos/workflow/run_service_pb2.pyi +173 -0
- union/_protos/workflow/run_service_pb2_grpc.py +412 -0
- union/_protos/workflow/state_service_pb2.py +58 -0
- union/_protos/workflow/state_service_pb2.pyi +69 -0
- union/_protos/workflow/state_service_pb2_grpc.py +138 -0
- union/_protos/workflow/task_definition_pb2.py +72 -0
- union/_protos/workflow/task_definition_pb2.pyi +65 -0
- union/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- union/_protos/workflow/task_service_pb2.py +44 -0
- union/_protos/workflow/task_service_pb2.pyi +31 -0
- union/_protos/workflow/task_service_pb2_grpc.py +104 -0
- union/_resources.py +226 -0
- union/_retry.py +32 -0
- union/_reusable_environment.py +25 -0
- union/_run.py +374 -0
- union/_secret.py +61 -0
- union/_task.py +354 -0
- union/_task_environment.py +186 -0
- union/_timeout.py +47 -0
- union/_tools.py +27 -0
- union/_utils/__init__.py +11 -0
- union/_utils/asyn.py +119 -0
- union/_utils/file_handling.py +71 -0
- union/_utils/helpers.py +46 -0
- union/_utils/lazy_module.py +54 -0
- union/_utils/uv_script_parser.py +49 -0
- union/_version.py +21 -0
- union/connectors/__init__.py +0 -0
- union/errors.py +128 -0
- union/extras/__init__.py +5 -0
- union/extras/_container.py +263 -0
- union/io/__init__.py +11 -0
- union/io/_dataframe.py +0 -0
- union/io/_dir.py +425 -0
- union/io/_file.py +418 -0
- union/io/pickle/__init__.py +0 -0
- union/io/pickle/transformer.py +117 -0
- union/io/structured_dataset/__init__.py +122 -0
- union/io/structured_dataset/basic_dfs.py +219 -0
- union/io/structured_dataset/structured_dataset.py +1057 -0
- union/py.typed +0 -0
- union/remote/__init__.py +23 -0
- union/remote/_client/__init__.py +0 -0
- union/remote/_client/_protocols.py +129 -0
- union/remote/_client/auth/__init__.py +12 -0
- union/remote/_client/auth/_authenticators/__init__.py +0 -0
- union/remote/_client/auth/_authenticators/base.py +391 -0
- union/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- union/remote/_client/auth/_authenticators/device_code.py +120 -0
- union/remote/_client/auth/_authenticators/external_command.py +77 -0
- union/remote/_client/auth/_authenticators/factory.py +200 -0
- union/remote/_client/auth/_authenticators/pkce.py +515 -0
- union/remote/_client/auth/_channel.py +184 -0
- union/remote/_client/auth/_client_config.py +83 -0
- union/remote/_client/auth/_default_html.py +32 -0
- union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- union/remote/_client/auth/_grpc_utils/auth_interceptor.py +204 -0
- union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +144 -0
- union/remote/_client/auth/_keyring.py +154 -0
- union/remote/_client/auth/_token_client.py +258 -0
- union/remote/_client/auth/errors.py +16 -0
- union/remote/_client/controlplane.py +86 -0
- union/remote/_data.py +149 -0
- union/remote/_logs.py +74 -0
- union/remote/_project.py +86 -0
- union/remote/_run.py +820 -0
- union/remote/_secret.py +132 -0
- union/remote/_task.py +193 -0
- union/report/__init__.py +3 -0
- union/report/_report.py +178 -0
- union/report/_template.html +124 -0
- union/storage/__init__.py +24 -0
- union/storage/_remote_fs.py +34 -0
- union/storage/_storage.py +247 -0
- union/storage/_utils.py +5 -0
- union/types/__init__.py +11 -0
- union/types/_renderer.py +162 -0
- union/types/_string_literals.py +120 -0
- union/types/_type_engine.py +2131 -0
- union/types/_utils.py +80 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Flytekit StructuredDataset
|
|
3
|
+
==========================================================
|
|
4
|
+
.. currentmodule:: flyte.io.structured_dataset
|
|
5
|
+
|
|
6
|
+
.. autosummary::
|
|
7
|
+
:template: custom.rst
|
|
8
|
+
:toctree: generated/
|
|
9
|
+
|
|
10
|
+
StructuredDataset
|
|
11
|
+
StructuredDatasetDecoder
|
|
12
|
+
StructuredDatasetEncoder
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import functools
|
|
16
|
+
|
|
17
|
+
from flyte._logging import logger
|
|
18
|
+
from flyte._utils.lazy_module import is_imported
|
|
19
|
+
|
|
20
|
+
from .structured_dataset import (
|
|
21
|
+
DuplicateHandlerError,
|
|
22
|
+
StructuredDataset,
|
|
23
|
+
StructuredDatasetDecoder,
|
|
24
|
+
StructuredDatasetEncoder,
|
|
25
|
+
StructuredDatasetTransformerEngine,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@functools.lru_cache(maxsize=None)
def register_csv_handlers():
    """Register the pandas/CSV encoder and decoder with the transformer engine.

    Both handlers are registered with ``default_format_for_type=True``. The
    ``lru_cache`` wrapper means a call that completed successfully is not
    executed again.
    """
    from .basic_dfs import CSVToPandasDecodingHandler, PandasToCSVEncodingHandler

    # Encoder first, then decoder; each handler is built right before it is registered.
    for handler_cls in (PandasToCSVEncodingHandler, CSVToPandasDecodingHandler):
        StructuredDatasetTransformerEngine.register(handler_cls(), default_format_for_type=True)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@functools.lru_cache(maxsize=None)
def register_pandas_handlers():
    """Register the pandas/Parquet handlers and the DataFrame renderer.

    pandas is imported here rather than at module import time. The
    ``lru_cache`` wrapper means a call that completed successfully is not
    executed again.
    """
    import pandas

    from flyte.types._renderer import TopFrameRenderer

    from .basic_dfs import PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler

    engine = StructuredDatasetTransformerEngine
    # Encoder first, then decoder; each handler is built right before it is registered.
    for handler_cls in (PandasToParquetEncodingHandler, ParquetToPandasDecodingHandler):
        engine.register(handler_cls(), default_format_for_type=True)
    engine.register_renderer(pandas.DataFrame, TopFrameRenderer())
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@functools.lru_cache(maxsize=None)
def register_arrow_handlers():
    """Register the pyarrow/Parquet handlers and the Arrow table renderer.

    pyarrow is imported here rather than at module import time. The
    ``lru_cache`` wrapper means a call that completed successfully is not
    executed again.
    """
    import pyarrow

    from flyte.types._renderer import ArrowRenderer

    from .basic_dfs import ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler

    engine = StructuredDatasetTransformerEngine
    # Encoder first, then decoder; each handler is built right before it is registered.
    for handler_cls in (ArrowToParquetEncodingHandler, ParquetToArrowDecodingHandler):
        engine.register(handler_cls(), default_format_for_type=True)
    engine.register_renderer(pyarrow.Table, ArrowRenderer())
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@functools.lru_cache(maxsize=None)
def register_bigquery_handlers():
    """Register the BigQuery encoders/decoders for pandas and Arrow, if importable.

    An ``ImportError`` raised anywhere inside the ``try`` body (the handler
    import or a registration) is logged at info level and swallowed, so the
    function then returns normally without registering the remaining handlers.
    """
    try:
        from .bigquery import (
            ArrowToBQEncodingHandlers,
            BQToArrowDecodingHandler,
            BQToPandasDecodingHandler,
            PandasToBQEncodingHandlers,
        )

        # Registration order matches the original: pandas pair first, then Arrow pair.
        ordered_handlers = (
            PandasToBQEncodingHandlers,
            BQToPandasDecodingHandler,
            ArrowToBQEncodingHandlers,
            BQToArrowDecodingHandler,
        )
        for handler_cls in ordered_handlers:
            StructuredDatasetTransformerEngine.register(handler_cls())
    except ImportError:
        message = (
            "We won't register bigquery handler for structured dataset because "
            "we can't find the packages google-cloud-bigquery-storage and google-cloud-bigquery"
        )
        logger.info(message)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@functools.lru_cache(maxsize=None)
def register_snowflake_handlers():
    """Register the Snowflake decoder and encoder for pandas, if importable.

    An ``ImportError`` raised anywhere inside the ``try`` body is logged at
    info level and swallowed, so the function then returns normally.
    """
    try:
        from .snowflake import PandasToSnowflakeEncodingHandlers, SnowflakeToPandasDecodingHandler

        # Decoder is registered before the encoder, as in the original.
        for handler_cls in (SnowflakeToPandasDecodingHandler, PandasToSnowflakeEncodingHandlers):
            StructuredDatasetTransformerEngine.register(handler_cls())

    except ImportError:
        message = (
            "We won't register snowflake handler for structured dataset because "
            "we can't find package snowflake-connector-python"
        )
        logger.info(message)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def lazy_import_structured_dataset_handler():
    """Register dataset handlers for every supported backend that is already imported.

    For each backend module that ``is_imported`` reports as loaded, the
    matching ``register_*`` functions are called. A ``DuplicateHandlerError``
    raised while registering a backend is logged at debug level and does not
    stop the remaining backends from being processed.
    """
    # (module to probe, registrars to run in order, debug message on duplicate)
    backends = (
        (
            "pandas",
            (register_pandas_handlers, register_csv_handlers),
            "Transformer for pandas is already registered.",
        ),
        (
            "pyarrow",
            (register_arrow_handlers,),
            "Transformer for arrow is already registered.",
        ),
        (
            "google.cloud.bigquery",
            (register_bigquery_handlers,),
            "Transformer for bigquery is already registered.",
        ),
        (
            "snowflake.connector",
            (register_snowflake_handlers,),
            "Transformer for snowflake is already registered.",
        ),
    )

    for module_name, registrars, duplicate_message in backends:
        if not is_imported(module_name):
            continue
        try:
            # A duplicate raised by the first registrar skips the rest of this group.
            for registrar in registrars:
                registrar()
        except DuplicateHandlerError:
            logger.debug(duplicate_message)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
__all__ = [
|
|
124
|
+
"StructuredDataset",
|
|
125
|
+
"StructuredDatasetDecoder",
|
|
126
|
+
"StructuredDatasetEncoder",
|
|
127
|
+
"StructuredDatasetTransformerEngine",
|
|
128
|
+
"lazy_import_structured_dataset_handler",
|
|
129
|
+
]
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import typing
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import TypeVar
|
|
5
|
+
|
|
6
|
+
from flyteidl.core import literals_pb2, types_pb2
|
|
7
|
+
from fsspec.core import split_protocol, strip_protocol
|
|
8
|
+
|
|
9
|
+
import flyte.storage as storage
|
|
10
|
+
from flyte._logging import logger
|
|
11
|
+
from flyte._utils import lazy_module
|
|
12
|
+
from flyte.io.structured_dataset.structured_dataset import (
|
|
13
|
+
CSV,
|
|
14
|
+
PARQUET,
|
|
15
|
+
StructuredDataset,
|
|
16
|
+
StructuredDatasetDecoder,
|
|
17
|
+
StructuredDatasetEncoder,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
if typing.TYPE_CHECKING:
|
|
21
|
+
import pandas as pd
|
|
22
|
+
import pyarrow as pa
|
|
23
|
+
else:
|
|
24
|
+
pd = lazy_module("pandas")
|
|
25
|
+
pa = lazy_module("pyarrow")
|
|
26
|
+
|
|
27
|
+
T = TypeVar("T")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# pr: add back after storage
|
|
31
|
+
def get_pandas_storage_options(uri: str, data_config=None, anonymous: bool = False) -> typing.Optional[typing.Dict]:
    """Build the ``storage_options`` mapping to hand to pandas I/O calls for ``uri``.

    :param uri: Path or URL that pandas will read from or write to.
    :param data_config: Currently unused; kept so existing callers keep working.
    :param anonymous: When True and ``uri`` is an S3 URL, request unsigned access
        (``anon=True``) and leave out the access key and secret. Previously this
        flag was ignored, so the decoders' "anonymous" retry reused the same
        credentials.
    :return: ``None`` for non-fsspec paths, ``{}`` for non-S3 fsspec URLs, and an
        option dict for S3 URLs.
    """
    from pandas.io.common import is_fsspec_url  # type: ignore

    if not is_fsspec_url(uri):
        # Pandas does not allow storage_options for non-fsspec paths e.g. local.
        return None

    if not uri.startswith("s3"):
        return {}

    # pr: after storage, replace with real call to get_fsspec_storage_options
    # NOTE(review): the endpoint and credentials below are hard-coded values that
    # look like a local MinIO development setup; they should come from the storage
    # configuration before this is used against a real deployment.
    options: typing.Dict[str, typing.Any] = {
        "cache_regions": True,
        "client_kwargs": {"endpoint_url": "http://localhost:30002"},
    }
    if anonymous:
        options["anon"] = True
    else:
        options["key"] = "minio"
        options["secret"] = "miniostorage"
    return options
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class PandasToCSVEncodingHandler(StructuredDatasetEncoder):
    """Encoder that writes a pandas DataFrame as a CSV object under the dataset URI."""

    def __init__(self):
        # Handles pd.DataFrame values in CSV format; the middle argument is passed
        # as None (presumably the storage protocol — confirm against the base class).
        super().__init__(pd.DataFrame, None, CSV)

    async def encode(
        self,
        structured_dataset: StructuredDataset,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        """Write the dataset's DataFrame to ``<uri>/.csv`` and return its literal.

        :param structured_dataset: Dataset whose ``dataframe`` is written. When its
            ``uri`` is empty, a random remote path is taken from the internal context.
        :param structured_dataset_type: Type message; its ``format`` is set to CSV
            in place before it is attached to the result.
        :return: A ``StructuredDataset`` literal pointing at the directory URI.
        """
        if not structured_dataset.uri:
            from flyte._context import internal_ctx

            ctx = internal_ctx()
            # str() keeps this in line with the Parquet encoder, so the path helpers
            # below always receive a string.
            uri = str(ctx.raw_data.get_random_remote_path())
        else:
            uri = typing.cast(str, structured_dataset.uri)

        if not storage.is_remote(uri):
            # Local targets need the directory to exist before pandas writes into it.
            Path(uri).mkdir(parents=True, exist_ok=True)
        path = os.path.join(uri, ".csv")
        df = typing.cast(pd.DataFrame, structured_dataset.dataframe)
        df.to_csv(
            path,
            index=False,
            storage_options=get_pandas_storage_options(uri=path, data_config=None),
        )
        structured_dataset_type.format = CSV
        # Protobuf message constructors accept keyword arguments only; passing the
        # type positionally raises TypeError.
        return literals_pb2.StructuredDataset(
            uri=uri,
            metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type),
        )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class CSVToPandasDecodingHandler(StructuredDatasetDecoder):
    """Decoder that loads the ``.csv`` object under a dataset URI into a pandas DataFrame."""

    def __init__(self):
        super().__init__(pd.DataFrame, None, CSV)

    async def decode(
        self,
        proto_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pd.DataFrame":
        """Read ``<uri>/.csv`` with pandas, restricted to the requested columns if any.

        :param proto_value: Literal whose ``uri`` names the dataset directory.
        :param current_task_metadata: When its dataset type lists columns, only
            those column names are read.
        :return: The loaded DataFrame. If the first read raises
            ``NoCredentialsError``, the read is retried once with storage options
            requested in anonymous mode.
        """
        from botocore.exceptions import NoCredentialsError

        dataset_uri = proto_value.uri
        storage_opts = get_pandas_storage_options(uri=dataset_uri, data_config=None)
        csv_path = os.path.join(dataset_uri, ".csv")

        sd_type = current_task_metadata.structured_dataset_type
        if sd_type and sd_type.columns:
            wanted = [col.name for col in sd_type.columns]
        else:
            wanted = None

        try:
            return pd.read_csv(csv_path, usecols=wanted, storage_options=storage_opts)
        except NoCredentialsError:
            logger.debug("S3 source detected, attempting anonymous S3 access")
            storage_opts = get_pandas_storage_options(uri=dataset_uri, data_config=None, anonymous=True)
            return pd.read_csv(csv_path, usecols=wanted, storage_options=storage_opts)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class PandasToParquetEncodingHandler(StructuredDatasetEncoder):
    """Encoder that writes a pandas DataFrame as a Parquet object under the dataset URI."""

    def __init__(self):
        super().__init__(pd.DataFrame, None, PARQUET)

    async def encode(
        self,
        structured_dataset: StructuredDataset,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        """Write the dataset's DataFrame to ``<uri>/00000`` and return its literal.

        :param structured_dataset: Dataset whose ``dataframe`` is written. When its
            ``uri`` is empty, a random remote path is taken from the internal context.
        :param structured_dataset_type: Type message; its ``format`` is set to
            PARQUET in place before it is attached to the result.
        :return: A ``StructuredDataset`` literal pointing at the directory URI.
        """
        if structured_dataset.uri:
            target_uri = typing.cast(str, structured_dataset.uri)
        else:
            from flyte._context import internal_ctx

            target_uri = str(internal_ctx().raw_data.get_random_remote_path())

        if not storage.is_remote(target_uri):
            # Local targets need the directory to exist before pandas writes into it.
            Path(target_uri).mkdir(parents=True, exist_ok=True)

        part_path = os.path.join(target_uri, f"{0:05}")
        frame = typing.cast(pd.DataFrame, structured_dataset.dataframe)
        write_opts = get_pandas_storage_options(uri=part_path, data_config=None)
        frame.to_parquet(
            part_path,
            coerce_timestamps="us",
            allow_truncated_timestamps=False,
            storage_options=write_opts,
        )

        structured_dataset_type.format = PARQUET
        metadata = literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type)
        return literals_pb2.StructuredDataset(uri=target_uri, metadata=metadata)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class ParquetToPandasDecodingHandler(StructuredDatasetDecoder):
    """Decoder that loads Parquet data at a dataset URI into a pandas DataFrame."""

    def __init__(self):
        super().__init__(pd.DataFrame, None, PARQUET)

    async def decode(
        self,
        flyte_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pd.DataFrame":
        """Read the Parquet data at ``flyte_value.uri``, restricted to requested columns if any.

        :param flyte_value: Literal whose ``uri`` is passed straight to pandas.
        :param current_task_metadata: When its dataset type lists columns, only
            those column names are read.
        :return: The loaded DataFrame. If the first read raises
            ``NoCredentialsError``, the read is retried once with storage options
            requested in anonymous mode.
        """
        from botocore.exceptions import NoCredentialsError

        dataset_uri = flyte_value.uri
        storage_opts = get_pandas_storage_options(uri=dataset_uri, data_config=None)

        sd_type = current_task_metadata.structured_dataset_type
        if sd_type and sd_type.columns:
            wanted = [col.name for col in sd_type.columns]
        else:
            wanted = None

        try:
            return pd.read_parquet(dataset_uri, columns=wanted, storage_options=storage_opts)
        except NoCredentialsError:
            logger.debug("S3 source detected, attempting anonymous S3 access")
            storage_opts = get_pandas_storage_options(uri=dataset_uri, data_config=None, anonymous=True)
            return pd.read_parquet(dataset_uri, columns=wanted, storage_options=storage_opts)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class ArrowToParquetEncodingHandler(StructuredDatasetEncoder):
    """Encoder that writes a pyarrow Table as a Parquet object under the dataset URI."""

    def __init__(self):
        # Handles pa.Table values in PARQUET format; the middle argument is passed
        # as None (presumably the storage protocol — confirm against the base class).
        super().__init__(pa.Table, None, PARQUET)

    async def encode(
        self,
        structured_dataset: StructuredDataset,
        structured_dataset_type: types_pb2.StructuredDatasetType,
    ) -> literals_pb2.StructuredDataset:
        """Write the dataset's table to ``<uri>/00000`` and return its literal.

        :param structured_dataset: Dataset whose ``dataframe`` (a pyarrow table) is
            written. When its ``uri`` is empty, a random remote path is taken from
            the internal context.
        :param structured_dataset_type: Type message attached to the returned
            metadata. NOTE(review): unlike the pandas encoders, ``format`` is not
            set here — confirm whether that is intentional.
        :return: A ``StructuredDataset`` literal pointing at the directory URI.
        """
        import pyarrow.parquet as pq

        if not structured_dataset.uri:
            from flyte._context import internal_ctx

            ctx = internal_ctx()
            # str() keeps this in line with the pandas Parquet encoder, so the path
            # helpers below always receive a string.
            uri = str(ctx.raw_data.get_random_remote_path())
        else:
            uri = typing.cast(str, structured_dataset.uri)

        if not storage.is_remote(uri):
            # Local targets need the directory to exist before pyarrow writes into it.
            Path(uri).mkdir(parents=True, exist_ok=True)
        path = os.path.join(uri, f"{0:05}")
        filesystem = storage.get_underlying_filesystem(path=path)
        # The filesystem object carries the protocol, so pyarrow gets the bare path.
        pq.write_table(structured_dataset.dataframe, strip_protocol(path), filesystem=filesystem)
        # Protobuf message constructors accept keyword arguments only; passing the
        # type positionally raises TypeError.
        return literals_pb2.StructuredDataset(
            uri=uri,
            metadata=literals_pb2.StructuredDatasetMetadata(structured_dataset_type=structured_dataset_type),
        )
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class ParquetToArrowDecodingHandler(StructuredDatasetDecoder):
    """Decoder that loads Parquet data at a dataset URI into a pyarrow Table."""

    def __init__(self):
        # Handles pa.Table values in PARQUET format; the middle argument is passed
        # as None (presumably the storage protocol — confirm against the base class).
        super().__init__(pa.Table, None, PARQUET)

    async def decode(
        self,
        proto_value: literals_pb2.StructuredDataset,
        current_task_metadata: literals_pb2.StructuredDatasetMetadata,
    ) -> "pa.Table":
        """Read the Parquet data at ``proto_value.uri``, restricted to requested columns if any.

        :param proto_value: Literal whose ``uri`` names the data to read.
        :param current_task_metadata: When its dataset type lists columns, only
            those column names are read.
        :return: The loaded table.
        :raises NoCredentialsError: Re-raised when the first read lacks credentials
            and no anonymous filesystem is available for the retry.
        """
        import pyarrow.parquet as pq
        from botocore.exceptions import NoCredentialsError

        uri = proto_value.uri
        if not storage.is_remote(uri):
            # NOTE(review): creating the parent directory on a read path is kept
            # from the original; confirm whether it is still needed.
            Path(uri).parent.mkdir(parents=True, exist_ok=True)
        _, path = split_protocol(uri)

        columns = None
        if current_task_metadata.structured_dataset_type and current_task_metadata.structured_dataset_type.columns:
            columns = [c.name for c in current_task_metadata.structured_dataset_type.columns]
        try:
            # The protocol has been stripped from `path`, so the matching filesystem
            # must be supplied; otherwise pyarrow resolves a remote path locally.
            # This mirrors how the Arrow encoder writes the data.
            fs = storage.get_underlying_filesystem(path=uri)
            return pq.read_table(path, filesystem=fs, columns=columns)
        except NoCredentialsError:
            logger.debug("S3 source detected, attempting anonymous S3 access")
            fs = storage.get_underlying_filesystem(path=uri, anonymous=True)
            if fs is not None:
                return pq.read_table(path, filesystem=fs, columns=columns)
            # Bare raise keeps the original traceback intact.
            raise
|