flyte 0.0.1b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flyte might be problematic. Click here for more details.
- flyte/__init__.py +62 -0
- flyte/_api_commons.py +3 -0
- flyte/_bin/__init__.py +0 -0
- flyte/_bin/runtime.py +126 -0
- flyte/_build.py +25 -0
- flyte/_cache/__init__.py +12 -0
- flyte/_cache/cache.py +146 -0
- flyte/_cache/defaults.py +9 -0
- flyte/_cache/policy_function_body.py +42 -0
- flyte/_cli/__init__.py +0 -0
- flyte/_cli/_common.py +287 -0
- flyte/_cli/_create.py +42 -0
- flyte/_cli/_delete.py +23 -0
- flyte/_cli/_deploy.py +140 -0
- flyte/_cli/_get.py +235 -0
- flyte/_cli/_run.py +152 -0
- flyte/_cli/main.py +72 -0
- flyte/_code_bundle/__init__.py +8 -0
- flyte/_code_bundle/_ignore.py +113 -0
- flyte/_code_bundle/_packaging.py +187 -0
- flyte/_code_bundle/_utils.py +339 -0
- flyte/_code_bundle/bundle.py +178 -0
- flyte/_context.py +146 -0
- flyte/_datastructures.py +342 -0
- flyte/_deploy.py +202 -0
- flyte/_doc.py +29 -0
- flyte/_docstring.py +32 -0
- flyte/_environment.py +43 -0
- flyte/_group.py +31 -0
- flyte/_hash.py +23 -0
- flyte/_image.py +760 -0
- flyte/_initialize.py +634 -0
- flyte/_interface.py +84 -0
- flyte/_internal/__init__.py +3 -0
- flyte/_internal/controllers/__init__.py +115 -0
- flyte/_internal/controllers/_local_controller.py +118 -0
- flyte/_internal/controllers/_trace.py +40 -0
- flyte/_internal/controllers/pbhash.py +39 -0
- flyte/_internal/controllers/remote/__init__.py +40 -0
- flyte/_internal/controllers/remote/_action.py +141 -0
- flyte/_internal/controllers/remote/_client.py +43 -0
- flyte/_internal/controllers/remote/_controller.py +361 -0
- flyte/_internal/controllers/remote/_core.py +402 -0
- flyte/_internal/controllers/remote/_informer.py +361 -0
- flyte/_internal/controllers/remote/_service_protocol.py +50 -0
- flyte/_internal/imagebuild/__init__.py +11 -0
- flyte/_internal/imagebuild/docker_builder.py +416 -0
- flyte/_internal/imagebuild/image_builder.py +241 -0
- flyte/_internal/imagebuild/remote_builder.py +0 -0
- flyte/_internal/resolvers/__init__.py +0 -0
- flyte/_internal/resolvers/_task_module.py +54 -0
- flyte/_internal/resolvers/common.py +31 -0
- flyte/_internal/resolvers/default.py +28 -0
- flyte/_internal/runtime/__init__.py +0 -0
- flyte/_internal/runtime/convert.py +199 -0
- flyte/_internal/runtime/entrypoints.py +135 -0
- flyte/_internal/runtime/io.py +136 -0
- flyte/_internal/runtime/resources_serde.py +138 -0
- flyte/_internal/runtime/task_serde.py +210 -0
- flyte/_internal/runtime/taskrunner.py +190 -0
- flyte/_internal/runtime/types_serde.py +54 -0
- flyte/_logging.py +124 -0
- flyte/_protos/__init__.py +0 -0
- flyte/_protos/common/authorization_pb2.py +66 -0
- flyte/_protos/common/authorization_pb2.pyi +108 -0
- flyte/_protos/common/authorization_pb2_grpc.py +4 -0
- flyte/_protos/common/identifier_pb2.py +71 -0
- flyte/_protos/common/identifier_pb2.pyi +82 -0
- flyte/_protos/common/identifier_pb2_grpc.py +4 -0
- flyte/_protos/common/identity_pb2.py +48 -0
- flyte/_protos/common/identity_pb2.pyi +72 -0
- flyte/_protos/common/identity_pb2_grpc.py +4 -0
- flyte/_protos/common/list_pb2.py +36 -0
- flyte/_protos/common/list_pb2.pyi +69 -0
- flyte/_protos/common/list_pb2_grpc.py +4 -0
- flyte/_protos/common/policy_pb2.py +37 -0
- flyte/_protos/common/policy_pb2.pyi +27 -0
- flyte/_protos/common/policy_pb2_grpc.py +4 -0
- flyte/_protos/common/role_pb2.py +37 -0
- flyte/_protos/common/role_pb2.pyi +53 -0
- flyte/_protos/common/role_pb2_grpc.py +4 -0
- flyte/_protos/common/runtime_version_pb2.py +28 -0
- flyte/_protos/common/runtime_version_pb2.pyi +24 -0
- flyte/_protos/common/runtime_version_pb2_grpc.py +4 -0
- flyte/_protos/logs/dataplane/payload_pb2.py +96 -0
- flyte/_protos/logs/dataplane/payload_pb2.pyi +168 -0
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/definition_pb2.py +49 -0
- flyte/_protos/secret/definition_pb2.pyi +93 -0
- flyte/_protos/secret/definition_pb2_grpc.py +4 -0
- flyte/_protos/secret/payload_pb2.py +62 -0
- flyte/_protos/secret/payload_pb2.pyi +94 -0
- flyte/_protos/secret/payload_pb2_grpc.py +4 -0
- flyte/_protos/secret/secret_pb2.py +38 -0
- flyte/_protos/secret/secret_pb2.pyi +6 -0
- flyte/_protos/secret/secret_pb2_grpc.py +198 -0
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +198 -0
- flyte/_protos/validate/validate/validate_pb2.py +76 -0
- flyte/_protos/workflow/node_execution_service_pb2.py +26 -0
- flyte/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- flyte/_protos/workflow/queue_service_pb2.py +106 -0
- flyte/_protos/workflow/queue_service_pb2.pyi +141 -0
- flyte/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- flyte/_protos/workflow/run_definition_pb2.py +128 -0
- flyte/_protos/workflow/run_definition_pb2.pyi +310 -0
- flyte/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/run_logs_service_pb2.py +41 -0
- flyte/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- flyte/_protos/workflow/run_service_pb2.py +133 -0
- flyte/_protos/workflow/run_service_pb2.pyi +175 -0
- flyte/_protos/workflow/run_service_pb2_grpc.py +412 -0
- flyte/_protos/workflow/state_service_pb2.py +58 -0
- flyte/_protos/workflow/state_service_pb2.pyi +71 -0
- flyte/_protos/workflow/state_service_pb2_grpc.py +138 -0
- flyte/_protos/workflow/task_definition_pb2.py +72 -0
- flyte/_protos/workflow/task_definition_pb2.pyi +65 -0
- flyte/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- flyte/_protos/workflow/task_service_pb2.py +44 -0
- flyte/_protos/workflow/task_service_pb2.pyi +31 -0
- flyte/_protos/workflow/task_service_pb2_grpc.py +104 -0
- flyte/_resources.py +226 -0
- flyte/_retry.py +32 -0
- flyte/_reusable_environment.py +25 -0
- flyte/_run.py +411 -0
- flyte/_secret.py +61 -0
- flyte/_task.py +367 -0
- flyte/_task_environment.py +200 -0
- flyte/_timeout.py +47 -0
- flyte/_tools.py +27 -0
- flyte/_trace.py +128 -0
- flyte/_utils/__init__.py +20 -0
- flyte/_utils/asyn.py +119 -0
- flyte/_utils/coro_management.py +25 -0
- flyte/_utils/file_handling.py +72 -0
- flyte/_utils/helpers.py +108 -0
- flyte/_utils/lazy_module.py +54 -0
- flyte/_utils/uv_script_parser.py +49 -0
- flyte/_version.py +21 -0
- flyte/connectors/__init__.py +0 -0
- flyte/errors.py +143 -0
- flyte/extras/__init__.py +5 -0
- flyte/extras/_container.py +273 -0
- flyte/io/__init__.py +11 -0
- flyte/io/_dataframe.py +0 -0
- flyte/io/_dir.py +448 -0
- flyte/io/_file.py +468 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte/io/pickle/transformer.py +117 -0
- flyte/io/structured_dataset/__init__.py +129 -0
- flyte/io/structured_dataset/basic_dfs.py +219 -0
- flyte/io/structured_dataset/structured_dataset.py +1061 -0
- flyte/py.typed +0 -0
- flyte/remote/__init__.py +25 -0
- flyte/remote/_client/__init__.py +0 -0
- flyte/remote/_client/_protocols.py +131 -0
- flyte/remote/_client/auth/__init__.py +12 -0
- flyte/remote/_client/auth/_authenticators/__init__.py +0 -0
- flyte/remote/_client/auth/_authenticators/base.py +397 -0
- flyte/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- flyte/remote/_client/auth/_authenticators/device_code.py +118 -0
- flyte/remote/_client/auth/_authenticators/external_command.py +79 -0
- flyte/remote/_client/auth/_authenticators/factory.py +200 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +516 -0
- flyte/remote/_client/auth/_channel.py +184 -0
- flyte/remote/_client/auth/_client_config.py +83 -0
- flyte/remote/_client/auth/_default_html.py +32 -0
- flyte/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- flyte/remote/_client/auth/_grpc_utils/auth_interceptor.py +288 -0
- flyte/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +151 -0
- flyte/remote/_client/auth/_keyring.py +143 -0
- flyte/remote/_client/auth/_token_client.py +260 -0
- flyte/remote/_client/auth/errors.py +16 -0
- flyte/remote/_client/controlplane.py +95 -0
- flyte/remote/_console.py +18 -0
- flyte/remote/_data.py +155 -0
- flyte/remote/_logs.py +116 -0
- flyte/remote/_project.py +86 -0
- flyte/remote/_run.py +873 -0
- flyte/remote/_secret.py +132 -0
- flyte/remote/_task.py +227 -0
- flyte/report/__init__.py +3 -0
- flyte/report/_report.py +178 -0
- flyte/report/_template.html +124 -0
- flyte/storage/__init__.py +24 -0
- flyte/storage/_remote_fs.py +34 -0
- flyte/storage/_storage.py +251 -0
- flyte/storage/_utils.py +5 -0
- flyte/types/__init__.py +13 -0
- flyte/types/_interface.py +25 -0
- flyte/types/_renderer.py +162 -0
- flyte/types/_string_literals.py +120 -0
- flyte/types/_type_engine.py +2210 -0
- flyte/types/_utils.py +80 -0
- flyte-0.0.1b0.dist-info/METADATA +179 -0
- flyte-0.0.1b0.dist-info/RECORD +390 -0
- flyte-0.0.1b0.dist-info/WHEEL +5 -0
- flyte-0.0.1b0.dist-info/entry_points.txt +3 -0
- flyte-0.0.1b0.dist-info/top_level.txt +1 -0
- union/__init__.py +54 -0
- union/_api_commons.py +3 -0
- union/_bin/__init__.py +0 -0
- union/_bin/runtime.py +113 -0
- union/_build.py +25 -0
- union/_cache/__init__.py +12 -0
- union/_cache/cache.py +141 -0
- union/_cache/defaults.py +9 -0
- union/_cache/policy_function_body.py +42 -0
- union/_cli/__init__.py +0 -0
- union/_cli/_common.py +263 -0
- union/_cli/_create.py +40 -0
- union/_cli/_delete.py +23 -0
- union/_cli/_deploy.py +120 -0
- union/_cli/_get.py +162 -0
- union/_cli/_params.py +579 -0
- union/_cli/_run.py +150 -0
- union/_cli/main.py +72 -0
- union/_code_bundle/__init__.py +8 -0
- union/_code_bundle/_ignore.py +113 -0
- union/_code_bundle/_packaging.py +187 -0
- union/_code_bundle/_utils.py +342 -0
- union/_code_bundle/bundle.py +176 -0
- union/_context.py +146 -0
- union/_datastructures.py +295 -0
- union/_deploy.py +185 -0
- union/_doc.py +29 -0
- union/_docstring.py +26 -0
- union/_environment.py +43 -0
- union/_group.py +31 -0
- union/_hash.py +23 -0
- union/_image.py +760 -0
- union/_initialize.py +585 -0
- union/_interface.py +84 -0
- union/_internal/__init__.py +3 -0
- union/_internal/controllers/__init__.py +77 -0
- union/_internal/controllers/_local_controller.py +77 -0
- union/_internal/controllers/pbhash.py +39 -0
- union/_internal/controllers/remote/__init__.py +40 -0
- union/_internal/controllers/remote/_action.py +131 -0
- union/_internal/controllers/remote/_client.py +43 -0
- union/_internal/controllers/remote/_controller.py +169 -0
- union/_internal/controllers/remote/_core.py +341 -0
- union/_internal/controllers/remote/_informer.py +260 -0
- union/_internal/controllers/remote/_service_protocol.py +44 -0
- union/_internal/imagebuild/__init__.py +11 -0
- union/_internal/imagebuild/docker_builder.py +416 -0
- union/_internal/imagebuild/image_builder.py +243 -0
- union/_internal/imagebuild/remote_builder.py +0 -0
- union/_internal/resolvers/__init__.py +0 -0
- union/_internal/resolvers/_task_module.py +31 -0
- union/_internal/resolvers/common.py +24 -0
- union/_internal/resolvers/default.py +27 -0
- union/_internal/runtime/__init__.py +0 -0
- union/_internal/runtime/convert.py +163 -0
- union/_internal/runtime/entrypoints.py +121 -0
- union/_internal/runtime/io.py +136 -0
- union/_internal/runtime/resources_serde.py +134 -0
- union/_internal/runtime/task_serde.py +202 -0
- union/_internal/runtime/taskrunner.py +179 -0
- union/_internal/runtime/types_serde.py +53 -0
- union/_logging.py +124 -0
- union/_protos/__init__.py +0 -0
- union/_protos/common/authorization_pb2.py +66 -0
- union/_protos/common/authorization_pb2.pyi +106 -0
- union/_protos/common/authorization_pb2_grpc.py +4 -0
- union/_protos/common/identifier_pb2.py +71 -0
- union/_protos/common/identifier_pb2.pyi +82 -0
- union/_protos/common/identifier_pb2_grpc.py +4 -0
- union/_protos/common/identity_pb2.py +48 -0
- union/_protos/common/identity_pb2.pyi +72 -0
- union/_protos/common/identity_pb2_grpc.py +4 -0
- union/_protos/common/list_pb2.py +36 -0
- union/_protos/common/list_pb2.pyi +69 -0
- union/_protos/common/list_pb2_grpc.py +4 -0
- union/_protos/common/policy_pb2.py +37 -0
- union/_protos/common/policy_pb2.pyi +27 -0
- union/_protos/common/policy_pb2_grpc.py +4 -0
- union/_protos/common/role_pb2.py +37 -0
- union/_protos/common/role_pb2.pyi +51 -0
- union/_protos/common/role_pb2_grpc.py +4 -0
- union/_protos/common/runtime_version_pb2.py +28 -0
- union/_protos/common/runtime_version_pb2.pyi +24 -0
- union/_protos/common/runtime_version_pb2_grpc.py +4 -0
- union/_protos/logs/dataplane/payload_pb2.py +96 -0
- union/_protos/logs/dataplane/payload_pb2.pyi +168 -0
- union/_protos/logs/dataplane/payload_pb2_grpc.py +4 -0
- union/_protos/secret/definition_pb2.py +49 -0
- union/_protos/secret/definition_pb2.pyi +93 -0
- union/_protos/secret/definition_pb2_grpc.py +4 -0
- union/_protos/secret/payload_pb2.py +62 -0
- union/_protos/secret/payload_pb2.pyi +94 -0
- union/_protos/secret/payload_pb2_grpc.py +4 -0
- union/_protos/secret/secret_pb2.py +38 -0
- union/_protos/secret/secret_pb2.pyi +6 -0
- union/_protos/secret/secret_pb2_grpc.py +198 -0
- union/_protos/validate/validate/validate_pb2.py +76 -0
- union/_protos/workflow/node_execution_service_pb2.py +26 -0
- union/_protos/workflow/node_execution_service_pb2.pyi +4 -0
- union/_protos/workflow/node_execution_service_pb2_grpc.py +32 -0
- union/_protos/workflow/queue_service_pb2.py +75 -0
- union/_protos/workflow/queue_service_pb2.pyi +103 -0
- union/_protos/workflow/queue_service_pb2_grpc.py +172 -0
- union/_protos/workflow/run_definition_pb2.py +100 -0
- union/_protos/workflow/run_definition_pb2.pyi +256 -0
- union/_protos/workflow/run_definition_pb2_grpc.py +4 -0
- union/_protos/workflow/run_logs_service_pb2.py +41 -0
- union/_protos/workflow/run_logs_service_pb2.pyi +28 -0
- union/_protos/workflow/run_logs_service_pb2_grpc.py +69 -0
- union/_protos/workflow/run_service_pb2.py +133 -0
- union/_protos/workflow/run_service_pb2.pyi +173 -0
- union/_protos/workflow/run_service_pb2_grpc.py +412 -0
- union/_protos/workflow/state_service_pb2.py +58 -0
- union/_protos/workflow/state_service_pb2.pyi +69 -0
- union/_protos/workflow/state_service_pb2_grpc.py +138 -0
- union/_protos/workflow/task_definition_pb2.py +72 -0
- union/_protos/workflow/task_definition_pb2.pyi +65 -0
- union/_protos/workflow/task_definition_pb2_grpc.py +4 -0
- union/_protos/workflow/task_service_pb2.py +44 -0
- union/_protos/workflow/task_service_pb2.pyi +31 -0
- union/_protos/workflow/task_service_pb2_grpc.py +104 -0
- union/_resources.py +226 -0
- union/_retry.py +32 -0
- union/_reusable_environment.py +25 -0
- union/_run.py +374 -0
- union/_secret.py +61 -0
- union/_task.py +354 -0
- union/_task_environment.py +186 -0
- union/_timeout.py +47 -0
- union/_tools.py +27 -0
- union/_utils/__init__.py +11 -0
- union/_utils/asyn.py +119 -0
- union/_utils/file_handling.py +71 -0
- union/_utils/helpers.py +46 -0
- union/_utils/lazy_module.py +54 -0
- union/_utils/uv_script_parser.py +49 -0
- union/_version.py +21 -0
- union/connectors/__init__.py +0 -0
- union/errors.py +128 -0
- union/extras/__init__.py +5 -0
- union/extras/_container.py +263 -0
- union/io/__init__.py +11 -0
- union/io/_dataframe.py +0 -0
- union/io/_dir.py +425 -0
- union/io/_file.py +418 -0
- union/io/pickle/__init__.py +0 -0
- union/io/pickle/transformer.py +117 -0
- union/io/structured_dataset/__init__.py +122 -0
- union/io/structured_dataset/basic_dfs.py +219 -0
- union/io/structured_dataset/structured_dataset.py +1057 -0
- union/py.typed +0 -0
- union/remote/__init__.py +23 -0
- union/remote/_client/__init__.py +0 -0
- union/remote/_client/_protocols.py +129 -0
- union/remote/_client/auth/__init__.py +12 -0
- union/remote/_client/auth/_authenticators/__init__.py +0 -0
- union/remote/_client/auth/_authenticators/base.py +391 -0
- union/remote/_client/auth/_authenticators/client_credentials.py +73 -0
- union/remote/_client/auth/_authenticators/device_code.py +120 -0
- union/remote/_client/auth/_authenticators/external_command.py +77 -0
- union/remote/_client/auth/_authenticators/factory.py +200 -0
- union/remote/_client/auth/_authenticators/pkce.py +515 -0
- union/remote/_client/auth/_channel.py +184 -0
- union/remote/_client/auth/_client_config.py +83 -0
- union/remote/_client/auth/_default_html.py +32 -0
- union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- union/remote/_client/auth/_grpc_utils/auth_interceptor.py +204 -0
- union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +144 -0
- union/remote/_client/auth/_keyring.py +154 -0
- union/remote/_client/auth/_token_client.py +258 -0
- union/remote/_client/auth/errors.py +16 -0
- union/remote/_client/controlplane.py +86 -0
- union/remote/_data.py +149 -0
- union/remote/_logs.py +74 -0
- union/remote/_project.py +86 -0
- union/remote/_run.py +820 -0
- union/remote/_secret.py +132 -0
- union/remote/_task.py +193 -0
- union/report/__init__.py +3 -0
- union/report/_report.py +178 -0
- union/report/_template.html +124 -0
- union/storage/__init__.py +24 -0
- union/storage/_remote_fs.py +34 -0
- union/storage/_storage.py +247 -0
- union/storage/_utils.py +5 -0
- union/types/__init__.py +11 -0
- union/types/_renderer.py +162 -0
- union/types/_string_literals.py +120 -0
- union/types/_type_engine.py +2131 -0
- union/types/_utils.py +80 -0
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import gzip
|
|
4
|
+
import hashlib
|
|
5
|
+
import importlib.util
|
|
6
|
+
import os
|
|
7
|
+
import pathlib
|
|
8
|
+
import shutil
|
|
9
|
+
import site
|
|
10
|
+
import stat
|
|
11
|
+
import sys
|
|
12
|
+
import tarfile
|
|
13
|
+
import tempfile
|
|
14
|
+
import typing
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from functools import lru_cache
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from types import ModuleType
|
|
19
|
+
from typing import List, Literal, Optional, Tuple, Union
|
|
20
|
+
|
|
21
|
+
from union._logging import logger
|
|
22
|
+
|
|
23
|
+
from ._ignore import IgnoreGroup
|
|
24
|
+
|
|
25
|
+
# Allowed values for the ``copy_file_detection`` argument of ``ls_files``:
# "loaded_modules" selects files from the currently loaded ``sys.modules``; any other
# value makes ``ls_files`` walk the source tree.
CopyFiles = Literal["loaded_modules", "all", "none"]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def compress_scripts(source_path: str, destination: str, modules: List[ModuleType]):
    """
    Compress the user modules found under ``source_path`` into a gzipped tarball,
    preserving their folder structure relative to ``source_path``.

    For example, given the following file structure:

        .
        ├── flyte
            ├── __init__.py
            └── workflows
                ├── example.py
                ├── another_example.py
                ├── yet_another_example.py
                ├── unused_example.py
                └── __init__.py

    Say ``example.py`` imports ``another_example.py``, and ``another_example.py`` imports
    ``yet_another_example.py``. The resulting archive contains only the imported files,
    laid out in their original folders:

        .
        ├── flyte
            ├── __init__.py
            └── workflows
                ├── example.py
                ├── another_example.py
                ├── yet_another_example.py
                └── __init__.py

    :param source_path: Root folder the modules are resolved against.
    :param destination: Path of the ``.tar.gz`` file to write.
    :param modules: Candidate modules; filtering is done by ``add_imported_modules_from_source``.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Stage the selected module files in a scratch "code" folder first.
        destination_path = os.path.join(tmp_dir, "code")
        os.mkdir(destination_path)
        add_imported_modules_from_source(source_path, destination_path, modules)

        tar_path = os.path.join(tmp_dir, "tmp.tar")
        with tarfile.open(tar_path, "w") as tar:
            # os.listdir() returns entries in arbitrary order; sort them so the archive
            # layout (and therefore its bytes) is the same for the same inputs.
            for ws_file in sorted(os.listdir(destination_path)):
                tar.add(os.path.join(destination_path, ws_file), arcname=ws_file, filter=tar_strip_file_attributes)

        # mtime=0 keeps the gzip header free of the current timestamp.
        with gzip.GzipFile(filename=destination, mode="wb", mtime=0) as gzipped:
            with open(tar_path, "rb") as tar_file:
                # Stream the tar instead of loading it into memory in one piece.
                shutil.copyfileobj(tar_file, gzipped)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Takes in a TarInfo and returns the modified TarInfo:
|
|
74
|
+
# https://docs.python.org/3/library/tarfile.html#tarinfo-objects
|
|
75
|
+
# intended to be passed as a filter to tarfile.add
|
|
76
|
+
# https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.add
|
|
77
|
+
def tar_strip_file_attributes(tar_info: tarfile.TarInfo) -> tarfile.TarInfo:
    """
    Normalise the metadata of a tar member so that it does not depend on the machine
    or moment the archive was built. Meant to be used as the ``filter`` of ``TarFile.add``.

    The same ``TarInfo`` object is mutated and returned.
    """
    # Pin the modification time to 00:00:00 UTC on 1 January 1980 (this is NOT the Unix
    # epoch). Extracted files will show this date as their modified time.
    pinned_mtime = datetime(1980, 1, 1, tzinfo=timezone.utc).timestamp()
    tar_info.mtime = pinned_mtime

    # Drop owner/group identity.
    tar_info.uid = tar_info.gid = 0
    tar_info.uname = tar_info.gname = ""

    # Stripping pax headers may not be strictly required, see
    # https://stackoverflow.com/questions/34688392/paxheaders-in-tarball
    tar_info.pax_headers = {}

    return tar_info
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def ls_files(
    source_path: pathlib.Path,
    copy_file_detection: CopyFiles,
    deref_symlinks: bool = False,
    ignore_group: Optional[IgnoreGroup] = None,
) -> Tuple[List[str], str]:
    """
    List the files to bundle for ``source_path`` and compute a digest over them.

    :param source_path: Root folder; listed files are hashed with their path relative to it.
    :param copy_file_detection: When "loaded_modules", the files backing the currently loaded
        ``sys.modules`` are used (see ``list_imported_modules_as_files``). Any other value walks
        the whole tree with ``list_all_files``. ("none" should never reach this function.)
    :param deref_symlinks: Forwarded to ``list_all_files``; only used when walking the tree.
    :param ignore_group: Forwarded to ``list_all_files``; only used when walking the tree.
    :return: ``(all_files, digest)`` where ``all_files`` is the sorted file list and ``digest``
        is the MD5 hex digest fed, per file, with the file contents and then its relative path.
    """
    if copy_file_detection == "loaded_modules":
        # This is --copy auto
        all_files = list_imported_modules_as_files(str(source_path), list(sys.modules.values()))
    else:
        # This is --copy all
        all_files = list_all_files(source_path, deref_symlinks, ignore_group)

    # Sort so the digest does not depend on discovery order.
    all_files.sort()

    # MD5 is used only as a content fingerprint; declaring that keeps the call working on
    # builds that block MD5 for security purposes. The digest value itself is unchanged.
    hasher = hashlib.md5(usedforsecurity=False)
    for abspath in all_files:
        relpath = os.path.relpath(abspath, source_path)
        _filehash_update(abspath, hasher)
        _pathhash_update(relpath, hasher)

    return all_files, hasher.hexdigest()
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _filehash_update(path: Union[os.PathLike, str], hasher: hashlib._Hash) -> None:
|
|
139
|
+
blocksize = 65536
|
|
140
|
+
with open(path, "rb") as f:
|
|
141
|
+
bytes = f.read(blocksize)
|
|
142
|
+
while bytes:
|
|
143
|
+
hasher.update(bytes)
|
|
144
|
+
bytes = f.read(blocksize)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _pathhash_update(path: Union[os.PathLike, str], hasher: hashlib._Hash) -> None:
|
|
148
|
+
path_list = path.split(os.sep)
|
|
149
|
+
hasher.update("".join(path_list).encode("utf-8"))
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# Directory names that list_all_files removes from the walk, so their contents are never listed.
EXCLUDE_DIRS = {".git"}
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def list_all_files(source_path: pathlib.Path, deref_symlinks, ignore_group: Optional[IgnoreGroup] = None) -> List[str]:
    """
    Walk ``source_path`` top-down and return the absolute paths of the files found.

    Directories named in ``EXCLUDE_DIRS`` are not descended into. Entries that cannot be
    stat'ed (e.g. symlinks pointing to non-existent files) and socket files are skipped,
    as are files for which ``ignore_group.is_ignored(path)`` is truthy.

    NOTE(review): the returned items are ``pathlib.Path`` objects (the result of
    ``(root / fname).absolute()``), not ``str`` as the annotation suggests. This is kept
    because converting would change how callers sort the list.

    :param source_path: Root directory to walk.
    :param deref_symlinks: Whether to follow symlinked directories while walking.
    :param ignore_group: Optional filter object; only its ``is_ignored`` method is used.
    :return: Files in walk order, sorted by name within each directory.
    """

    def _identity(path) -> Tuple[int, int]:
        # An inode number is only unique per device, so both are needed to identify a directory.
        st = os.stat(path)
        return st.st_dev, st.st_ino

    all_files = []

    # This is needed to prevent infinite recursion when walking with followlinks
    visited_dirs = set()
    for root, dirnames, files in source_path.walk(top_down=True, follow_symlinks=deref_symlinks):
        dirnames[:] = [d for d in dirnames if d not in EXCLUDE_DIRS]
        if deref_symlinks:
            root_id = _identity(root)
            if root_id in visited_dirs:
                # Already listed through another path. Also stop the walk from descending
                # further, otherwise a symlink cycle below this directory never terminates.
                dirnames.clear()
                continue
            visited_dirs.add(root_id)

        for fname in sorted(files):
            abspath = (root / fname).absolute()
            # Only consider files that exist (e.g. disregard symlinks that point to non-existent
            # files). A single stat() call serves both this check and the socket check below.
            try:
                mode = os.stat(abspath).st_mode
            except (OSError, ValueError):
                logger.info(f"Skipping non-existent file {abspath}")
                continue
            # Skip socket files
            if stat.S_ISSOCK(mode):
                logger.info(f"Skip socket file {abspath}")
                continue
            if ignore_group and ignore_group.is_ignored(abspath):
                continue

            all_files.append(abspath)

        # Remove directories that we've already visited from dirnames
        if deref_symlinks:
            dirnames[:] = [d for d in dirnames if _identity(os.path.join(root, d)) not in visited_dirs]

    return all_files
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _file_is_in_directory(file: str, directory: str) -> bool:
|
|
195
|
+
"""Return True if file is in directory and in its children."""
|
|
196
|
+
try:
|
|
197
|
+
return os.path.commonpath([file, directory]) == directory
|
|
198
|
+
except ValueError as e:
|
|
199
|
+
# ValueError is raised by windows if the paths are not from the same drive
|
|
200
|
+
logger.debug(f"{file} and {directory} are not in the same drive: {e!s}")
|
|
201
|
+
return False
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def list_imported_modules_as_files(source_path: str, modules: List[ModuleType]) -> List[str]:
    """Return the ``__file__`` of every module in ``modules`` that counts as user code.

    A module file is kept only if all of the following hold:

    1. It is not inside a site-packages directory. Those are installed packages, not user files.
    2. It is not inside ``sys.prefix`` / ``sys.base_prefix`` or the ``union`` package itself.
    3. It is located under ``source_path``.

    Modules without a usable ``__file__`` are skipped, and lazy modules that have not been
    imported yet are skipped without triggering their import.
    """
    # source path is the folder holding the main script.
    # but in register/package case, there are multiple folders.
    # identify a common root amongst the packages listed?

    import union
    from union._utils.lazy_module import is_imported

    selected = []

    # These directories contain installed packages or modules from the Python standard library.
    # If a module is from these directories, then they are not user files.
    excluded_roots = [
        os.path.dirname(union.__file__),
        sys.prefix,
        sys.base_prefix,
        site.getusersitepackages(),
        *site.getsitepackages(),
    ]

    for mod in modules:
        # Be careful not to import a module with the .__file__ call if not yet imported.
        class_name = object.__getattribute__(mod, "__class__").__name__
        if "LazyModule" in class_name:
            if not is_imported(object.__getattribute__(mod, "__name__")):
                continue
            mod_file = mod.__file__
        else:
            try:
                mod_file = mod.__file__
            except AttributeError:
                continue

        if mod_file is None:
            continue

        if any(_file_is_in_directory(mod_file, root) for root in excluded_roots):
            continue

        # Only upload files where the module file in the source directory
        if _file_is_in_directory(mod_file, source_path):
            selected.append(mod_file)

    return selected
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def add_imported_modules_from_source(source_path: str, destination: str, modules: List[ModuleType]):
    """Copy the user module files selected from ``modules`` into ``destination``.

    The selection is delegated to ``list_imported_modules_as_files``. Each file is placed at
    the same path relative to ``destination`` that it has relative to ``source_path``.
    Files that already exist at the target location are left untouched.
    """
    for module_file in list_imported_modules_as_files(source_path, modules):
        target = os.path.join(destination, os.path.relpath(module_file, start=source_path))

        # No need to copy if it already exists
        if not os.path.exists(target):
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.copy(module_file, target)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def import_module_from_file(module_name, file):
    """Build a module object named ``module_name`` from the spec of ``file``.

    The module is created from its spec only; this function does not execute it.

    :raises ModuleNotFoundError: if creating the spec or the module fails for any reason;
        the original exception is chained as the cause.
    """
    try:
        file_spec = importlib.util.spec_from_file_location(module_name, file)
        return importlib.util.module_from_spec(file_spec)
    except Exception as exc:
        raise ModuleNotFoundError(f"Module from file {file} cannot be loaded") from exc
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def get_all_modules(source_path: str, module_name: Optional[str]) -> List[ModuleType]:
    """Return the loaded ``sys.modules`` values, plus ``module_name`` loaded from ``source_path``.

    The extra module is appended only when ``module_name`` is given, is not already in
    ``sys.modules``, and maps to an existing ``.py`` file under ``source_path``. In every
    other case, including a failed import, just the loaded modules are returned.
    """
    sys_modules = list(sys.modules.values())

    # module already exists (or none requested), there is no need to import it again
    nothing_to_import = module_name is None or module_name in sys.modules
    if nothing_to_import:
        return sys_modules

    full_module_path = os.path.join(source_path, *module_name.split(".")) + ".py"
    if not os.path.isfile(full_module_path):
        return sys_modules

    try:
        return [*sys_modules, import_module_from_file(module_name, full_module_path)]
    except Exception as exc:
        logger.error(f"Using system modules, failed to import {module_name} from {full_module_path}: {exc!s}")
        # Import failed so we fallback to `sys_modules`
        return sys_modules
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
@lru_cache
def hash_file(file_path: typing.Union[os.PathLike, str]) -> Tuple[bytes, str, int]:
    """
    Hash a file and produce a digest to be used as a version.

    NOTE: results are memoised per ``file_path`` by ``lru_cache``; a file modified after its
    first call will still report the digest computed the first time within this process.

    :param file_path: Path of the file to hash.
    :return: ``(digest, hexdigest, size)``: the raw MD5 digest, its hex form, and the
        number of bytes read.
    """
    # MD5 is used as a content fingerprint only; declaring that keeps the call working on
    # builds that block MD5 for security purposes. The digest value itself is unchanged.
    h = hashlib.md5(usedforsecurity=False)
    size = 0
    # The previous read size was h.block_size (64 bytes for MD5), which means one Python-level
    # call per 64 bytes. A larger chunk yields the same digest with far fewer iterations.
    chunk_size = 65536

    with open(file_path, "rb") as file:
        while chunk := file.read(chunk_size):
            h.update(chunk)
            size += len(chunk)

    return h.digest(), h.hexdigest(), size
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _find_project_root(source_path) -> str:
    """
    Find the root of the project.

    The root of the project is considered to be the first ancestor from source_path that does
    not contain a __init__.py file.

    N.B.: This assumption only holds for regular packages (as opposed to namespace packages)

    :param source_path: Path of a file (or directory) inside the project; the search starts at its parent.
    :return: The project root directory as a string.
    """
    # Start from the directory right above source_path
    path = Path(source_path).parent.resolve()
    # Stop at the filesystem root as well: there `path.parent == path`, so without this guard
    # an `__init__.py` sitting in the root directory would make the loop spin forever.
    while os.path.exists(os.path.join(path, "__init__.py")) and path.parent != path:
        path = path.parent
    return str(path)
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import gzip
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import pathlib
|
|
6
|
+
import tempfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from flyteidl.core.tasks_pb2 import TaskTemplate
|
|
10
|
+
|
|
11
|
+
import union.storage as storage
|
|
12
|
+
from union._datastructures import CodeBundle
|
|
13
|
+
from union._logging import log, logger
|
|
14
|
+
|
|
15
|
+
from ._ignore import GitIgnore, Ignore, StandardIgnore
|
|
16
|
+
from ._packaging import create_bundle, list_files_to_bundle, print_ls_tree
|
|
17
|
+
from ._utils import CopyFiles, hash_file
|
|
18
|
+
|
|
19
|
+
_pickled_file_extension = ".pkl.gz"
|
|
20
|
+
_tar_file_extension = ".tar.gz"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def build_pkl_bundle(
    o: TaskTemplate,
    upload_to_controlplane: bool = True,
    upload_from_dataplane_path: str | None = None,
    copy_bundle_to: pathlib.Path | None = None,
) -> CodeBundle:
    """
    Build a pickled code bundle for the given task.

    The object is cloudpickled into a gzip file inside a temporary directory and then either uploaded
    to the control plane, uploaded to a dataplane path, or (dry run) optionally copied to a local path.

    TODO We can optimize this by having an LRU cache for the function, this is so that if the same task is being
    pickled multiple times, we can avoid the overhead of pickling it multiple times, by copying to a common place
    and reusing based on task hash.

    :param o: Object to be pickled. This is the task template.
    :param upload_to_controlplane: Whether to upload the pickled file to the control plane or not
    :param upload_from_dataplane_path: If we are on the dataplane, this is the path where the
        pickled file should be uploaded to. upload_to_controlplane has to be False in this case.
    :param copy_bundle_to: If set, the bundle will be copied to this path. This is used for testing purposes.
        Only honoured when nothing is uploaded (dry run).
    :return: CodeBundle object containing the pickled file path and the computed version.
    :raises ValueError: If both upload_to_controlplane and upload_from_dataplane_path are set.
    """
    import cloudpickle

    if upload_to_controlplane and upload_from_dataplane_path:
        raise ValueError("Cannot upload to control plane and upload from dataplane path at the same time.")

    logger.debug("Building pickled code bundle.")
    with tempfile.TemporaryDirectory() as tmp_dir:
        dest = pathlib.Path(tmp_dir) / f"code_bundle{_pickled_file_extension}"
        # mtime=0 keeps the gzip header free of a timestamp.
        with gzip.GzipFile(filename=dest, mode="wb", mtime=0) as gzipped:
            cloudpickle.dump(o, gzipped)

        if upload_to_controlplane:
            logger.debug("Uploading pickled code bundle to control plane.")
            from union.remote import upload_file

            hash_digest, remote_path = await upload_file(dest)
            return CodeBundle(pkl=remote_path, computed_version=hash_digest)

        elif upload_from_dataplane_path:
            logger.debug(f"Uploading pickled code bundle to dataplane path {upload_from_dataplane_path}.")
            _, str_digest, _ = hash_file(file_path=dest)
            final_path = await storage.put(str(dest), upload_from_dataplane_path)
            return CodeBundle(pkl=final_path, computed_version=str_digest)

        else:
            logger.debug("Dryrun enabled, not uploading pickled code bundle.")
            _, str_digest, _ = hash_file(file_path=dest)
            if copy_bundle_to:
                import shutil

                # shutil.copy returns the path it actually wrote to, which is correct both when
                # copy_bundle_to is a directory and when it names the target file itself.
                local_path = pathlib.Path(shutil.copy(dest, copy_bundle_to))
                return CodeBundle(pkl=str(local_path), computed_version=str_digest)
            # NOTE(review): `dest` lives inside the TemporaryDirectory, which is removed when this
            # `with` block exits, so the returned path no longer exists for the caller - confirm
            # that dry-run callers only need the computed version.
            return CodeBundle(pkl=str(dest), computed_version=str_digest)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
async def build_code_bundle(
    from_dir: Path,
    *ignore: Ignore,
    extract_dir: str = ".",
    dryrun: bool = False,
    copy_bundle_to: pathlib.Path | None = None,
    copy_style: CopyFiles = "loaded_modules",
) -> CodeBundle:
    """
    Build the code bundle for the current environment.

    :param from_dir: The root directory of the source code to bundle.
    :param extract_dir: The directory to extract the code bundle to, when in the container. It defaults to the current
        working directory.
    :param ignore: The list of ignores to apply. This is a list of Ignore classes. Defaults to
        StandardIgnore and GitIgnore when none are given.
    :param dryrun: If dryrun is enabled, files will not be uploaded to the control plane.
    :param copy_bundle_to: If set, the bundle will be copied to this path. This is used for testing purposes.
        Only honoured when dryrun is enabled.
    :param copy_style: What to put into the tarball. (either all, or loaded_modules. if none, skip this function)

    :return: The code bundle, which contains the path where the code was zipped to.
    """
    logger.debug("Building code bundle.")
    from union.remote import upload_file

    if not ignore:
        ignore = [StandardIgnore, GitIgnore]

    logger.debug(f"Finding files to bundle, ignoring as configured by: {ignore}")
    files, digest = list_files_to_bundle(from_dir, True, *ignore, copy_style=copy_style)
    if logger.getEffectiveLevel() <= logging.INFO:
        print_ls_tree(from_dir, files)

    logger.debug("Building code bundle.")
    with tempfile.TemporaryDirectory() as tmp_dir:
        bundle_path, tar_size, archive_size = create_bundle(from_dir, pathlib.Path(tmp_dir), files, digest)
        logger.info(f"Code bundle created at {bundle_path}, size: {tar_size} MB, archive size: {archive_size} MB")
        if not dryrun:
            hash_digest, remote_path = await upload_file(bundle_path)
        else:
            # Placeholder location used when the bundle is neither uploaded nor copied.
            remote_path = "na"
            if copy_bundle_to:
                import shutil

                # shutil.copy returns the path it actually wrote to, which is correct both when
                # copy_bundle_to is a directory and when it names the target file itself.
                remote_path = str(pathlib.Path(shutil.copy(bundle_path, copy_bundle_to)))
            # Hash while the temporary bundle still exists.
            _, hash_digest, _ = hash_file(file_path=bundle_path)
        return CodeBundle(tgz=remote_path, destination=extract_dir, computed_version=hash_digest)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@log(level=logging.INFO)
async def download_bundle(bundle: CodeBundle) -> pathlib.Path:
    """
    Downloads a code bundle (tgz | pkl) to the local destination path.

    A tgz bundle is downloaded into ``bundle.destination`` and then extracted there with ``tar``.
    A pkl bundle is only downloaded; it is not decompressed here.

    :param bundle: The code bundle to download.

    :return: The absolute path to the downloaded bundle file.
    :raises ValueError: If the destination is not an existing directory, or the bundle has neither tgz nor pkl set.
    :raises RuntimeError: If extracting the tgz bundle fails; the message is tar's stderr output.
    """
    dest = pathlib.Path(bundle.destination)
    if not dest.is_dir():
        # stat() raises FileNotFoundError for a missing path, which would mask the intended ValueError.
        detail = dest.stat() if dest.exists() else "path does not exist"
        raise ValueError(f"Destination path should be a directory, found {dest}, {detail}")

    # TODO make storage apis better to accept pathlib.Path
    if bundle.tgz:
        downloaded_bundle = dest / os.path.basename(bundle.tgz)
        # Download the tgz file
        downloaded_bundle = await storage.get(bundle.tgz, str(downloaded_bundle.absolute()))
        downloaded_bundle = pathlib.Path(downloaded_bundle)
        # Extract the archive into the destination directory. Both streams are captured so that
        # tar's verbose listing is not echoed and stderr can be reported on failure.
        process = await asyncio.create_subprocess_exec(
            "tar",
            "-xvf",
            str(downloaded_bundle),
            "-C",
            str(dest),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        _stdout, stderr = await process.communicate()

        if process.returncode != 0:
            raise RuntimeError(stderr.decode())
        return downloaded_bundle.absolute()

    elif bundle.pkl:
        downloaded_bundle = dest / os.path.basename(bundle.pkl)
        # Download the pkl file; it is returned still gzip-compressed.
        downloaded_bundle = await storage.get(bundle.pkl, str(downloaded_bundle.absolute()))
        downloaded_bundle = pathlib.Path(downloaded_bundle)
        return downloaded_bundle.absolute()
    else:
        raise ValueError("Code bundle should be either tgz or pkl, found neither.")
|
union/_context.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextvars
|
|
4
|
+
from dataclasses import dataclass, replace
|
|
5
|
+
from typing import TYPE_CHECKING, Callable, Optional, ParamSpec, TypeVar
|
|
6
|
+
|
|
7
|
+
from union._datastructures import GroupData, RawDataPath, TaskContext
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from union.report import Report
|
|
11
|
+
|
|
12
|
+
P = ParamSpec("P") # capture the function's parameters
|
|
13
|
+
R = TypeVar("R") # return type
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True, kw_only=True)
|
|
17
|
+
class ContextData:
|
|
18
|
+
"""
|
|
19
|
+
A ContextData cannot be created without an execution. Even for local execution's there should be an execution ID
|
|
20
|
+
|
|
21
|
+
:param: action The action ID of the current execution. This is always set, within a run.
|
|
22
|
+
:param: group_data If nested in a group the current group information
|
|
23
|
+
:param: task_context The context of the current task execution, this is what is available to the user, it is set
|
|
24
|
+
when the task is executed through `run` methods. If the Task is executed as regular python methods, this
|
|
25
|
+
will be None.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
group_data: Optional[GroupData] = None
|
|
29
|
+
task_context: Optional[TaskContext] = None
|
|
30
|
+
raw_data_path: Optional[RawDataPath] = None
|
|
31
|
+
|
|
32
|
+
def replace(self, **kwargs) -> ContextData:
|
|
33
|
+
return replace(self, **kwargs)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Context:
    """
    Holder for the current execution context.

    This is not coroutine safe, it assumes that the context is set in a single thread.
    You should use the `contextual_run` function to run a function in a new context tree.

    A context tree is a tree of contexts where, under the root, every coroutine started inside the tree
    can see the context mutations, while coroutines created outside of the tree cannot.

    Instances can be used as a (sync or async) context manager: entering installs the instance in
    `root_context_var`, exiting restores the previous value using the token saved on entry.
    """

    def __init__(self, data: ContextData):
        """
        :param data: The context data to wrap. Must not be None.
        :raises ValueError: If data is None.
        """
        if data is None:
            raise ValueError("Cannot create a new context without contextdata.")
        self._data = data
        self._id = id(self)  # Immutable unique identifier

    @property
    def data(self) -> ContextData:
        """The wrapped context data."""
        return self._data

    @property
    def raw_data(self) -> RawDataPath:
        """
        The raw data path for this context: the task context's path wins, then the context data's own path.

        :raises ValueError: If neither is set.
        """
        data = self.data
        task_ctx = data.task_context if data else None
        if task_ctx and task_ctx.raw_data_path:
            return task_ctx.raw_data_path
        if data and data.raw_data_path:
            return data.raw_data_path
        raise ValueError("Raw data path has not been set in the context.")

    @property
    def id(self) -> int:
        """The identifier assigned at construction time."""
        return self._id

    def replace_task_context(self, tctx: TaskContext) -> Context:
        """
        Return a new context whose data carries the given task context.
        """
        updated = self.data.replace(task_context=tctx)
        return Context(updated)

    def new_raw_data_path(self, raw_data_path: RawDataPath) -> Context:
        """
        Return a new context whose data carries the given raw data path object.
        """
        updated = self.data.replace(raw_data_path=raw_data_path)
        return Context(updated)

    def get_report(self) -> Optional[Report]:
        """
        Return the task context's report when a task context is set, otherwise None.
        """
        tctx = self.data.task_context
        return tctx.report if tctx else None

    def is_task_context(self) -> bool:
        """
        Return True when this context carries a task context.
        """
        tctx = self.data.task_context
        return tctx is not None

    def __enter__(self):
        """Install this context as the current one, remembering the reset token."""
        token = root_context_var.set(self)
        self._token = token
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore whatever context was current before entry."""
        root_context_var.reset(self._token)

    async def __aenter__(self):
        """Async counterpart of `__enter__`."""
        token = root_context_var.set(self)
        self._token = token
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async counterpart of `__exit__`."""
        root_context_var.reset(self._token)

    def __repr__(self):
        # Same formatting protocol an f-string placeholder uses.
        return format(self.data)

    def __str__(self):
        return repr(self)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# Global context variable to hold the current context
|
|
128
|
+
root_context_var = contextvars.ContextVar("root", default=Context(data=ContextData()))
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def ctx() -> Optional[TaskContext]:
    """Return the task context of the current context, or None when no task context is set."""
    current = internal_ctx()
    return current.data.task_context
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def internal_ctx() -> Context:
    """Return the Context object currently stored in `root_context_var`."""
    active = root_context_var.get()
    return active
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
async def contextual_run(func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
    """
    Run a function with a new context subtree.

    The current context is copied and `func` is both called and awaited inside that copy, so context
    variable changes made by `func` are not visible to the caller once this returns.

    :param func: Callable that returns an awaitable (typically a coroutine function).
    :param args: Positional arguments forwarded to `func`.
    :param kwargs: Keyword arguments forwarded to `func`.
    :return: The result of awaiting what `func` returned.
    """
    import asyncio

    _ctx = contextvars.copy_context()
    # Calling `func` inside the copy only covers its synchronous part: for a coroutine function
    # that merely creates the coroutine object. Awaiting it directly would run its body in the
    # caller's context, leaking mutations. Scheduling it from within the copy makes the task
    # inherit (a copy of) `_ctx` instead.
    awaitable = _ctx.run(func, *args, **kwargs)
    task = _ctx.run(asyncio.ensure_future, awaitable)
    return await task
|