flyte 0.0.1b0__py3-none-any.whl → 2.0.0b46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flyte/__init__.py +83 -30
- flyte/_bin/connect.py +61 -0
- flyte/_bin/debug.py +38 -0
- flyte/_bin/runtime.py +87 -19
- flyte/_bin/serve.py +351 -0
- flyte/_build.py +3 -2
- flyte/_cache/cache.py +6 -5
- flyte/_cache/local_cache.py +216 -0
- flyte/_code_bundle/_ignore.py +31 -5
- flyte/_code_bundle/_packaging.py +42 -11
- flyte/_code_bundle/_utils.py +57 -34
- flyte/_code_bundle/bundle.py +130 -27
- flyte/_constants.py +1 -0
- flyte/_context.py +21 -5
- flyte/_custom_context.py +73 -0
- flyte/_debug/constants.py +37 -0
- flyte/_debug/utils.py +17 -0
- flyte/_debug/vscode.py +315 -0
- flyte/_deploy.py +396 -75
- flyte/_deployer.py +109 -0
- flyte/_environment.py +94 -11
- flyte/_excepthook.py +37 -0
- flyte/_group.py +2 -1
- flyte/_hash.py +1 -16
- flyte/_image.py +544 -234
- flyte/_initialize.py +443 -294
- flyte/_interface.py +40 -5
- flyte/_internal/controllers/__init__.py +22 -8
- flyte/_internal/controllers/_local_controller.py +159 -35
- flyte/_internal/controllers/_trace.py +18 -10
- flyte/_internal/controllers/remote/__init__.py +38 -9
- flyte/_internal/controllers/remote/_action.py +82 -12
- flyte/_internal/controllers/remote/_client.py +6 -2
- flyte/_internal/controllers/remote/_controller.py +290 -64
- flyte/_internal/controllers/remote/_core.py +155 -95
- flyte/_internal/controllers/remote/_informer.py +40 -20
- flyte/_internal/controllers/remote/_service_protocol.py +2 -2
- flyte/_internal/imagebuild/__init__.py +2 -10
- flyte/_internal/imagebuild/docker_builder.py +391 -84
- flyte/_internal/imagebuild/image_builder.py +111 -55
- flyte/_internal/imagebuild/remote_builder.py +409 -0
- flyte/_internal/imagebuild/utils.py +79 -0
- flyte/_internal/resolvers/_app_env_module.py +92 -0
- flyte/_internal/resolvers/_task_module.py +5 -38
- flyte/_internal/resolvers/app_env.py +26 -0
- flyte/_internal/resolvers/common.py +8 -1
- flyte/_internal/resolvers/default.py +2 -2
- flyte/_internal/runtime/convert.py +322 -33
- flyte/_internal/runtime/entrypoints.py +106 -18
- flyte/_internal/runtime/io.py +71 -23
- flyte/_internal/runtime/resources_serde.py +21 -7
- flyte/_internal/runtime/reuse.py +125 -0
- flyte/_internal/runtime/rusty.py +196 -0
- flyte/_internal/runtime/task_serde.py +239 -66
- flyte/_internal/runtime/taskrunner.py +48 -8
- flyte/_internal/runtime/trigger_serde.py +162 -0
- flyte/_internal/runtime/types_serde.py +7 -16
- flyte/_keyring/file.py +115 -0
- flyte/_link.py +30 -0
- flyte/_logging.py +241 -42
- flyte/_map.py +312 -0
- flyte/_metrics.py +59 -0
- flyte/_module.py +74 -0
- flyte/_pod.py +30 -0
- flyte/_resources.py +296 -33
- flyte/_retry.py +1 -7
- flyte/_reusable_environment.py +72 -7
- flyte/_run.py +461 -132
- flyte/_secret.py +47 -11
- flyte/_serve.py +333 -0
- flyte/_task.py +245 -56
- flyte/_task_environment.py +219 -97
- flyte/_task_plugins.py +47 -0
- flyte/_tools.py +8 -8
- flyte/_trace.py +15 -24
- flyte/_trigger.py +1027 -0
- flyte/_utils/__init__.py +12 -1
- flyte/_utils/asyn.py +3 -1
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +5 -4
- flyte/_utils/description_parser.py +19 -0
- flyte/_utils/docker_credentials.py +173 -0
- flyte/_utils/helpers.py +45 -19
- flyte/_utils/module_loader.py +123 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +8 -1
- flyte/_version.py +16 -3
- flyte/app/__init__.py +27 -0
- flyte/app/_app_environment.py +362 -0
- flyte/app/_connector_environment.py +40 -0
- flyte/app/_deploy.py +130 -0
- flyte/app/_parameter.py +343 -0
- flyte/app/_runtime/__init__.py +3 -0
- flyte/app/_runtime/app_serde.py +383 -0
- flyte/app/_types.py +113 -0
- flyte/app/extras/__init__.py +9 -0
- flyte/app/extras/_auth_middleware.py +217 -0
- flyte/app/extras/_fastapi.py +93 -0
- flyte/app/extras/_model_loader/__init__.py +3 -0
- flyte/app/extras/_model_loader/config.py +7 -0
- flyte/app/extras/_model_loader/loader.py +288 -0
- flyte/cli/__init__.py +12 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_build.py +114 -0
- flyte/cli/_common.py +493 -0
- flyte/cli/_create.py +371 -0
- flyte/cli/_delete.py +45 -0
- flyte/cli/_deploy.py +401 -0
- flyte/cli/_gen.py +316 -0
- flyte/cli/_get.py +446 -0
- flyte/cli/_option.py +33 -0
- {union/_cli → flyte/cli}/_params.py +152 -153
- flyte/cli/_plugins.py +209 -0
- flyte/cli/_prefetch.py +292 -0
- flyte/cli/_run.py +690 -0
- flyte/cli/_serve.py +338 -0
- flyte/cli/_update.py +86 -0
- flyte/cli/_user.py +20 -0
- flyte/cli/main.py +246 -0
- flyte/config/__init__.py +3 -0
- flyte/config/_config.py +248 -0
- flyte/config/_internal.py +73 -0
- flyte/config/_reader.py +225 -0
- flyte/connectors/__init__.py +11 -0
- flyte/connectors/_connector.py +330 -0
- flyte/connectors/_server.py +194 -0
- flyte/connectors/utils.py +159 -0
- flyte/errors.py +134 -2
- flyte/extend.py +24 -0
- flyte/extras/_container.py +69 -56
- flyte/git/__init__.py +3 -0
- flyte/git/_config.py +279 -0
- flyte/io/__init__.py +8 -1
- flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
- flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
- flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
- flyte/io/_dir.py +575 -113
- flyte/io/_file.py +587 -141
- flyte/io/_hashing_io.py +342 -0
- flyte/io/extend.py +7 -0
- flyte/models.py +635 -0
- flyte/prefetch/__init__.py +22 -0
- flyte/prefetch/_hf_model.py +563 -0
- flyte/remote/__init__.py +14 -3
- flyte/remote/_action.py +879 -0
- flyte/remote/_app.py +346 -0
- flyte/remote/_auth_metadata.py +42 -0
- flyte/remote/_client/_protocols.py +62 -4
- flyte/remote/_client/auth/_auth_utils.py +19 -0
- flyte/remote/_client/auth/_authenticators/base.py +8 -2
- flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
- flyte/remote/_client/auth/_authenticators/factory.py +4 -0
- flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
- flyte/remote/_client/auth/_channel.py +47 -18
- flyte/remote/_client/auth/_client_config.py +5 -3
- flyte/remote/_client/auth/_keyring.py +15 -2
- flyte/remote/_client/auth/_token_client.py +3 -3
- flyte/remote/_client/controlplane.py +206 -18
- flyte/remote/_common.py +66 -0
- flyte/remote/_data.py +107 -22
- flyte/remote/_logs.py +116 -33
- flyte/remote/_project.py +21 -19
- flyte/remote/_run.py +164 -631
- flyte/remote/_secret.py +72 -29
- flyte/remote/_task.py +387 -46
- flyte/remote/_trigger.py +368 -0
- flyte/remote/_user.py +43 -0
- flyte/report/_report.py +10 -6
- flyte/storage/__init__.py +13 -1
- flyte/storage/_config.py +237 -0
- flyte/storage/_parallel_reader.py +289 -0
- flyte/storage/_storage.py +268 -59
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +414 -0
- flyte/types/__init__.py +39 -0
- flyte/types/_interface.py +22 -7
- flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
- flyte/types/_string_literals.py +8 -9
- flyte/types/_type_engine.py +230 -129
- flyte/types/_utils.py +1 -1
- flyte-2.0.0b46.data/scripts/debug.py +38 -0
- flyte-2.0.0b46.data/scripts/runtime.py +194 -0
- flyte-2.0.0b46.dist-info/METADATA +352 -0
- flyte-2.0.0b46.dist-info/RECORD +221 -0
- flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
- flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
- flyte/_api_commons.py +0 -3
- flyte/_cli/_common.py +0 -287
- flyte/_cli/_create.py +0 -42
- flyte/_cli/_delete.py +0 -23
- flyte/_cli/_deploy.py +0 -140
- flyte/_cli/_get.py +0 -235
- flyte/_cli/_run.py +0 -152
- flyte/_cli/main.py +0 -72
- flyte/_datastructures.py +0 -342
- flyte/_internal/controllers/pbhash.py +0 -39
- flyte/_protos/common/authorization_pb2.py +0 -66
- flyte/_protos/common/authorization_pb2.pyi +0 -108
- flyte/_protos/common/authorization_pb2_grpc.py +0 -4
- flyte/_protos/common/identifier_pb2.py +0 -71
- flyte/_protos/common/identifier_pb2.pyi +0 -82
- flyte/_protos/common/identifier_pb2_grpc.py +0 -4
- flyte/_protos/common/identity_pb2.py +0 -48
- flyte/_protos/common/identity_pb2.pyi +0 -72
- flyte/_protos/common/identity_pb2_grpc.py +0 -4
- flyte/_protos/common/list_pb2.py +0 -36
- flyte/_protos/common/list_pb2.pyi +0 -69
- flyte/_protos/common/list_pb2_grpc.py +0 -4
- flyte/_protos/common/policy_pb2.py +0 -37
- flyte/_protos/common/policy_pb2.pyi +0 -27
- flyte/_protos/common/policy_pb2_grpc.py +0 -4
- flyte/_protos/common/role_pb2.py +0 -37
- flyte/_protos/common/role_pb2.pyi +0 -53
- flyte/_protos/common/role_pb2_grpc.py +0 -4
- flyte/_protos/common/runtime_version_pb2.py +0 -28
- flyte/_protos/common/runtime_version_pb2.pyi +0 -24
- flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
- flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
- flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
- flyte/_protos/secret/definition_pb2.py +0 -49
- flyte/_protos/secret/definition_pb2.pyi +0 -93
- flyte/_protos/secret/definition_pb2_grpc.py +0 -4
- flyte/_protos/secret/payload_pb2.py +0 -62
- flyte/_protos/secret/payload_pb2.pyi +0 -94
- flyte/_protos/secret/payload_pb2_grpc.py +0 -4
- flyte/_protos/secret/secret_pb2.py +0 -38
- flyte/_protos/secret/secret_pb2.pyi +0 -6
- flyte/_protos/secret/secret_pb2_grpc.py +0 -198
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
- flyte/_protos/validate/validate/validate_pb2.py +0 -76
- flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
- flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
- flyte/_protos/workflow/queue_service_pb2.py +0 -106
- flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
- flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
- flyte/_protos/workflow/run_definition_pb2.py +0 -128
- flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
- flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
- flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
- flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
- flyte/_protos/workflow/run_service_pb2.py +0 -133
- flyte/_protos/workflow/run_service_pb2.pyi +0 -175
- flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
- flyte/_protos/workflow/state_service_pb2.py +0 -58
- flyte/_protos/workflow/state_service_pb2.pyi +0 -71
- flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
- flyte/_protos/workflow/task_definition_pb2.py +0 -72
- flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
- flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
- flyte/_protos/workflow/task_service_pb2.py +0 -44
- flyte/_protos/workflow/task_service_pb2.pyi +0 -31
- flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
- flyte/io/_dataframe.py +0 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte/remote/_console.py +0 -18
- flyte-0.0.1b0.dist-info/METADATA +0 -179
- flyte-0.0.1b0.dist-info/RECORD +0 -390
- flyte-0.0.1b0.dist-info/entry_points.txt +0 -3
- union/__init__.py +0 -54
- union/_api_commons.py +0 -3
- union/_bin/__init__.py +0 -0
- union/_bin/runtime.py +0 -113
- union/_build.py +0 -25
- union/_cache/__init__.py +0 -12
- union/_cache/cache.py +0 -141
- union/_cache/defaults.py +0 -9
- union/_cache/policy_function_body.py +0 -42
- union/_cli/__init__.py +0 -0
- union/_cli/_common.py +0 -263
- union/_cli/_create.py +0 -40
- union/_cli/_delete.py +0 -23
- union/_cli/_deploy.py +0 -120
- union/_cli/_get.py +0 -162
- union/_cli/_run.py +0 -150
- union/_cli/main.py +0 -72
- union/_code_bundle/__init__.py +0 -8
- union/_code_bundle/_ignore.py +0 -113
- union/_code_bundle/_packaging.py +0 -187
- union/_code_bundle/_utils.py +0 -342
- union/_code_bundle/bundle.py +0 -176
- union/_context.py +0 -146
- union/_datastructures.py +0 -295
- union/_deploy.py +0 -185
- union/_doc.py +0 -29
- union/_docstring.py +0 -26
- union/_environment.py +0 -43
- union/_group.py +0 -31
- union/_hash.py +0 -23
- union/_image.py +0 -760
- union/_initialize.py +0 -585
- union/_interface.py +0 -84
- union/_internal/__init__.py +0 -3
- union/_internal/controllers/__init__.py +0 -77
- union/_internal/controllers/_local_controller.py +0 -77
- union/_internal/controllers/pbhash.py +0 -39
- union/_internal/controllers/remote/__init__.py +0 -40
- union/_internal/controllers/remote/_action.py +0 -131
- union/_internal/controllers/remote/_client.py +0 -43
- union/_internal/controllers/remote/_controller.py +0 -169
- union/_internal/controllers/remote/_core.py +0 -341
- union/_internal/controllers/remote/_informer.py +0 -260
- union/_internal/controllers/remote/_service_protocol.py +0 -44
- union/_internal/imagebuild/__init__.py +0 -11
- union/_internal/imagebuild/docker_builder.py +0 -416
- union/_internal/imagebuild/image_builder.py +0 -243
- union/_internal/imagebuild/remote_builder.py +0 -0
- union/_internal/resolvers/__init__.py +0 -0
- union/_internal/resolvers/_task_module.py +0 -31
- union/_internal/resolvers/common.py +0 -24
- union/_internal/resolvers/default.py +0 -27
- union/_internal/runtime/__init__.py +0 -0
- union/_internal/runtime/convert.py +0 -163
- union/_internal/runtime/entrypoints.py +0 -121
- union/_internal/runtime/io.py +0 -136
- union/_internal/runtime/resources_serde.py +0 -134
- union/_internal/runtime/task_serde.py +0 -202
- union/_internal/runtime/taskrunner.py +0 -179
- union/_internal/runtime/types_serde.py +0 -53
- union/_logging.py +0 -124
- union/_protos/__init__.py +0 -0
- union/_protos/common/authorization_pb2.py +0 -66
- union/_protos/common/authorization_pb2.pyi +0 -106
- union/_protos/common/authorization_pb2_grpc.py +0 -4
- union/_protos/common/identifier_pb2.py +0 -71
- union/_protos/common/identifier_pb2.pyi +0 -82
- union/_protos/common/identifier_pb2_grpc.py +0 -4
- union/_protos/common/identity_pb2.py +0 -48
- union/_protos/common/identity_pb2.pyi +0 -72
- union/_protos/common/identity_pb2_grpc.py +0 -4
- union/_protos/common/list_pb2.py +0 -36
- union/_protos/common/list_pb2.pyi +0 -69
- union/_protos/common/list_pb2_grpc.py +0 -4
- union/_protos/common/policy_pb2.py +0 -37
- union/_protos/common/policy_pb2.pyi +0 -27
- union/_protos/common/policy_pb2_grpc.py +0 -4
- union/_protos/common/role_pb2.py +0 -37
- union/_protos/common/role_pb2.pyi +0 -51
- union/_protos/common/role_pb2_grpc.py +0 -4
- union/_protos/common/runtime_version_pb2.py +0 -28
- union/_protos/common/runtime_version_pb2.pyi +0 -24
- union/_protos/common/runtime_version_pb2_grpc.py +0 -4
- union/_protos/logs/dataplane/payload_pb2.py +0 -96
- union/_protos/logs/dataplane/payload_pb2.pyi +0 -168
- union/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
- union/_protos/secret/definition_pb2.py +0 -49
- union/_protos/secret/definition_pb2.pyi +0 -93
- union/_protos/secret/definition_pb2_grpc.py +0 -4
- union/_protos/secret/payload_pb2.py +0 -62
- union/_protos/secret/payload_pb2.pyi +0 -94
- union/_protos/secret/payload_pb2_grpc.py +0 -4
- union/_protos/secret/secret_pb2.py +0 -38
- union/_protos/secret/secret_pb2.pyi +0 -6
- union/_protos/secret/secret_pb2_grpc.py +0 -198
- union/_protos/validate/validate/validate_pb2.py +0 -76
- union/_protos/workflow/node_execution_service_pb2.py +0 -26
- union/_protos/workflow/node_execution_service_pb2.pyi +0 -4
- union/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
- union/_protos/workflow/queue_service_pb2.py +0 -75
- union/_protos/workflow/queue_service_pb2.pyi +0 -103
- union/_protos/workflow/queue_service_pb2_grpc.py +0 -172
- union/_protos/workflow/run_definition_pb2.py +0 -100
- union/_protos/workflow/run_definition_pb2.pyi +0 -256
- union/_protos/workflow/run_definition_pb2_grpc.py +0 -4
- union/_protos/workflow/run_logs_service_pb2.py +0 -41
- union/_protos/workflow/run_logs_service_pb2.pyi +0 -28
- union/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
- union/_protos/workflow/run_service_pb2.py +0 -133
- union/_protos/workflow/run_service_pb2.pyi +0 -173
- union/_protos/workflow/run_service_pb2_grpc.py +0 -412
- union/_protos/workflow/state_service_pb2.py +0 -58
- union/_protos/workflow/state_service_pb2.pyi +0 -69
- union/_protos/workflow/state_service_pb2_grpc.py +0 -138
- union/_protos/workflow/task_definition_pb2.py +0 -72
- union/_protos/workflow/task_definition_pb2.pyi +0 -65
- union/_protos/workflow/task_definition_pb2_grpc.py +0 -4
- union/_protos/workflow/task_service_pb2.py +0 -44
- union/_protos/workflow/task_service_pb2.pyi +0 -31
- union/_protos/workflow/task_service_pb2_grpc.py +0 -104
- union/_resources.py +0 -226
- union/_retry.py +0 -32
- union/_reusable_environment.py +0 -25
- union/_run.py +0 -374
- union/_secret.py +0 -61
- union/_task.py +0 -354
- union/_task_environment.py +0 -186
- union/_timeout.py +0 -47
- union/_tools.py +0 -27
- union/_utils/__init__.py +0 -11
- union/_utils/asyn.py +0 -119
- union/_utils/file_handling.py +0 -71
- union/_utils/helpers.py +0 -46
- union/_utils/lazy_module.py +0 -54
- union/_utils/uv_script_parser.py +0 -49
- union/_version.py +0 -21
- union/connectors/__init__.py +0 -0
- union/errors.py +0 -128
- union/extras/__init__.py +0 -5
- union/extras/_container.py +0 -263
- union/io/__init__.py +0 -11
- union/io/_dataframe.py +0 -0
- union/io/_dir.py +0 -425
- union/io/_file.py +0 -418
- union/io/pickle/__init__.py +0 -0
- union/io/pickle/transformer.py +0 -117
- union/io/structured_dataset/__init__.py +0 -122
- union/io/structured_dataset/basic_dfs.py +0 -219
- union/io/structured_dataset/structured_dataset.py +0 -1057
- union/py.typed +0 -0
- union/remote/__init__.py +0 -23
- union/remote/_client/__init__.py +0 -0
- union/remote/_client/_protocols.py +0 -129
- union/remote/_client/auth/__init__.py +0 -12
- union/remote/_client/auth/_authenticators/__init__.py +0 -0
- union/remote/_client/auth/_authenticators/base.py +0 -391
- union/remote/_client/auth/_authenticators/client_credentials.py +0 -73
- union/remote/_client/auth/_authenticators/device_code.py +0 -120
- union/remote/_client/auth/_authenticators/external_command.py +0 -77
- union/remote/_client/auth/_authenticators/factory.py +0 -200
- union/remote/_client/auth/_authenticators/pkce.py +0 -515
- union/remote/_client/auth/_channel.py +0 -184
- union/remote/_client/auth/_client_config.py +0 -83
- union/remote/_client/auth/_default_html.py +0 -32
- union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- union/remote/_client/auth/_grpc_utils/auth_interceptor.py +0 -204
- union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +0 -144
- union/remote/_client/auth/_keyring.py +0 -154
- union/remote/_client/auth/_token_client.py +0 -258
- union/remote/_client/auth/errors.py +0 -16
- union/remote/_client/controlplane.py +0 -86
- union/remote/_data.py +0 -149
- union/remote/_logs.py +0 -74
- union/remote/_project.py +0 -86
- union/remote/_run.py +0 -820
- union/remote/_secret.py +0 -132
- union/remote/_task.py +0 -193
- union/report/__init__.py +0 -3
- union/report/_report.py +0 -178
- union/report/_template.html +0 -124
- union/storage/__init__.py +0 -24
- union/storage/_remote_fs.py +0 -34
- union/storage/_storage.py +0 -247
- union/storage/_utils.py +0 -5
- union/types/__init__.py +0 -11
- union/types/_renderer.py +0 -162
- union/types/_string_literals.py +0 -120
- union/types/_type_engine.py +0 -2131
- union/types/_utils.py +0 -80
- /flyte/{_cli → _debug}/__init__.py +0 -0
- /flyte/{_protos → _keyring}/__init__.py +0 -0
- {flyte-0.0.1b0.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
- {flyte-0.0.1b0.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
flyte/storage/_config.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
import os
|
|
5
|
+
import typing
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import ClassVar
|
|
8
|
+
|
|
9
|
+
from flyte.config import set_if_exists
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(init=True, repr=True, eq=True, frozen=True)
class Storage(object):
    """
    Data storage configuration that applies across any provider.

    Fields:
        retries: Number of retries (env: ``UNION_STORAGE_RETRIES``, parsed as an int).
        backoff: Initial backoff between retries (env: ``UNION_STORAGE_BACKOFF_SECONDS``,
            parsed as a number of seconds).
        enable_debug: Debug flag (env: ``UNION_STORAGE_DEBUG``, parsed as a boolean).
        attach_execution_metadata: Not populated from the environment by :meth:`auto`.
    """

    retries: int = 3
    backoff: datetime.timedelta = datetime.timedelta(seconds=5)
    enable_debug: bool = False
    attach_execution_metadata: bool = True

    # Maps a field name to the environment variable that :meth:`auto` reads for it.
    _KEY_ENV_VAR_MAPPING: ClassVar[typing.Dict[str, str]] = {
        "enable_debug": "UNION_STORAGE_DEBUG",
        "retries": "UNION_STORAGE_RETRIES",
        "backoff": "UNION_STORAGE_BACKOFF_SECONDS",
    }

    def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:
        """
        Returns the configuration as kwargs for constructing an fsspec filesystem.

        The base implementation ignores its arguments and returns an empty dict;
        provider-specific subclasses override it.
        """
        return {}

    @classmethod
    def _auto_as_kwargs(cls) -> typing.Dict[str, typing.Any]:
        """
        Read the provider-independent settings from the environment.

        Environment values are always strings, so each one is converted to the type
        of the field it populates. Unset or blank variables are skipped so that the
        dataclass defaults apply.

        :return: Constructor kwargs containing only the settings found in the environment.
        :raises ValueError: If a variable is set to a value that cannot be parsed.
        """
        parsers: typing.Dict[str, typing.Callable[[str], typing.Any]] = {
            # Anything outside this set of spellings (e.g. "false", "0") is False.
            "enable_debug": lambda raw: raw.lower() in ("1", "true", "t", "yes", "y", "on"),
            "retries": int,
            "backoff": lambda raw: datetime.timedelta(seconds=float(raw)),
        }

        kwargs: typing.Dict[str, typing.Any] = {}
        for field_name, parse in parsers.items():
            env_var = cls._KEY_ENV_VAR_MAPPING[field_name]
            raw = os.getenv(env_var)
            if raw is None or not raw.strip():
                continue
            try:
                kwargs[field_name] = parse(raw.strip())
            except (ValueError, OverflowError) as err:
                raise ValueError(f"Invalid value {raw!r} for environment variable {env_var}") from err
        return kwargs

    @classmethod
    def auto(cls) -> Storage:
        """
        Construct the config object automatically from environment variables.
        """
        return cls(**cls._auto_as_kwargs())
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(init=True, repr=True, eq=True, frozen=True)
class S3(Storage):
    """
    S3 specific configuration

    Fields:
        endpoint: Custom endpoint URL (env: ``FLYTE_AWS_ENDPOINT``).
        access_key_id: Access key id (env: ``FLYTE_AWS_ACCESS_KEY_ID``).
        secret_access_key: Secret access key (env: ``FLYTE_AWS_SECRET_ACCESS_KEY``).
        region: Region; only set through the ``region`` argument of :meth:`auto`.
    """

    endpoint: typing.Optional[str] = None
    access_key_id: typing.Optional[str] = None
    secret_access_key: typing.Optional[str] = None
    region: typing.Optional[str] = None

    # S3-specific variables merged with the provider-independent ones from Storage.
    _KEY_ENV_VAR_MAPPING: ClassVar[typing.Dict[str, str]] = {
        "endpoint": "FLYTE_AWS_ENDPOINT",
        "access_key_id": "FLYTE_AWS_ACCESS_KEY_ID",
        "secret_access_key": "FLYTE_AWS_SECRET_ACCESS_KEY",
    } | Storage._KEY_ENV_VAR_MAPPING

    # Refer to https://github.com/developmentseed/obstore/blob/33654fc37f19a657689eb93327b621e9f9e01494/obstore/python/obstore/store/_aws.pyi#L11
    # for key and secret
    _CONFIG_KEY_FSSPEC_S3_KEY_ID: ClassVar = "access_key_id"
    _CONFIG_KEY_FSSPEC_S3_SECRET: ClassVar = "secret_access_key"
    _CONFIG_KEY_ENDPOINT: ClassVar = "endpoint_url"
    _KEY_SKIP_SIGNATURE: ClassVar = "skip_signature"

    @classmethod
    def auto(cls, region: str | None = None) -> S3:
        """
        Construct the config from environment variables.

        :param region: Optional region to store on the config; it is not read from the environment.
        :return: Config
        """
        endpoint = os.getenv(cls._KEY_ENV_VAR_MAPPING["endpoint"], None)
        access_key_id = os.getenv(cls._KEY_ENV_VAR_MAPPING["access_key_id"], None)
        secret_access_key = os.getenv(cls._KEY_ENV_VAR_MAPPING["secret_access_key"], None)

        kwargs = super()._auto_as_kwargs()
        kwargs = set_if_exists(kwargs, "endpoint", endpoint)
        kwargs = set_if_exists(kwargs, "access_key_id", access_key_id)
        kwargs = set_if_exists(kwargs, "secret_access_key", secret_access_key)
        kwargs = set_if_exists(kwargs, "region", region)

        # Build through ``cls`` (as Storage.auto does) so subclasses get their own type back.
        return cls(**kwargs)

    @classmethod
    def for_sandbox(cls) -> S3:
        """
        Construct a config with fixed endpoint and credentials (``http://localhost:4566``,
        ``minio`` / ``miniostorage``) on top of the environment-derived base settings.

        :return: Config
        """
        kwargs = super()._auto_as_kwargs()
        final_kwargs = kwargs | {
            "endpoint": "http://localhost:4566",
            "access_key_id": "minio",
            "secret_access_key": "miniostorage",
        }
        return cls(**final_kwargs)

    def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:
        """
        Returns the configuration as kwargs for constructing an fsspec filesystem.

        :param anonymous: When true, ``skip_signature`` is set in the store config.
        :param kwargs: Caller overrides. ``access_key_id``, ``secret_access_key``, ``endpoint_url``,
            ``retries`` and ``backoff`` are consumed here; every other entry is passed through.
        :return: ``kwargs`` extended with ``config`` (only when non-empty), ``client_options``,
            ``retry_config`` and, if set on this object, ``region``.
        """
        kwargs.pop("anonymous", None)  # Remove anonymous if it exists, as we handle it separately

        # A caller-supplied value wins over the value stored on this object.
        config: typing.Dict[str, typing.Any] = {}
        overridable = (
            (self._CONFIG_KEY_FSSPEC_S3_KEY_ID, self.access_key_id),
            (self._CONFIG_KEY_FSSPEC_S3_SECRET, self.secret_access_key),
            (self._CONFIG_KEY_ENDPOINT, self.endpoint),
        )
        for config_key, stored_value in overridable:
            if config_key in kwargs or stored_value:
                config[config_key] = kwargs.pop(config_key, stored_value)

        retries = kwargs.pop("retries", self.retries)
        backoff = kwargs.pop("backoff", self.backoff)

        if anonymous:
            config[self._KEY_SKIP_SIGNATURE] = True

        retry_config = {
            "max_retries": retries,
            "backoff": {
                "base": 2,
                "init_backoff": backoff,
                "max_backoff": datetime.timedelta(seconds=16),
            },
            "retry_timeout": datetime.timedelta(minutes=3),
        }

        client_options = {"timeout": "99999s", "allow_http": True}

        if config:
            kwargs["config"] = config
        kwargs["client_options"] = client_options
        kwargs["retry_config"] = retry_config
        if self.region:
            kwargs["region"] = self.region

        return kwargs
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
@dataclass(init=True, repr=True, eq=True, frozen=True)
class GCS(Storage):
    """
    Any GCS specific configuration.

    Fields:
        gsutil_parallelism: Boolean flag (env: ``GCP_GSUTIL_PARALLELISM``).
    """

    gsutil_parallelism: bool = False

    _KEY_ENV_VAR_MAPPING: ClassVar[dict[str, str]] = {
        "gsutil_parallelism": "GCP_GSUTIL_PARALLELISM",
    }

    @classmethod
    def auto(cls) -> GCS:
        """
        Construct the config from environment variables.

        The environment value is a string, so it is parsed into a real boolean:
        ``1``, ``true``, ``t``, ``yes``, ``y`` and ``on`` (case-insensitive) enable the flag,
        any other non-blank value disables it. An unset or blank variable keeps the default.

        :return: Config
        """
        raw = os.getenv(cls._KEY_ENV_VAR_MAPPING["gsutil_parallelism"], None)

        kwargs: typing.Dict[str, typing.Any] = {}
        if raw is not None and raw.strip():
            # Storing the raw string would make "false" / "0" truthy.
            kwargs["gsutil_parallelism"] = raw.strip().lower() in ("1", "true", "t", "yes", "y", "on")
        return cls(**kwargs)

    def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:
        """
        Returns the caller's kwargs unchanged apart from dropping any ``anonymous`` entry.
        """
        kwargs.pop("anonymous", None)
        return kwargs
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@dataclass(init=True, repr=True, eq=True, frozen=True)
class ABFS(Storage):
    """
    Any Azure Blob Storage specific configuration.

    Every field is optional and is read by :meth:`auto` from the environment variable
    listed in ``_KEY_ENV_VAR_MAPPING``.
    """

    account_name: typing.Optional[str] = None
    account_key: typing.Optional[str] = None
    tenant_id: typing.Optional[str] = None
    client_id: typing.Optional[str] = None
    client_secret: typing.Optional[str] = None

    _KEY_ENV_VAR_MAPPING: ClassVar[dict[str, str]] = {
        "account_name": "AZURE_STORAGE_ACCOUNT_NAME",
        "account_key": "AZURE_STORAGE_ACCOUNT_KEY",
        "tenant_id": "AZURE_TENANT_ID",
        "client_id": "AZURE_CLIENT_ID",
        "client_secret": "AZURE_CLIENT_SECRET",
    }
    _KEY_SKIP_SIGNATURE: ClassVar = "skip_signature"

    @classmethod
    def auto(cls) -> ABFS:
        """
        Construct the config from environment variables.

        :return: Config
        """
        kwargs: typing.Dict[str, typing.Any] = {}
        for field_name in ("account_name", "account_key", "tenant_id", "client_id", "client_secret"):
            env_value = os.getenv(cls._KEY_ENV_VAR_MAPPING[field_name], None)
            kwargs = set_if_exists(kwargs, field_name, env_value)
        # Build through ``cls`` (as Storage.auto does) so subclasses get their own type back.
        return cls(**kwargs)

    def get_fsspec_kwargs(self, anonymous: bool = False, **kwargs) -> typing.Dict[str, typing.Any]:
        """
        Returns the configuration as kwargs for constructing an fsspec filesystem.

        :param anonymous: When true, ``skip_signature`` is set in the store config.
        :param kwargs: Caller overrides. ``account_name``, ``account_key``, ``client_id``,
            ``client_secret`` and ``tenant_id`` are consumed here; every other entry is passed through.
        :return: ``kwargs`` extended with ``config`` (only when non-empty) and ``client_options``.
        """
        kwargs.pop("anonymous", None)

        # A caller-supplied value wins over the value stored on this object. The override is
        # popped (not just read) so each setting appears once in the result -- inside ``config`` --
        # which matches how S3.get_fsspec_kwargs treats its overrides.
        config: typing.Dict[str, typing.Any] = {}
        overridable = (
            ("account_name", self.account_name),
            ("account_key", self.account_key),
            ("client_id", self.client_id),
            ("client_secret", self.client_secret),
            ("tenant_id", self.tenant_id),
        )
        for config_key, stored_value in overridable:
            if config_key in kwargs or stored_value:
                config[config_key] = kwargs.pop(config_key, stored_value)

        if anonymous:
            config[self._KEY_SKIP_SIGNATURE] = True

        client_options = {"timeout": "99999s", "allow_http": "true"}

        if config:
            kwargs["config"] = config
        kwargs["client_options"] = client_options

        return kwargs
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import dataclasses
|
|
5
|
+
import io
|
|
6
|
+
import os
|
|
7
|
+
import pathlib
|
|
8
|
+
import sys
|
|
9
|
+
import tempfile
|
|
10
|
+
import typing
|
|
11
|
+
from typing import Any, Hashable, Protocol
|
|
12
|
+
|
|
13
|
+
import aiofiles
|
|
14
|
+
import aiofiles.os
|
|
15
|
+
import obstore
|
|
16
|
+
|
|
17
|
+
if typing.TYPE_CHECKING:
|
|
18
|
+
from obstore import Bytes, ObjectMeta
|
|
19
|
+
from obstore.store import ObjectStore
|
|
20
|
+
|
|
21
|
+
# Defaults for ObstoreParallelReader's chunk_size / max_concurrency arguments.
# Both are read once at import time and can be overridden through the environment.
CHUNK_SIZE = int(os.environ.get("FLYTE_IO_CHUNK_SIZE", str(16 * 1024 * 1024)))  # default: 16 MiB
MAX_CONCURRENCY = int(os.environ.get("FLYTE_IO_MAX_CONCURRENCY", "32"))
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DownloadQueueEmpty(RuntimeError):
    """Error signalling an empty download queue (its raise sites are not in this part of the file)."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class BufferProtocol(Protocol):
    """Structural interface shared by the download buffers in this module."""

    async def write(self, offset, length, value: Bytes) -> None:
        """Store ``value`` (``length`` bytes) at position ``offset``."""

    async def read(self) -> memoryview:
        """Return the buffered content as a ``memoryview``."""

    @property
    def complete(self) -> bool:
        """Whether the buffer has received all of its expected bytes."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclasses.dataclass
|
|
39
|
+
class _MemoryBuffer:
|
|
40
|
+
arr: bytearray
|
|
41
|
+
pending: int
|
|
42
|
+
_closed: bool = False
|
|
43
|
+
|
|
44
|
+
async def write(self, offset: int, length: int, value: Bytes) -> None:
|
|
45
|
+
self.arr[offset : offset + length] = memoryview(value)
|
|
46
|
+
self.pending -= length
|
|
47
|
+
|
|
48
|
+
async def read(self) -> memoryview:
|
|
49
|
+
return memoryview(self.arr)
|
|
50
|
+
|
|
51
|
+
@property
|
|
52
|
+
def complete(self) -> bool:
|
|
53
|
+
return self.pending == 0
|
|
54
|
+
|
|
55
|
+
@classmethod
|
|
56
|
+
def new(cls, size):
|
|
57
|
+
return cls(arr=bytearray(size), pending=size)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclasses.dataclass
|
|
61
|
+
class _FileBuffer:
|
|
62
|
+
path: pathlib.Path
|
|
63
|
+
pending: int
|
|
64
|
+
_handle: io.FileIO | None = None
|
|
65
|
+
_closed: bool = False
|
|
66
|
+
|
|
67
|
+
async def write(self, offset: int, length: int, value: Bytes) -> None:
|
|
68
|
+
async with aiofiles.open(self.path, mode="r+b") as f:
|
|
69
|
+
await f.seek(offset)
|
|
70
|
+
await f.write(value)
|
|
71
|
+
self.pending -= length
|
|
72
|
+
|
|
73
|
+
async def read(self) -> memoryview:
|
|
74
|
+
async with aiofiles.open(self.path, mode="rb") as f:
|
|
75
|
+
return memoryview(await f.read())
|
|
76
|
+
|
|
77
|
+
@property
|
|
78
|
+
def complete(self) -> bool:
|
|
79
|
+
return self.pending == 0
|
|
80
|
+
|
|
81
|
+
@classmethod
|
|
82
|
+
def new(cls, path: pathlib.Path, size: int):
|
|
83
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
84
|
+
path.touch()
|
|
85
|
+
return cls(path=path, pending=size)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclasses.dataclass
class Chunk:
    """A byte range inside one source."""

    offset: int  # start of the range, relative to the beginning of the source
    length: int  # number of bytes in the range
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclasses.dataclass
class Source:
    """One remote object, or a slice of it, that is to be downloaded."""

    # Key under which the chunks of this source are grouped.
    id: Hashable
    # Should be str, represents the fully qualified prefix of a file (no bucket)
    path: pathlib.Path
    # Number of bytes to fetch for this source.
    length: int
    # Position inside the remote object where this source's data starts.
    offset: int = 0
    # Free-form extra data carried along with the source.
    metadata: Any | None = None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@dataclasses.dataclass
class DownloadTask:
    """A single unit of work: fetch one chunk of one source."""

    source: Source  # the object the chunk belongs to
    chunk: Chunk  # the byte range to fetch
    target: pathlib.Path | None = None  # local file to write to; None means buffer in memory
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class ObstoreParallelReader:
    """Download objects from an obstore ``ObjectStore`` as concurrent byte-range reads.

    Every object is split into chunks of at most ``chunk_size`` bytes, and up
    to ``max_concurrency`` workers fetch chunks at the same time.
    """

    def __init__(
        self,
        store: ObjectStore,
        *,
        chunk_size=CHUNK_SIZE,
        max_concurrency=MAX_CONCURRENCY,
    ):
        """
        store: Object store to read from.
        chunk_size: Maximum number of bytes fetched by a single ranged read.
        max_concurrency: Number of worker tasks downloading chunks concurrently.

        Raises ValueError if ``chunk_size`` or ``max_concurrency`` is not positive.
        """
        # A zero chunk size breaks ``range`` in ``_chunks``, and without any worker
        # nothing would ever consume the task queue.
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
        if max_concurrency <= 0:
            raise ValueError(f"max_concurrency must be a positive integer, got {max_concurrency!r}")
        self._store = store
        self._chunk_size = chunk_size
        self._max_concurrency = max_concurrency

    def _chunks(self, size) -> typing.Iterator[tuple[int, int]]:
        """Yield ``(offset, length)`` pairs that cover ``size`` bytes in chunk-sized steps.

        A ``size`` of 0 yields nothing, so empty objects produce no download task.
        """
        cs = self._chunk_size
        for offset in range(0, size, cs):
            length = min(cs, size - offset)
            yield offset, length

    async def _as_completed(self, gen: typing.AsyncGenerator[DownloadTask, None], transformer=None):
        """Run the download tasks produced by ``gen`` and yield each source as it completes.

        Yields ``(source.id, result)`` tuples. ``result`` is the awaited return
        value of ``transformer(buffer, source)`` when a transformer is given,
        otherwise the task's ``target`` path, otherwise the ``Source`` itself.

        ``DownloadQueueEmpty`` is raised when ``gen`` produces no task. On
        Python 3.11+ failures of the background tasks surface through
        ``asyncio.TaskGroup`` (as an exception group); on older versions the
        first failure is re-raised directly.
        """
        inq: asyncio.Queue = asyncio.Queue(self._max_concurrency * 2)
        outq: asyncio.Queue = asyncio.Queue()
        # Marks "no more tasks" on the input queue and "stop waiting" on the output queue.
        sentinel = object()
        done = asyncio.Event()

        active: dict[Hashable, _FileBuffer | _MemoryBuffer] = {}

        async def _fill():
            # Helper function to fill the input queue, this is because the generator is async because it does
            # list/head calls on the object store which are async.
            try:
                counter = 0
                async for task in gen:
                    if task.source.id not in active:
                        active[task.source.id] = (
                            _FileBuffer.new(task.target, task.source.length)
                            if task.target is not None
                            else _MemoryBuffer.new(task.source.length)
                        )
                    await inq.put(task)
                    counter += 1
                await inq.put(sentinel)
                if counter == 0:
                    raise DownloadQueueEmpty
            except asyncio.CancelledError:
                # Cancellation only happens while the whole download is being torn
                # down; finish quietly so the teardown is not reported as an error.
                pass
            except Exception:
                # If the generator fails, the workers never see the input sentinel.
                # Wake the consumer so it does not wait for results forever.
                outq.put_nowait(sentinel)
                raise

        async def _worker():
            try:
                while not done.is_set():
                    task: DownloadTask = await inq.get()
                    if task is sentinel:
                        # Put it back so every other worker sees it as well.
                        inq.put_nowait(sentinel)
                        break
                    # chunk.offset is the local offset within the source (e.g., 0, chunk_size, 2*chunk_size)
                    # source.offset is the offset within the file where the source data starts
                    # The actual file position is the sum of both
                    file_offset = task.chunk.offset + task.source.offset
                    buf = active[task.source.id]
                    data_to_write = await obstore.get_range_async(
                        self._store,
                        str(task.source.path),
                        start=file_offset,
                        end=file_offset + task.chunk.length,
                    )
                    await buf.write(
                        task.chunk.offset,
                        task.chunk.length,
                        data_to_write,
                    )
                    if not buf.complete:
                        continue
                    if transformer is not None:
                        result = await transformer(buf, task.source)
                    elif task.target is not None:
                        result = task.target
                    else:
                        result = task.source
                    outq.put_nowait((task.source.id, result))
                    del active[task.source.id]
            except asyncio.CancelledError:
                pass
            finally:
                done.set()
                # Wake the consumer. It may be blocked on an empty output queue after
                # having consumed the last result, and would otherwise never notice
                # that the workers are gone.
                outq.put_nowait(sentinel)

        # Yield results as they are completed
        if sys.version_info >= (3, 11):
            async with asyncio.TaskGroup() as tg:
                tg.create_task(_fill())
                for _ in range(self._max_concurrency):
                    tg.create_task(_worker())
                while True:
                    item = await outq.get()
                    if item is sentinel:
                        break
                    yield item
        else:
            tasks = [asyncio.create_task(_fill())]
            tasks.extend(asyncio.create_task(_worker()) for _ in range(self._max_concurrency))
            abort = False
            try:
                while True:
                    item = await outq.get()
                    if item is sentinel:
                        break
                    yield item
                # A failed task means the rest may wait on the queues forever.
                abort = any(t.done() and not t.cancelled() and t.exception() is not None for t in tasks)
            except BaseException:
                # Also covers cancellation and the consumer closing the generator.
                abort = True
                raise
            finally:
                if abort:
                    for t in tasks:
                        if not t.done():
                            t.cancel()
                outcomes = await asyncio.gather(*tasks, return_exceptions=True)
            # Surface the first background failure instead of dropping it.
            for outcome in outcomes:
                if isinstance(outcome, Exception):
                    raise outcome

        # Drain the output queue: results finished after the first worker exited are still queued.
        while True:
            try:
                item = outq.get_nowait()
            except asyncio.QueueEmpty:
                break
            if item is not sentinel:
                yield item

    async def download_files(
        self,
        src_prefix: pathlib.Path,
        target_prefix: pathlib.Path,
        *paths,
        destination_file_name: str | None = None,
        exclude: list[str] | None = None,
    ) -> None:
        """
        Download objects below ``src_prefix`` into the local directory ``target_prefix``.

        Files are first written to a temporary directory and moved into place
        once all of their chunks have arrived.

        src_prefix: Prefix you want to download from in the object store, not including the bucket name, nor file name.
                    Should be replaced with string
        target_prefix: Local directory to download to
        paths: Specific paths (relative to src_prefix) to download. If empty, download everything
        destination_file_name: Name to give the downloaded file; only honoured when exactly one path is given.
        exclude: List of patterns to exclude from the download.
        """
        import errno
        import shutil

        def _keep(path):
            if exclude is not None and any(path.match(e) for e in exclude):
                return False
            return True

        async def _list_downloadable() -> typing.AsyncGenerator[ObjectMeta, None]:
            if paths:
                # For specific file paths, use async head
                for path_ in paths:
                    path = src_prefix / path_
                    if _keep(path):
                        yield await obstore.head_async(self._store, str(path))
                return

            # Use obstore.list() for recursive listing (all files in all subdirectories)
            # obstore.list() returns an async iterator that yields batches (lists) of objects
            async for batch in obstore.list(self._store, prefix=str(src_prefix)):
                for obj in batch:
                    # Apply the exclude patterns here too, as is done for explicit paths.
                    if _keep(pathlib.Path(obj["path"])):
                        yield obj

        async def _gen(tmp_dir: pathlib.Path) -> typing.AsyncGenerator[DownloadTask, None]:
            async for obj in _list_downloadable():
                path = pathlib.Path(obj["path"])  # e.g. Path(prefix/file.txt), needs to be changed to str.
                size = obj["size"]
                source = Source(id=path, path=path, length=size)
                # Strip src_prefix from path for destination
                rel_path = path.relative_to(src_prefix)  # doesn't work on windows
                for offset, length in self._chunks(size):
                    yield DownloadTask(
                        source=source,
                        target=tmp_dir / rel_path,  # doesn't work on windows
                        chunk=Chunk(offset, length),
                    )

        def _transform_decorator(tmp_dir: pathlib.Path):
            async def _transformer(buf: _FileBuffer, _: Source) -> None:
                if len(paths) == 1 and destination_file_name is not None:
                    target = target_prefix / destination_file_name
                else:
                    target = target_prefix / buf.path.relative_to(tmp_dir)
                await aiofiles.os.makedirs(target.parent, exist_ok=True)
                try:
                    await aiofiles.os.replace(buf.path, target)  # mv buf.path target
                except OSError as err:
                    if err.errno != errno.EXDEV:
                        raise
                    # The temporary directory and the target are on different
                    # filesystems, where a rename cannot work; copy and delete instead.
                    await asyncio.to_thread(shutil.move, str(buf.path), str(target))

            return _transformer

        with tempfile.TemporaryDirectory() as temporary_dir:
            staging = pathlib.Path(temporary_dir)
            async for _ in self._as_completed(_gen(staging), transformer=_transform_decorator(staging)):
                pass
|