flyte 0.0.1b0__py3-none-any.whl → 2.0.0b46__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- flyte/__init__.py +83 -30
- flyte/_bin/connect.py +61 -0
- flyte/_bin/debug.py +38 -0
- flyte/_bin/runtime.py +87 -19
- flyte/_bin/serve.py +351 -0
- flyte/_build.py +3 -2
- flyte/_cache/cache.py +6 -5
- flyte/_cache/local_cache.py +216 -0
- flyte/_code_bundle/_ignore.py +31 -5
- flyte/_code_bundle/_packaging.py +42 -11
- flyte/_code_bundle/_utils.py +57 -34
- flyte/_code_bundle/bundle.py +130 -27
- flyte/_constants.py +1 -0
- flyte/_context.py +21 -5
- flyte/_custom_context.py +73 -0
- flyte/_debug/constants.py +37 -0
- flyte/_debug/utils.py +17 -0
- flyte/_debug/vscode.py +315 -0
- flyte/_deploy.py +396 -75
- flyte/_deployer.py +109 -0
- flyte/_environment.py +94 -11
- flyte/_excepthook.py +37 -0
- flyte/_group.py +2 -1
- flyte/_hash.py +1 -16
- flyte/_image.py +544 -234
- flyte/_initialize.py +443 -294
- flyte/_interface.py +40 -5
- flyte/_internal/controllers/__init__.py +22 -8
- flyte/_internal/controllers/_local_controller.py +159 -35
- flyte/_internal/controllers/_trace.py +18 -10
- flyte/_internal/controllers/remote/__init__.py +38 -9
- flyte/_internal/controllers/remote/_action.py +82 -12
- flyte/_internal/controllers/remote/_client.py +6 -2
- flyte/_internal/controllers/remote/_controller.py +290 -64
- flyte/_internal/controllers/remote/_core.py +155 -95
- flyte/_internal/controllers/remote/_informer.py +40 -20
- flyte/_internal/controllers/remote/_service_protocol.py +2 -2
- flyte/_internal/imagebuild/__init__.py +2 -10
- flyte/_internal/imagebuild/docker_builder.py +391 -84
- flyte/_internal/imagebuild/image_builder.py +111 -55
- flyte/_internal/imagebuild/remote_builder.py +409 -0
- flyte/_internal/imagebuild/utils.py +79 -0
- flyte/_internal/resolvers/_app_env_module.py +92 -0
- flyte/_internal/resolvers/_task_module.py +5 -38
- flyte/_internal/resolvers/app_env.py +26 -0
- flyte/_internal/resolvers/common.py +8 -1
- flyte/_internal/resolvers/default.py +2 -2
- flyte/_internal/runtime/convert.py +322 -33
- flyte/_internal/runtime/entrypoints.py +106 -18
- flyte/_internal/runtime/io.py +71 -23
- flyte/_internal/runtime/resources_serde.py +21 -7
- flyte/_internal/runtime/reuse.py +125 -0
- flyte/_internal/runtime/rusty.py +196 -0
- flyte/_internal/runtime/task_serde.py +239 -66
- flyte/_internal/runtime/taskrunner.py +48 -8
- flyte/_internal/runtime/trigger_serde.py +162 -0
- flyte/_internal/runtime/types_serde.py +7 -16
- flyte/_keyring/file.py +115 -0
- flyte/_link.py +30 -0
- flyte/_logging.py +241 -42
- flyte/_map.py +312 -0
- flyte/_metrics.py +59 -0
- flyte/_module.py +74 -0
- flyte/_pod.py +30 -0
- flyte/_resources.py +296 -33
- flyte/_retry.py +1 -7
- flyte/_reusable_environment.py +72 -7
- flyte/_run.py +461 -132
- flyte/_secret.py +47 -11
- flyte/_serve.py +333 -0
- flyte/_task.py +245 -56
- flyte/_task_environment.py +219 -97
- flyte/_task_plugins.py +47 -0
- flyte/_tools.py +8 -8
- flyte/_trace.py +15 -24
- flyte/_trigger.py +1027 -0
- flyte/_utils/__init__.py +12 -1
- flyte/_utils/asyn.py +3 -1
- flyte/_utils/async_cache.py +139 -0
- flyte/_utils/coro_management.py +5 -4
- flyte/_utils/description_parser.py +19 -0
- flyte/_utils/docker_credentials.py +173 -0
- flyte/_utils/helpers.py +45 -19
- flyte/_utils/module_loader.py +123 -0
- flyte/_utils/org_discovery.py +57 -0
- flyte/_utils/uv_script_parser.py +8 -1
- flyte/_version.py +16 -3
- flyte/app/__init__.py +27 -0
- flyte/app/_app_environment.py +362 -0
- flyte/app/_connector_environment.py +40 -0
- flyte/app/_deploy.py +130 -0
- flyte/app/_parameter.py +343 -0
- flyte/app/_runtime/__init__.py +3 -0
- flyte/app/_runtime/app_serde.py +383 -0
- flyte/app/_types.py +113 -0
- flyte/app/extras/__init__.py +9 -0
- flyte/app/extras/_auth_middleware.py +217 -0
- flyte/app/extras/_fastapi.py +93 -0
- flyte/app/extras/_model_loader/__init__.py +3 -0
- flyte/app/extras/_model_loader/config.py +7 -0
- flyte/app/extras/_model_loader/loader.py +288 -0
- flyte/cli/__init__.py +12 -0
- flyte/cli/_abort.py +28 -0
- flyte/cli/_build.py +114 -0
- flyte/cli/_common.py +493 -0
- flyte/cli/_create.py +371 -0
- flyte/cli/_delete.py +45 -0
- flyte/cli/_deploy.py +401 -0
- flyte/cli/_gen.py +316 -0
- flyte/cli/_get.py +446 -0
- flyte/cli/_option.py +33 -0
- {union/_cli → flyte/cli}/_params.py +152 -153
- flyte/cli/_plugins.py +209 -0
- flyte/cli/_prefetch.py +292 -0
- flyte/cli/_run.py +690 -0
- flyte/cli/_serve.py +338 -0
- flyte/cli/_update.py +86 -0
- flyte/cli/_user.py +20 -0
- flyte/cli/main.py +246 -0
- flyte/config/__init__.py +3 -0
- flyte/config/_config.py +248 -0
- flyte/config/_internal.py +73 -0
- flyte/config/_reader.py +225 -0
- flyte/connectors/__init__.py +11 -0
- flyte/connectors/_connector.py +330 -0
- flyte/connectors/_server.py +194 -0
- flyte/connectors/utils.py +159 -0
- flyte/errors.py +134 -2
- flyte/extend.py +24 -0
- flyte/extras/_container.py +69 -56
- flyte/git/__init__.py +3 -0
- flyte/git/_config.py +279 -0
- flyte/io/__init__.py +8 -1
- flyte/io/{structured_dataset → _dataframe}/__init__.py +32 -30
- flyte/io/{structured_dataset → _dataframe}/basic_dfs.py +75 -68
- flyte/io/{structured_dataset/structured_dataset.py → _dataframe/dataframe.py} +207 -242
- flyte/io/_dir.py +575 -113
- flyte/io/_file.py +587 -141
- flyte/io/_hashing_io.py +342 -0
- flyte/io/extend.py +7 -0
- flyte/models.py +635 -0
- flyte/prefetch/__init__.py +22 -0
- flyte/prefetch/_hf_model.py +563 -0
- flyte/remote/__init__.py +14 -3
- flyte/remote/_action.py +879 -0
- flyte/remote/_app.py +346 -0
- flyte/remote/_auth_metadata.py +42 -0
- flyte/remote/_client/_protocols.py +62 -4
- flyte/remote/_client/auth/_auth_utils.py +19 -0
- flyte/remote/_client/auth/_authenticators/base.py +8 -2
- flyte/remote/_client/auth/_authenticators/device_code.py +4 -5
- flyte/remote/_client/auth/_authenticators/factory.py +4 -0
- flyte/remote/_client/auth/_authenticators/passthrough.py +79 -0
- flyte/remote/_client/auth/_authenticators/pkce.py +17 -18
- flyte/remote/_client/auth/_channel.py +47 -18
- flyte/remote/_client/auth/_client_config.py +5 -3
- flyte/remote/_client/auth/_keyring.py +15 -2
- flyte/remote/_client/auth/_token_client.py +3 -3
- flyte/remote/_client/controlplane.py +206 -18
- flyte/remote/_common.py +66 -0
- flyte/remote/_data.py +107 -22
- flyte/remote/_logs.py +116 -33
- flyte/remote/_project.py +21 -19
- flyte/remote/_run.py +164 -631
- flyte/remote/_secret.py +72 -29
- flyte/remote/_task.py +387 -46
- flyte/remote/_trigger.py +368 -0
- flyte/remote/_user.py +43 -0
- flyte/report/_report.py +10 -6
- flyte/storage/__init__.py +13 -1
- flyte/storage/_config.py +237 -0
- flyte/storage/_parallel_reader.py +289 -0
- flyte/storage/_storage.py +268 -59
- flyte/syncify/__init__.py +56 -0
- flyte/syncify/_api.py +414 -0
- flyte/types/__init__.py +39 -0
- flyte/types/_interface.py +22 -7
- flyte/{io/pickle/transformer.py → types/_pickle.py} +37 -9
- flyte/types/_string_literals.py +8 -9
- flyte/types/_type_engine.py +230 -129
- flyte/types/_utils.py +1 -1
- flyte-2.0.0b46.data/scripts/debug.py +38 -0
- flyte-2.0.0b46.data/scripts/runtime.py +194 -0
- flyte-2.0.0b46.dist-info/METADATA +352 -0
- flyte-2.0.0b46.dist-info/RECORD +221 -0
- flyte-2.0.0b46.dist-info/entry_points.txt +8 -0
- flyte-2.0.0b46.dist-info/licenses/LICENSE +201 -0
- flyte/_api_commons.py +0 -3
- flyte/_cli/_common.py +0 -287
- flyte/_cli/_create.py +0 -42
- flyte/_cli/_delete.py +0 -23
- flyte/_cli/_deploy.py +0 -140
- flyte/_cli/_get.py +0 -235
- flyte/_cli/_run.py +0 -152
- flyte/_cli/main.py +0 -72
- flyte/_datastructures.py +0 -342
- flyte/_internal/controllers/pbhash.py +0 -39
- flyte/_protos/common/authorization_pb2.py +0 -66
- flyte/_protos/common/authorization_pb2.pyi +0 -108
- flyte/_protos/common/authorization_pb2_grpc.py +0 -4
- flyte/_protos/common/identifier_pb2.py +0 -71
- flyte/_protos/common/identifier_pb2.pyi +0 -82
- flyte/_protos/common/identifier_pb2_grpc.py +0 -4
- flyte/_protos/common/identity_pb2.py +0 -48
- flyte/_protos/common/identity_pb2.pyi +0 -72
- flyte/_protos/common/identity_pb2_grpc.py +0 -4
- flyte/_protos/common/list_pb2.py +0 -36
- flyte/_protos/common/list_pb2.pyi +0 -69
- flyte/_protos/common/list_pb2_grpc.py +0 -4
- flyte/_protos/common/policy_pb2.py +0 -37
- flyte/_protos/common/policy_pb2.pyi +0 -27
- flyte/_protos/common/policy_pb2_grpc.py +0 -4
- flyte/_protos/common/role_pb2.py +0 -37
- flyte/_protos/common/role_pb2.pyi +0 -53
- flyte/_protos/common/role_pb2_grpc.py +0 -4
- flyte/_protos/common/runtime_version_pb2.py +0 -28
- flyte/_protos/common/runtime_version_pb2.pyi +0 -24
- flyte/_protos/common/runtime_version_pb2_grpc.py +0 -4
- flyte/_protos/logs/dataplane/payload_pb2.py +0 -96
- flyte/_protos/logs/dataplane/payload_pb2.pyi +0 -168
- flyte/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
- flyte/_protos/secret/definition_pb2.py +0 -49
- flyte/_protos/secret/definition_pb2.pyi +0 -93
- flyte/_protos/secret/definition_pb2_grpc.py +0 -4
- flyte/_protos/secret/payload_pb2.py +0 -62
- flyte/_protos/secret/payload_pb2.pyi +0 -94
- flyte/_protos/secret/payload_pb2_grpc.py +0 -4
- flyte/_protos/secret/secret_pb2.py +0 -38
- flyte/_protos/secret/secret_pb2.pyi +0 -6
- flyte/_protos/secret/secret_pb2_grpc.py +0 -198
- flyte/_protos/secret/secret_pb2_grpc_grpc.py +0 -198
- flyte/_protos/validate/validate/validate_pb2.py +0 -76
- flyte/_protos/workflow/node_execution_service_pb2.py +0 -26
- flyte/_protos/workflow/node_execution_service_pb2.pyi +0 -4
- flyte/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
- flyte/_protos/workflow/queue_service_pb2.py +0 -106
- flyte/_protos/workflow/queue_service_pb2.pyi +0 -141
- flyte/_protos/workflow/queue_service_pb2_grpc.py +0 -172
- flyte/_protos/workflow/run_definition_pb2.py +0 -128
- flyte/_protos/workflow/run_definition_pb2.pyi +0 -310
- flyte/_protos/workflow/run_definition_pb2_grpc.py +0 -4
- flyte/_protos/workflow/run_logs_service_pb2.py +0 -41
- flyte/_protos/workflow/run_logs_service_pb2.pyi +0 -28
- flyte/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
- flyte/_protos/workflow/run_service_pb2.py +0 -133
- flyte/_protos/workflow/run_service_pb2.pyi +0 -175
- flyte/_protos/workflow/run_service_pb2_grpc.py +0 -412
- flyte/_protos/workflow/state_service_pb2.py +0 -58
- flyte/_protos/workflow/state_service_pb2.pyi +0 -71
- flyte/_protos/workflow/state_service_pb2_grpc.py +0 -138
- flyte/_protos/workflow/task_definition_pb2.py +0 -72
- flyte/_protos/workflow/task_definition_pb2.pyi +0 -65
- flyte/_protos/workflow/task_definition_pb2_grpc.py +0 -4
- flyte/_protos/workflow/task_service_pb2.py +0 -44
- flyte/_protos/workflow/task_service_pb2.pyi +0 -31
- flyte/_protos/workflow/task_service_pb2_grpc.py +0 -104
- flyte/io/_dataframe.py +0 -0
- flyte/io/pickle/__init__.py +0 -0
- flyte/remote/_console.py +0 -18
- flyte-0.0.1b0.dist-info/METADATA +0 -179
- flyte-0.0.1b0.dist-info/RECORD +0 -390
- flyte-0.0.1b0.dist-info/entry_points.txt +0 -3
- union/__init__.py +0 -54
- union/_api_commons.py +0 -3
- union/_bin/__init__.py +0 -0
- union/_bin/runtime.py +0 -113
- union/_build.py +0 -25
- union/_cache/__init__.py +0 -12
- union/_cache/cache.py +0 -141
- union/_cache/defaults.py +0 -9
- union/_cache/policy_function_body.py +0 -42
- union/_cli/__init__.py +0 -0
- union/_cli/_common.py +0 -263
- union/_cli/_create.py +0 -40
- union/_cli/_delete.py +0 -23
- union/_cli/_deploy.py +0 -120
- union/_cli/_get.py +0 -162
- union/_cli/_run.py +0 -150
- union/_cli/main.py +0 -72
- union/_code_bundle/__init__.py +0 -8
- union/_code_bundle/_ignore.py +0 -113
- union/_code_bundle/_packaging.py +0 -187
- union/_code_bundle/_utils.py +0 -342
- union/_code_bundle/bundle.py +0 -176
- union/_context.py +0 -146
- union/_datastructures.py +0 -295
- union/_deploy.py +0 -185
- union/_doc.py +0 -29
- union/_docstring.py +0 -26
- union/_environment.py +0 -43
- union/_group.py +0 -31
- union/_hash.py +0 -23
- union/_image.py +0 -760
- union/_initialize.py +0 -585
- union/_interface.py +0 -84
- union/_internal/__init__.py +0 -3
- union/_internal/controllers/__init__.py +0 -77
- union/_internal/controllers/_local_controller.py +0 -77
- union/_internal/controllers/pbhash.py +0 -39
- union/_internal/controllers/remote/__init__.py +0 -40
- union/_internal/controllers/remote/_action.py +0 -131
- union/_internal/controllers/remote/_client.py +0 -43
- union/_internal/controllers/remote/_controller.py +0 -169
- union/_internal/controllers/remote/_core.py +0 -341
- union/_internal/controllers/remote/_informer.py +0 -260
- union/_internal/controllers/remote/_service_protocol.py +0 -44
- union/_internal/imagebuild/__init__.py +0 -11
- union/_internal/imagebuild/docker_builder.py +0 -416
- union/_internal/imagebuild/image_builder.py +0 -243
- union/_internal/imagebuild/remote_builder.py +0 -0
- union/_internal/resolvers/__init__.py +0 -0
- union/_internal/resolvers/_task_module.py +0 -31
- union/_internal/resolvers/common.py +0 -24
- union/_internal/resolvers/default.py +0 -27
- union/_internal/runtime/__init__.py +0 -0
- union/_internal/runtime/convert.py +0 -163
- union/_internal/runtime/entrypoints.py +0 -121
- union/_internal/runtime/io.py +0 -136
- union/_internal/runtime/resources_serde.py +0 -134
- union/_internal/runtime/task_serde.py +0 -202
- union/_internal/runtime/taskrunner.py +0 -179
- union/_internal/runtime/types_serde.py +0 -53
- union/_logging.py +0 -124
- union/_protos/__init__.py +0 -0
- union/_protos/common/authorization_pb2.py +0 -66
- union/_protos/common/authorization_pb2.pyi +0 -106
- union/_protos/common/authorization_pb2_grpc.py +0 -4
- union/_protos/common/identifier_pb2.py +0 -71
- union/_protos/common/identifier_pb2.pyi +0 -82
- union/_protos/common/identifier_pb2_grpc.py +0 -4
- union/_protos/common/identity_pb2.py +0 -48
- union/_protos/common/identity_pb2.pyi +0 -72
- union/_protos/common/identity_pb2_grpc.py +0 -4
- union/_protos/common/list_pb2.py +0 -36
- union/_protos/common/list_pb2.pyi +0 -69
- union/_protos/common/list_pb2_grpc.py +0 -4
- union/_protos/common/policy_pb2.py +0 -37
- union/_protos/common/policy_pb2.pyi +0 -27
- union/_protos/common/policy_pb2_grpc.py +0 -4
- union/_protos/common/role_pb2.py +0 -37
- union/_protos/common/role_pb2.pyi +0 -51
- union/_protos/common/role_pb2_grpc.py +0 -4
- union/_protos/common/runtime_version_pb2.py +0 -28
- union/_protos/common/runtime_version_pb2.pyi +0 -24
- union/_protos/common/runtime_version_pb2_grpc.py +0 -4
- union/_protos/logs/dataplane/payload_pb2.py +0 -96
- union/_protos/logs/dataplane/payload_pb2.pyi +0 -168
- union/_protos/logs/dataplane/payload_pb2_grpc.py +0 -4
- union/_protos/secret/definition_pb2.py +0 -49
- union/_protos/secret/definition_pb2.pyi +0 -93
- union/_protos/secret/definition_pb2_grpc.py +0 -4
- union/_protos/secret/payload_pb2.py +0 -62
- union/_protos/secret/payload_pb2.pyi +0 -94
- union/_protos/secret/payload_pb2_grpc.py +0 -4
- union/_protos/secret/secret_pb2.py +0 -38
- union/_protos/secret/secret_pb2.pyi +0 -6
- union/_protos/secret/secret_pb2_grpc.py +0 -198
- union/_protos/validate/validate/validate_pb2.py +0 -76
- union/_protos/workflow/node_execution_service_pb2.py +0 -26
- union/_protos/workflow/node_execution_service_pb2.pyi +0 -4
- union/_protos/workflow/node_execution_service_pb2_grpc.py +0 -32
- union/_protos/workflow/queue_service_pb2.py +0 -75
- union/_protos/workflow/queue_service_pb2.pyi +0 -103
- union/_protos/workflow/queue_service_pb2_grpc.py +0 -172
- union/_protos/workflow/run_definition_pb2.py +0 -100
- union/_protos/workflow/run_definition_pb2.pyi +0 -256
- union/_protos/workflow/run_definition_pb2_grpc.py +0 -4
- union/_protos/workflow/run_logs_service_pb2.py +0 -41
- union/_protos/workflow/run_logs_service_pb2.pyi +0 -28
- union/_protos/workflow/run_logs_service_pb2_grpc.py +0 -69
- union/_protos/workflow/run_service_pb2.py +0 -133
- union/_protos/workflow/run_service_pb2.pyi +0 -173
- union/_protos/workflow/run_service_pb2_grpc.py +0 -412
- union/_protos/workflow/state_service_pb2.py +0 -58
- union/_protos/workflow/state_service_pb2.pyi +0 -69
- union/_protos/workflow/state_service_pb2_grpc.py +0 -138
- union/_protos/workflow/task_definition_pb2.py +0 -72
- union/_protos/workflow/task_definition_pb2.pyi +0 -65
- union/_protos/workflow/task_definition_pb2_grpc.py +0 -4
- union/_protos/workflow/task_service_pb2.py +0 -44
- union/_protos/workflow/task_service_pb2.pyi +0 -31
- union/_protos/workflow/task_service_pb2_grpc.py +0 -104
- union/_resources.py +0 -226
- union/_retry.py +0 -32
- union/_reusable_environment.py +0 -25
- union/_run.py +0 -374
- union/_secret.py +0 -61
- union/_task.py +0 -354
- union/_task_environment.py +0 -186
- union/_timeout.py +0 -47
- union/_tools.py +0 -27
- union/_utils/__init__.py +0 -11
- union/_utils/asyn.py +0 -119
- union/_utils/file_handling.py +0 -71
- union/_utils/helpers.py +0 -46
- union/_utils/lazy_module.py +0 -54
- union/_utils/uv_script_parser.py +0 -49
- union/_version.py +0 -21
- union/connectors/__init__.py +0 -0
- union/errors.py +0 -128
- union/extras/__init__.py +0 -5
- union/extras/_container.py +0 -263
- union/io/__init__.py +0 -11
- union/io/_dataframe.py +0 -0
- union/io/_dir.py +0 -425
- union/io/_file.py +0 -418
- union/io/pickle/__init__.py +0 -0
- union/io/pickle/transformer.py +0 -117
- union/io/structured_dataset/__init__.py +0 -122
- union/io/structured_dataset/basic_dfs.py +0 -219
- union/io/structured_dataset/structured_dataset.py +0 -1057
- union/py.typed +0 -0
- union/remote/__init__.py +0 -23
- union/remote/_client/__init__.py +0 -0
- union/remote/_client/_protocols.py +0 -129
- union/remote/_client/auth/__init__.py +0 -12
- union/remote/_client/auth/_authenticators/__init__.py +0 -0
- union/remote/_client/auth/_authenticators/base.py +0 -391
- union/remote/_client/auth/_authenticators/client_credentials.py +0 -73
- union/remote/_client/auth/_authenticators/device_code.py +0 -120
- union/remote/_client/auth/_authenticators/external_command.py +0 -77
- union/remote/_client/auth/_authenticators/factory.py +0 -200
- union/remote/_client/auth/_authenticators/pkce.py +0 -515
- union/remote/_client/auth/_channel.py +0 -184
- union/remote/_client/auth/_client_config.py +0 -83
- union/remote/_client/auth/_default_html.py +0 -32
- union/remote/_client/auth/_grpc_utils/__init__.py +0 -0
- union/remote/_client/auth/_grpc_utils/auth_interceptor.py +0 -204
- union/remote/_client/auth/_grpc_utils/default_metadata_interceptor.py +0 -144
- union/remote/_client/auth/_keyring.py +0 -154
- union/remote/_client/auth/_token_client.py +0 -258
- union/remote/_client/auth/errors.py +0 -16
- union/remote/_client/controlplane.py +0 -86
- union/remote/_data.py +0 -149
- union/remote/_logs.py +0 -74
- union/remote/_project.py +0 -86
- union/remote/_run.py +0 -820
- union/remote/_secret.py +0 -132
- union/remote/_task.py +0 -193
- union/report/__init__.py +0 -3
- union/report/_report.py +0 -178
- union/report/_template.html +0 -124
- union/storage/__init__.py +0 -24
- union/storage/_remote_fs.py +0 -34
- union/storage/_storage.py +0 -247
- union/storage/_utils.py +0 -5
- union/types/__init__.py +0 -11
- union/types/_renderer.py +0 -162
- union/types/_string_literals.py +0 -120
- union/types/_type_engine.py +0 -2131
- union/types/_utils.py +0 -80
- /flyte/{_cli → _debug}/__init__.py +0 -0
- /flyte/{_protos → _keyring}/__init__.py +0 -0
- {flyte-0.0.1b0.dist-info → flyte-2.0.0b46.dist-info}/WHEEL +0 -0
- {flyte-0.0.1b0.dist-info → flyte-2.0.0b46.dist-info}/top_level.txt +0 -0
flyte/io/_file.py
CHANGED
@@ -1,10 +1,13 @@
from __future__ import annotations

+import inspect
import os
+import typing
from contextlib import asynccontextmanager, contextmanager
from pathlib import Path
from typing import (
    IO,
+    Annotated,
    Any,
    AsyncGenerator,
    Dict,
@@ -17,85 +20,165 @@ from typing import (
)

import aiofiles
-from
-from fsspec.asyn import AsyncFileSystem
+from flyteidl2.core import literals_pb2, types_pb2
from fsspec.utils import get_protocol
from mashumaro.types import SerializableType
-from pydantic import BaseModel, model_validator
-from
+from pydantic import BaseModel, Field, model_validator
+from pydantic.json_schema import SkipJsonSchema

+import flyte.errors
import flyte.storage as storage
from flyte._context import internal_ctx
from flyte._initialize import requires_initialization
-from flyte.
+from flyte.io._hashing_io import AsyncHashingReader, HashingWriter, HashMethod, PrecomputedValue
from flyte.types import TypeEngine, TypeTransformer, TypeTransformerFailedError

+if typing.TYPE_CHECKING:
+    from obstore import AsyncReadableFile, AsyncWritableFile
+
+if typing.TYPE_CHECKING:
+    from obstore import AsyncReadableFile, AsyncWritableFile
+
# Type variable for the file format
T = TypeVar("T")

-synced = Synchronizer()
-

class File(BaseModel, Generic[T], SerializableType):
    """
    A generic file class representing a file with a specified format.
-    Provides both async and sync interfaces for file operations.
-
+    Provides both async and sync interfaces for file operations. All methods without _sync suffix are async.
+
+    The class should be instantiated using one of the class methods. The constructor should be used only to
+    instantiate references to existing remote objects.

    The generic type T represents the format of the file.

-
-
-
-    from pandas import DataFrame
-    csv_file = File[DataFrame](path="s3://my-bucket/data.csv")
+    Important methods:
+    - `from_existing_remote`: Create a File object from an existing remote file.
+    - `new_remote`: Create a new File reference for a remote file that will be written to.

-
-
+    **Asynchronous methods**:
+    - `open`: Asynchronously open the file and return a file-like object.
+    - `download`: Asynchronously download the file to a local path.
+    - `from_local`: Asynchronously create a File object from a local file, uploading it to remote storage.
+    - `exists`: Asynchronously check if the file exists.

-
-
-
-
+    **Synchronous methods** (suffixed with `_sync`):
+    - `open_sync`: Synchronously open the file and return a file-like object.
+    - `download_sync`: Synchronously download the file to a local path.
+    - `from_local_sync`: Synchronously create a File object from a local file, uploading it to remote storage.
+    - `exists_sync`: Synchronously check if the file exists.

-    Example: Read a file input in a Task.
-
+    Example: Read a file input in a Task (Async).
+
+    ```python
    @env.task
-    async def
-        async with file.open() as f:
-
+    async def read_file(file: File) -> str:
+        async with file.open("rb") as f:
+            content = bytes(await f.read())
+            return content.decode("utf-8")
    ```

-    Example:
+    Example: Read a file input in a Task (Sync).
+
+    ```python
+    @env.task
+    def read_file_sync(file: File) -> str:
+        with file.open_sync("rb") as f:
+            content = f.read()
+            return content.decode("utf-8")
    ```
+
+    Example: Write a file by streaming it directly to blob storage (Async).
+
+    ```python
    @env.task
-    async def
-        df = pd.DataFrame(...)
+    async def write_file() -> File:
        file = File.new_remote()
        async with file.open("wb") as f:
-
-            # No additional uploading will be done here.
+            await f.write(b"Hello, World!")
        return file
    ```
-
+
+    Example: Upload a local file to remote storage (Async).
+
+    ```python
+    @env.task
+    async def upload_file() -> File:
+        # Write to local file first
+        with open("/tmp/data.csv", "w") as f:
+            f.write("col1,col2\\n1,2\\n3,4\\n")
+        # Upload to remote storage
+        return await File.from_local("/tmp/data.csv")
    ```
+
+    Example: Upload a local file to remote storage (Sync).
+
+    ```python
+    @env.task
+    def upload_file_sync() -> File:
+        # Write to local file first
+        with open("/tmp/data.csv", "w") as f:
+            f.write("col1,col2\\n1,2\\n3,4\\n")
+        # Upload to remote storage
+        return File.from_local_sync("/tmp/data.csv")
+    ```
+
+    Example: Download a file to local storage (Async).
+
+    ```python
+    @env.task
+    async def download_file(file: File) -> str:
+        local_path = await file.download()
+        # Process the local file
+        with open(local_path, "r") as f:
+            return f.read()
+    ```
+
+    Example: Download a file to local storage (Sync).
+
+    ```python
    @env.task
-
-
-
+    def download_file_sync(file: File) -> str:
+        local_path = file.download_sync()
+        # Process the local file
+        with open(local_path, "r") as f:
+            return f.read()
    ```

-    Example:
+    Example: Reference an existing remote file.
+
+    ```python
+    @env.task
+    async def process_existing_file() -> str:
+        file = File.from_existing_remote("s3://my-bucket/data.csv")
+        async with file.open("rb") as f:
+            content = await f.read()
+            return content.decode("utf-8")
    ```
+
+    Example: Check if a file exists (Async).
+
+    ```python
    @env.task
-    async def
-        return
+    async def check_file(file: File) -> bool:
+        return await file.exists()
    ```

-    Example:
+    Example: Check if a file exists (Sync).
+
+    ```python
+    @env.task
+    def check_file_sync(file: File) -> bool:
+        return file.exists_sync()
    ```
+
+    Example: Pass through a file without copying.
+
+    ```python
    @env.task
-    async def
+    async def pass_through(file: File) -> File:
+        # No copy occurs - just passes the reference
        return file
    ```

@@ -107,6 +190,8 @@ class File(BaseModel, Generic[T], SerializableType):
    path: str
    name: Optional[str] = None
    format: str = ""
+    hash: Optional[str] = None
+    hash_method: Annotated[Optional[HashMethod], Field(default=None, exclude=True), SkipJsonSchema()] = None

    class Config:
        arbitrary_types_allowed = True
@@ -114,20 +199,24 @@ class File(BaseModel, Generic[T], SerializableType):
    @model_validator(mode="before")
    @classmethod
    def pre_init(cls, data):
+        """Internal: Pydantic validator to set default name from path. Not intended for direct use."""
        if data.get("name") is None:
            data["name"] = Path(data["path"]).name
        return data

    def _serialize(self) -> Dict[str, Optional[str]]:
+        """Internal: Serialize File to dictionary. Not intended for direct use."""
        pyd_dump = self.model_dump()
        return pyd_dump

    @classmethod
    def _deserialize(cls, file_dump: Dict[str, Optional[str]]) -> File:
+        """Internal: Deserialize File from dictionary. Not intended for direct use."""
        return File.model_validate(file_dump)

    @classmethod
    def schema_match(cls, incoming: dict):
+        """Internal: Check if incoming schema matches File schema. Not intended for direct use."""
        this_schema = cls.model_json_schema()
        current_required = this_schema.get("required")
        incoming_required = incoming.get("required")
@@ -142,41 +231,69 @@ class File(BaseModel, Generic[T], SerializableType):

    @classmethod
    @requires_initialization
-    def new_remote(cls) -> File[T]:
+    def new_remote(cls, file_name: Optional[str] = None, hash_method: Optional[HashMethod | str] = None) -> File[T]:
        """
        Create a new File reference for a remote file that will be written to.

-
-
+        Use this when you want to create a new file and write to it directly without creating a local file first.
+
+        Example (Async):
+
+        ```python
        @env.task
-        async def
-            df = pd.DataFrame(
+        async def create_csv() -> File:
+            df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
            file = File.new_remote()
            async with file.open("wb") as f:
                df.to_csv(f)
            return file
        ```
+
+        Args:
+            file_name: Optional string specifying a remote file name. If not set,
+                a generated file name will be returned.
+            hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
+                it will be used as a precomputed cache key. If a HashMethod is provided, it will be used
+                to compute the hash as data is written.
+
+        Returns:
+            A new File instance with a generated remote path
        """
        ctx = internal_ctx()
+        known_cache_key = hash_method if isinstance(hash_method, str) else None
+        method = hash_method if isinstance(hash_method, HashMethod) else None

-        return cls(
+        return cls(
+            path=ctx.raw_data.get_random_remote_path(file_name=file_name), hash=known_cache_key, hash_method=method
+        )

    @classmethod
-    def from_existing_remote(cls, remote_path: str) -> File[T]:
+    def from_existing_remote(cls, remote_path: str, file_cache_key: Optional[str] = None) -> File[T]:
        """
        Create a File reference from an existing remote file.

+        Use this when you want to reference a file that already exists in remote storage without uploading it.
+
        Example:
+
        ```python
        @env.task
-        async def
-
+        async def process_existing_file() -> str:
+            file = File.from_existing_remote("s3://my-bucket/data.csv")
+            async with file.open("rb") as f:
+                content = await f.read()
+                return content.decode("utf-8")
        ```

        Args:
            remote_path: The remote path to the existing file
+            file_cache_key: Optional hash value to use for cache key computation. If not specified, the cache key
+                will be computed based on the file's attributes (path, name, format).
+
+        Returns:
+            A new File instance pointing to the existing remote file
        """
-        return cls(path=remote_path)
+        return cls(path=remote_path, hash=file_cache_key)

    @asynccontextmanager
    async def open(
@@ -187,82 +304,129 @@ class File(BaseModel, Generic[T], SerializableType):
        cache_options: Optional[dict] = None,
        compression: Optional[str] = None,
        **kwargs,
-    ) -> AsyncGenerator[
+    ) -> AsyncGenerator[Union[AsyncWritableFile, AsyncReadableFile, "HashingWriter"], None]:
        """
        Asynchronously open the file and return a file-like object.

+        Use this method in async tasks to read from or write to files directly.
+
+        Example (Async Read):
+
+        ```python
+        @env.task
+        async def read_file(f: File) -> str:
+            async with f.open("rb") as fh:
+                content = bytes(await fh.read())
+            return content.decode("utf-8")
+        ```
+
+        Example (Async Write):
+
+        ```python
+        @env.task
+        async def write_file() -> File:
+            f = File.new_remote()
+            async with f.open("wb") as fh:
+                await fh.write(b"Hello, World!")
+            return f
+        ```
+
+        Example (Streaming Read):
+
+        ```python
+        @env.task
+        async def stream_read(f: File) -> str:
+            content_parts = []
+            async with f.open("rb", block_size=1024) as fh:
+                while True:
+                    chunk = await fh.read()
+                    if not chunk:
+                        break
+                    content_parts.append(chunk)
+            return b"".join(content_parts).decode("utf-8")
+        ```
+
        Args:
-            mode: The mode to open the file in (default: 'rb')
-
+            mode: The mode to open the file in (default: 'rb'). Common modes: 'rb' (read binary),
+                'wb' (write binary), 'rt' (read text), 'wt' (write text)
+            block_size: Size of blocks for reading in bytes. Useful for streaming large files.
            cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
            cache_options: Dictionary of options for the cache
            compression: Compression format or None for auto-detection
            **kwargs: Additional arguments passed to fsspec's open method

        Returns:
-            An async file-like object
-
-        Example:
-        ```python
-        async with file.open('rb') as f:
-            data = await f.read()
-        ```
+            An async file-like object that can be used with async read/write operations
        """
-
-
-
-
-
-
-
-
-
-
-
-        if block_size:
-            open_kwargs["block_size"] = block_size
-
-        # Apply caching strategy
-        if cache_type != "none":
-            open_kwargs["cache_type"] = cache_type
-            open_kwargs["cache_options"] = cache_options
-
-        # Use aiofiles for local files
-        if fs.protocol == "file":
-            async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
-                yield f
-        else:
-            # This code is broadly similar to what storage.get_stream does, but without actually reading from the stream
-            file_handle = None
+        # Check if we should use obstore bypass
+        try:
+            fh = await storage.open(
+                self.path,
+                mode=mode,
+                cache_type=cache_type,
+                cache_options=cache_options,
+                compression=compression,
+                block_size=block_size,
+                **kwargs,
+            )
            try:
-
-
-                yield file_handle
-                return
-            except NotImplementedError:
-                logger.debug(f"{fs} doesn't implement 'open_async', falling back to sync")
+                yield fh
+                return
            finally:
-                if
-
+                if inspect.iscoroutinefunction(fh.close):
+                    await fh.close()
+                else:
+                    fh.close()
+        except flyte.errors.OnlyAsyncIOSupportedError:
+            # Fall back to aiofiles
+            fs = storage.get_underlying_filesystem(path=self.path)
+            if "file" in fs.protocol:
+                async with aiofiles.open(self.path, mode=mode, **kwargs) as f:
+                    yield f
+                return
+            raise
+
+    async def exists(self) -> bool:
+        """
+        Asynchronously check if the file exists.

-
-
+        Example (Async):
+
+        ```python
+        @env.task
+        async def check_file(f: File) -> bool:
+            if await f.exists():
+                print("File exists!")
+                return True
+            return False
+        ```
+
+        Returns:
+            True if the file exists, False otherwise
+        """
+        return await storage.exists(self.path)

    def exists_sync(self) -> bool:
        """
        Synchronously check if the file exists.

+        Use this in non-async tasks or when you need synchronous file existence checking.
+
+        Example (Sync):
+
+        ```python
+        @env.task
+        def check_file_sync(f: File) -> bool:
+            if f.exists_sync():
+                print("File exists!")
+                return True
+            return False
+        ```
+
        Returns:
            True if the file exists, False otherwise
-
-        Example:
-        ```python
-        if file.exists_sync():
-            # Process the file
-        ```
        """
-
-        return fs.exists(self.path)
+        return storage.exists_sync(self.path)

    @contextmanager
    def open_sync(
@@ -273,26 +437,44 @@ class File(BaseModel, Generic[T], SerializableType):
        cache_options: Optional[dict] = None,
        compression: Optional[str] = None,
        **kwargs,
-    ) -> Generator[IO[Any]]:
+    ) -> Generator[IO[Any], None, None]:
        """
        Synchronously open the file and return a file-like object.

+        Use this method in non-async tasks to read from or write to files directly.
+
+        Example (Sync Read):
+
+        ```python
+        @env.task
+        def read_file_sync(f: File) -> str:
+            with f.open_sync("rb") as fh:
+                content = fh.read()
+            return content.decode("utf-8")
+        ```
+
+        Example (Sync Write):
+
+        ```python
+        @env.task
+        def write_file_sync() -> File:
+            f = File.new_remote()
+            with f.open_sync("wb") as fh:
+                fh.write(b"Hello, World!")
+            return f
+        ```
+
        Args:
-            mode: The mode to open the file in (default: 'rb')
-
+            mode: The mode to open the file in (default: 'rb'). Common modes: 'rb' (read binary),
+                'wb' (write binary), 'rt' (read text), 'wt' (write text)
+            block_size: Size of blocks for reading in bytes. Useful for streaming large files.
            cache_type: Caching mechanism to use ('readahead', 'mmap', 'bytes', 'none')
            cache_options: Dictionary of options for the cache
            compression: Compression format or None for auto-detection
            **kwargs: Additional arguments passed to fsspec's open method

        Returns:
-            A file-like object
-
-        Example:
-        ```python
-        with file.open_sync('rb') as f:
-            data = f.read()
-        ```
+            A file-like object that can be used with standard read/write operations
        """
        fs = storage.get_underlying_filesystem(path=self.path)

@@ -314,59 +496,193 @@ class File(BaseModel, Generic[T], SerializableType):
        with fs.open(self.path, **open_kwargs) as f:
            yield f

-    #
+    # TODO sync needs to be implemented
    async def download(self, local_path: Optional[Union[str, Path]] = None) -> str:
        """
        Asynchronously download the file to a local path.

+        Use this when you need to download a remote file to your local filesystem for processing.
+
+        Example (Async):
+
+        ```python
+        @env.task
+        async def download_and_process(f: File) -> str:
+            local_path = await f.download()
+            # Now process the local file
+            with open(local_path, "r") as fh:
+                return fh.read()
+        ```
+
+        Example (Download to specific path):
+
+        ```python
+        @env.task
+        async def download_to_path(f: File) -> str:
+            local_path = await f.download("/tmp/myfile.csv")
+            return local_path
+        ```
+
        Args:
            local_path: The local path to download the file to. If None, a temporary
-                directory will be used.
+                directory will be used and a path will be generated.

        Returns:
-            The path to the downloaded file
-
-        Example:
-        ```python
-        local_file = await file.download('/tmp/myfile.csv')
-        ```
+            The absolute path to the downloaded file
        """
        if local_path is None:
-            local_path = storage.get_random_local_path(file_path_or_file_name=
+            local_path = storage.get_random_local_path(file_path_or_file_name=self.path)
        else:
+            # Preserve trailing separator if present (Path.absolute() strips it)
+            local_path_str = str(local_path)
+            has_trailing_sep = local_path_str.endswith(os.sep)
            local_path = str(Path(local_path).absolute())
+            if has_trailing_sep:
+                local_path = local_path + os.sep

        fs = storage.get_underlying_filesystem(path=self.path)

        # If it's already a local file, just copy it
        if "file" in fs.protocol:
+            # Apply directory logic for local-to-local copies
+            local_path_for_copy = local_path
+            if isinstance(local_path, str):
+                local_path_obj = Path(local_path)
+                # Check if it's a directory or ends with separator
+                if local_path.endswith(os.sep) or (local_path_obj.exists() and local_path_obj.is_dir()):
+                    remote_filename = Path(self.path).name
+                    local_path_for_copy = str(local_path_obj / remote_filename)
+
+            # Ensure parent directory exists
+            Path(local_path_for_copy).parent.mkdir(parents=True, exist_ok=True)
+
            # Use aiofiles for async copy
            async with aiofiles.open(self.path, "rb") as src:
-                async with aiofiles.open(
+                async with aiofiles.open(local_path_for_copy, "wb") as dst:
                    await dst.write(await src.read())
-            return str(
+            return str(local_path_for_copy)

        # Otherwise download from remote using async functionality
-        await storage.get(self.path, str(local_path))
+        result_path = await storage.get(self.path, str(local_path))
+        return result_path
+
+    def download_sync(self, local_path: Optional[Union[str, Path]] = None) -> str:
+        """
+        Synchronously download the file to a local path.
+
+        Use this in non-async tasks when you need to download a remote file to your local filesystem.
+
+        Example (Sync):
+
+        ```python
+        @env.task
+        def download_and_process_sync(f: File) -> str:
+            local_path = f.download_sync()
+            # Now process the local file
+            with open(local_path, "r") as fh:
+                return fh.read()
+        ```
+
+        Example (Download to specific path):
+
+        ```python
+        @env.task
+        def download_to_path_sync(f: File) -> str:
+            local_path = f.download_sync("/tmp/myfile.csv")
+            return local_path
+        ```
+
+        Args:
+            local_path: The local path to download the file to. If None, a temporary
+                directory will be used and a path will be generated.
+
+        Returns:
+            The absolute path to the downloaded file
+        """
+        if local_path is None:
+            local_path = storage.get_random_local_path(file_path_or_file_name=self.path)
+        else:
+            # Preserve trailing separator if present (Path.absolute() strips it)
+            local_path_str = str(local_path)
+            has_trailing_sep = local_path_str.endswith(os.sep)
+            local_path = str(Path(local_path).absolute())
+            if has_trailing_sep:
+                local_path = local_path + os.sep
+
+        fs = storage.get_underlying_filesystem(path=self.path)
+
+        # If it's already a local file, just copy it
+        if "file" in fs.protocol:
+            # Apply directory logic for local-to-local copies
+            local_path_for_copy = local_path
+            if isinstance(local_path, str):
+                local_path_obj = Path(local_path)
+                # Check if it's a directory or ends with separator
+                if local_path.endswith(os.sep) or (local_path_obj.exists() and local_path_obj.is_dir()):
+                    remote_filename = Path(self.path).name
+                    local_path_for_copy = str(local_path_obj / remote_filename)
+
+            # Ensure parent directory exists
+            Path(local_path_for_copy).parent.mkdir(parents=True, exist_ok=True)
+
+            # Use standard file operations for sync copy
+            import shutil
+
+            shutil.copy2(self.path, local_path_for_copy)
+            return str(local_path_for_copy)
+
+        # Otherwise download from remote using sync functionality
+        # Use the sync version of storage operations
+        with fs.open(self.path, "rb") as src:
+            with open(local_path, "wb") as dst:
+                dst.write(src.read())
        return str(local_path)

    @classmethod
    @requires_initialization
-
+    def from_local_sync(
+        cls,
+        local_path: Union[str, Path],
+        remote_destination: Optional[str] = None,
+        hash_method: Optional[HashMethod | str] = None,
+    ) -> File[T]:
        """
-
+        Synchronously create a new File object from a local file by uploading it to remote storage.
+
+        Use this in non-async tasks when you have a local file that needs to be uploaded to remote storage.
+
+        Example (Sync):
+
+        ```python
+        @env.task
+        def upload_local_file_sync() -> File:
+            # Create a local file
+            with open("/tmp/data.csv", "w") as f:
+                f.write("col1,col2\n1,2\n3,4\n")
+
+            # Upload to remote storage
+            remote_file = File.from_local_sync("/tmp/data.csv")
+            return remote_file
+        ```
+
+        Example (With specific destination):
+
+        ```python
+        @env.task
+        def upload_to_specific_path() -> File:
+            remote_file = File.from_local_sync("/tmp/data.csv", "s3://my-bucket/data.csv")
+            return remote_file
+        ```

        Args:
            local_path: Path to the local file
-            remote_destination: Optional path to store the file
+            remote_destination: Optional remote path to store the file. If None, a path will be automatically generated.
+            hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
+                it will be used as a precomputed cache key. If a HashMethod is provided, it will compute
+                the hash during upload. If not specified, the cache key will be based on file attributes.

        Returns:
-            A new File instance pointing to the uploaded file
-
-        Example:
-        ```python
-        remote_file = await File[DataFrame].from_local('/tmp/data.csv', 's3://bucket/data.csv')
-        ```
+            A new File instance pointing to the uploaded remote file
        """
        if not os.path.exists(local_path):
            raise ValueError(f"File not found: {local_path}")
@@ -377,20 +693,148 @@ class File(BaseModel, Generic[T], SerializableType):

        # If remote_destination was not set by the user, and the configured raw data path is also local,
        # then let's optimize by not uploading.
+        hash_value = hash_method if isinstance(hash_method, str) else None
+        hash_method_obj = hash_method if isinstance(hash_method, HashMethod) else None
+
        if "file" in protocol:
            if remote_destination is None:
                path = str(Path(local_path).absolute())
            else:
                # Otherwise, actually make a copy of the file
-
-
-
+                import shutil
+
+                if hash_method_obj:
+                    # For hash computation, we need to read and write manually
+                    with open(local_path, "rb") as src:
+                        with open(remote_path, "wb") as dst:
+                            dst_wrapper = HashingWriter(dst, accumulator=hash_method_obj)
+                            dst_wrapper.write(src.read())
+                            hash_value = dst_wrapper.result()
+                            dst_wrapper.close()
+                else:
+                    shutil.copy2(local_path, remote_path)
+                path = str(Path(remote_path).absolute())
+        else:
+            # Otherwise upload to remote using sync storage layer
+            fs = storage.get_underlying_filesystem(path=remote_path)
+
+            if hash_method_obj:
+                # We can skip the wrapper if the hash method is just a precomputed value
+                if not isinstance(hash_method_obj, PrecomputedValue):
+                    with open(local_path, "rb") as src:
+                        # For sync operations, we need to compute hash manually
+                        data = src.read()
+                        hash_method_obj.update(memoryview(data))
+                        hash_value = hash_method_obj.result()
+
+                    # Now write the data to remote
+                    with fs.open(remote_path, "wb") as dst:
+                        dst.write(data)
+                    path = remote_path
+                else:
+                    # Use sync file operations
+                    with open(local_path, "rb") as src:
+                        with fs.open(remote_path, "wb") as dst:
+                            dst.write(src.read())
+                    path = remote_path
+                    hash_value = hash_method_obj.result()
+            else:
+                # Simple sync copy
+                with open(local_path, "rb") as src:
+                    with fs.open(remote_path, "wb") as dst:
+                        dst.write(src.read())
+                path = remote_path
+
+        f = cls(path=path, name=filename, hash_method=hash_method_obj, hash=hash_value)
+        return f
+
+    @classmethod
+    @requires_initialization
+    async def from_local(
+        cls,
+        local_path: Union[str, Path],
+        remote_destination: Optional[str] = None,
+        hash_method: Optional[HashMethod | str] = None,
+    ) -> File[T]:
+        """
+        Asynchronously create a new File object from a local file by uploading it to remote storage.
+
+        Use this in async tasks when you have a local file that needs to be uploaded to remote storage.
+
+        Example (Async):
+
+        ```python
+        @env.task
+        async def upload_local_file() -> File:
+            # Create a local file
+            async with aiofiles.open("/tmp/data.csv", "w") as f:
+                await f.write("col1,col2\n1,2\n3,4\n")
+
+            # Upload to remote storage
+            remote_file = await File.from_local("/tmp/data.csv")
+            return remote_file
+        ```
+
+        Example (With specific destination):
+
+        ```python
+        @env.task
+        async def upload_to_specific_path() -> File:
+            remote_file = await File.from_local("/tmp/data.csv", "s3://my-bucket/data.csv")
+            return remote_file
+        ```
+
+        Args:
+            local_path: Path to the local file
+            remote_destination: Optional remote path to store the file. If None, a path will be automatically generated.
+            hash_method: Optional HashMethod or string to use for cache key computation. If a string is provided,
+                it will be used as a precomputed cache key. If a HashMethod is provided, it will compute
+                the hash during upload. If not specified, the cache key will be based on file attributes.
+
+        Returns:
+            A new File instance pointing to the uploaded remote file
+        """
+        if not os.path.exists(local_path):
+            raise ValueError(f"File not found: {local_path}")
+
+        filename = Path(local_path).name
+        remote_path = remote_destination or internal_ctx().raw_data.get_random_remote_path(filename)
+        protocol = get_protocol(remote_path)
+
+        # If remote_destination was not set by the user, and the configured raw data path is also local,
+        # then let's optimize by not uploading.
+        hash_value = hash_method if isinstance(hash_method, str) else None
+        hash_method = hash_method if isinstance(hash_method, HashMethod) else None
+        if "file" in protocol:
+            if remote_destination is None:
+                path = str(Path(local_path).absolute())
+            else:
+                # Otherwise, actually make a copy of the file
+                async with aiofiles.open(local_path, "rb") as src:
+                    async with aiofiles.open(remote_path, "wb") as dst:
+                        if hash_method:
+                            dst_wrapper = HashingWriter(dst, accumulator=hash_method)
+                            await dst_wrapper.write(await src.read())
+                            hash_value = dst_wrapper.result()
+                        else:
+                            await dst.write(await src.read())
                path = str(Path(remote_path).absolute())
        else:
            # Otherwise upload to remote using async storage layer
-
+            if hash_method:
+                # We can skip the wrapper if the hash method is just a precomputed value
+                if not isinstance(hash_method, PrecomputedValue):
+                    async with aiofiles.open(local_path, "rb") as src:
+                        src_wrapper = AsyncHashingReader(src, accumulator=hash_method)
+                        path = await storage.put_stream(src_wrapper, to_path=remote_path)
+                        hash_value = src_wrapper.result()
+                else:
+                    path = await storage.put(str(local_path), remote_path)
+                    hash_value = hash_method.result()
+            else:
+                path = await storage.put(str(local_path), remote_path)

-        f = cls(path=path, name=filename)
+        f = cls(path=path, name=filename, hash_method=hash_method, hash=hash_value)
        return f

@@ -433,7 +877,8 @@ class FileTransformer(TypeTransformer[File]):
                    ),
                    uri=python_val.path,
                )
-            )
+            ),
+            hash=python_val.hash if python_val.hash else None,
        )

    async def to_python_value(
@@ -451,7 +896,8 @@ class FileTransformer(TypeTransformer[File]):

        uri = lv.scalar.blob.uri
        filename = Path(uri).name
-
+        hash_value = lv.hash if lv.hash else None
+        f: File = File(path=uri, name=filename, format=lv.scalar.blob.metadata.type.format, hash=hash_value)
        return f

    def guess_python_type(self, literal_type: types_pb2.LiteralType) -> Type[File]:
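Taken together, the `flyte/io/_file.py` changes above introduce an async-first `File` API with `_sync` counterparts and optional content hashing for cache keys. The sketch below strings the documented calls together; it assumes `env` is a `flyte.TaskEnvironment` (the task-environment module also changes in this release, see `flyte/_task_environment.py`) and that `File` is importable from `flyte.io`, so treat it as an illustration assembled from the docstring examples rather than an excerpt from the package.

```python
# Minimal sketch based on the docstring examples in the diff above.
# Assumptions: flyte.TaskEnvironment and flyte.io.File are the public entry points.
import flyte
from flyte.io import File

env = flyte.TaskEnvironment(name="file-demo")  # assumed environment setup


@env.task
async def write_report() -> File:
    # Stream bytes straight to blob storage; a string hash_method is used as a precomputed cache key.
    f = File.new_remote(hash_method="report-v1")
    async with f.open("wb") as fh:
        await fh.write(b"Hello, World!")
    return f


@env.task
async def read_report(f: File) -> str:
    # Read the remote file back without downloading it to disk.
    async with f.open("rb") as fh:
        content = bytes(await fh.read())
    return content.decode("utf-8")
```

The same flow works synchronously via `open_sync`, `download_sync`, and `from_local_sync`, per the docstrings in the diff.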