genesis-flow 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genesis_flow-1.0.0.dist-info/METADATA +822 -0
- genesis_flow-1.0.0.dist-info/RECORD +645 -0
- genesis_flow-1.0.0.dist-info/WHEEL +5 -0
- genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
- genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
- genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
- mlflow/__init__.py +367 -0
- mlflow/__main__.py +3 -0
- mlflow/ag2/__init__.py +56 -0
- mlflow/ag2/ag2_logger.py +294 -0
- mlflow/anthropic/__init__.py +40 -0
- mlflow/anthropic/autolog.py +129 -0
- mlflow/anthropic/chat.py +144 -0
- mlflow/artifacts/__init__.py +268 -0
- mlflow/autogen/__init__.py +144 -0
- mlflow/autogen/chat.py +142 -0
- mlflow/azure/__init__.py +26 -0
- mlflow/azure/auth_handler.py +257 -0
- mlflow/azure/client.py +319 -0
- mlflow/azure/config.py +120 -0
- mlflow/azure/connection_factory.py +340 -0
- mlflow/azure/exceptions.py +27 -0
- mlflow/azure/stores.py +327 -0
- mlflow/azure/utils.py +183 -0
- mlflow/bedrock/__init__.py +45 -0
- mlflow/bedrock/_autolog.py +202 -0
- mlflow/bedrock/chat.py +122 -0
- mlflow/bedrock/stream.py +160 -0
- mlflow/bedrock/utils.py +43 -0
- mlflow/cli.py +707 -0
- mlflow/client.py +12 -0
- mlflow/config/__init__.py +56 -0
- mlflow/crewai/__init__.py +79 -0
- mlflow/crewai/autolog.py +253 -0
- mlflow/crewai/chat.py +29 -0
- mlflow/data/__init__.py +75 -0
- mlflow/data/artifact_dataset_sources.py +170 -0
- mlflow/data/code_dataset_source.py +40 -0
- mlflow/data/dataset.py +123 -0
- mlflow/data/dataset_registry.py +168 -0
- mlflow/data/dataset_source.py +110 -0
- mlflow/data/dataset_source_registry.py +219 -0
- mlflow/data/delta_dataset_source.py +167 -0
- mlflow/data/digest_utils.py +108 -0
- mlflow/data/evaluation_dataset.py +562 -0
- mlflow/data/filesystem_dataset_source.py +81 -0
- mlflow/data/http_dataset_source.py +145 -0
- mlflow/data/huggingface_dataset.py +258 -0
- mlflow/data/huggingface_dataset_source.py +118 -0
- mlflow/data/meta_dataset.py +104 -0
- mlflow/data/numpy_dataset.py +223 -0
- mlflow/data/pandas_dataset.py +231 -0
- mlflow/data/polars_dataset.py +352 -0
- mlflow/data/pyfunc_dataset_mixin.py +31 -0
- mlflow/data/schema.py +76 -0
- mlflow/data/sources.py +1 -0
- mlflow/data/spark_dataset.py +406 -0
- mlflow/data/spark_dataset_source.py +74 -0
- mlflow/data/spark_delta_utils.py +118 -0
- mlflow/data/tensorflow_dataset.py +350 -0
- mlflow/data/uc_volume_dataset_source.py +81 -0
- mlflow/db.py +27 -0
- mlflow/dspy/__init__.py +17 -0
- mlflow/dspy/autolog.py +197 -0
- mlflow/dspy/callback.py +398 -0
- mlflow/dspy/constant.py +1 -0
- mlflow/dspy/load.py +93 -0
- mlflow/dspy/save.py +393 -0
- mlflow/dspy/util.py +109 -0
- mlflow/dspy/wrapper.py +226 -0
- mlflow/entities/__init__.py +104 -0
- mlflow/entities/_mlflow_object.py +52 -0
- mlflow/entities/assessment.py +545 -0
- mlflow/entities/assessment_error.py +80 -0
- mlflow/entities/assessment_source.py +141 -0
- mlflow/entities/dataset.py +92 -0
- mlflow/entities/dataset_input.py +51 -0
- mlflow/entities/dataset_summary.py +62 -0
- mlflow/entities/document.py +48 -0
- mlflow/entities/experiment.py +109 -0
- mlflow/entities/experiment_tag.py +35 -0
- mlflow/entities/file_info.py +45 -0
- mlflow/entities/input_tag.py +35 -0
- mlflow/entities/lifecycle_stage.py +35 -0
- mlflow/entities/logged_model.py +228 -0
- mlflow/entities/logged_model_input.py +26 -0
- mlflow/entities/logged_model_output.py +32 -0
- mlflow/entities/logged_model_parameter.py +46 -0
- mlflow/entities/logged_model_status.py +74 -0
- mlflow/entities/logged_model_tag.py +33 -0
- mlflow/entities/metric.py +200 -0
- mlflow/entities/model_registry/__init__.py +29 -0
- mlflow/entities/model_registry/_model_registry_entity.py +13 -0
- mlflow/entities/model_registry/model_version.py +243 -0
- mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
- mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
- mlflow/entities/model_registry/model_version_search.py +25 -0
- mlflow/entities/model_registry/model_version_stages.py +25 -0
- mlflow/entities/model_registry/model_version_status.py +35 -0
- mlflow/entities/model_registry/model_version_tag.py +35 -0
- mlflow/entities/model_registry/prompt.py +73 -0
- mlflow/entities/model_registry/prompt_version.py +244 -0
- mlflow/entities/model_registry/registered_model.py +175 -0
- mlflow/entities/model_registry/registered_model_alias.py +35 -0
- mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
- mlflow/entities/model_registry/registered_model_search.py +25 -0
- mlflow/entities/model_registry/registered_model_tag.py +35 -0
- mlflow/entities/multipart_upload.py +74 -0
- mlflow/entities/param.py +49 -0
- mlflow/entities/run.py +97 -0
- mlflow/entities/run_data.py +84 -0
- mlflow/entities/run_info.py +188 -0
- mlflow/entities/run_inputs.py +59 -0
- mlflow/entities/run_outputs.py +43 -0
- mlflow/entities/run_status.py +41 -0
- mlflow/entities/run_tag.py +36 -0
- mlflow/entities/source_type.py +31 -0
- mlflow/entities/span.py +774 -0
- mlflow/entities/span_event.py +96 -0
- mlflow/entities/span_status.py +102 -0
- mlflow/entities/trace.py +317 -0
- mlflow/entities/trace_data.py +71 -0
- mlflow/entities/trace_info.py +220 -0
- mlflow/entities/trace_info_v2.py +162 -0
- mlflow/entities/trace_location.py +173 -0
- mlflow/entities/trace_state.py +39 -0
- mlflow/entities/trace_status.py +68 -0
- mlflow/entities/view_type.py +51 -0
- mlflow/environment_variables.py +866 -0
- mlflow/evaluation/__init__.py +16 -0
- mlflow/evaluation/assessment.py +369 -0
- mlflow/evaluation/evaluation.py +411 -0
- mlflow/evaluation/evaluation_tag.py +61 -0
- mlflow/evaluation/fluent.py +48 -0
- mlflow/evaluation/utils.py +201 -0
- mlflow/exceptions.py +213 -0
- mlflow/experiments.py +140 -0
- mlflow/gemini/__init__.py +81 -0
- mlflow/gemini/autolog.py +186 -0
- mlflow/gemini/chat.py +261 -0
- mlflow/genai/__init__.py +71 -0
- mlflow/genai/datasets/__init__.py +67 -0
- mlflow/genai/datasets/evaluation_dataset.py +131 -0
- mlflow/genai/evaluation/__init__.py +3 -0
- mlflow/genai/evaluation/base.py +411 -0
- mlflow/genai/evaluation/constant.py +23 -0
- mlflow/genai/evaluation/utils.py +244 -0
- mlflow/genai/judges/__init__.py +21 -0
- mlflow/genai/judges/databricks.py +404 -0
- mlflow/genai/label_schemas/__init__.py +153 -0
- mlflow/genai/label_schemas/label_schemas.py +209 -0
- mlflow/genai/labeling/__init__.py +159 -0
- mlflow/genai/labeling/labeling.py +250 -0
- mlflow/genai/optimize/__init__.py +13 -0
- mlflow/genai/optimize/base.py +198 -0
- mlflow/genai/optimize/optimizers/__init__.py +4 -0
- mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
- mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
- mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
- mlflow/genai/optimize/types.py +75 -0
- mlflow/genai/optimize/util.py +30 -0
- mlflow/genai/prompts/__init__.py +206 -0
- mlflow/genai/scheduled_scorers.py +431 -0
- mlflow/genai/scorers/__init__.py +26 -0
- mlflow/genai/scorers/base.py +492 -0
- mlflow/genai/scorers/builtin_scorers.py +765 -0
- mlflow/genai/scorers/scorer_utils.py +138 -0
- mlflow/genai/scorers/validation.py +165 -0
- mlflow/genai/utils/data_validation.py +146 -0
- mlflow/genai/utils/enum_utils.py +23 -0
- mlflow/genai/utils/trace_utils.py +211 -0
- mlflow/groq/__init__.py +42 -0
- mlflow/groq/_groq_autolog.py +74 -0
- mlflow/johnsnowlabs/__init__.py +888 -0
- mlflow/langchain/__init__.py +24 -0
- mlflow/langchain/api_request_parallel_processor.py +330 -0
- mlflow/langchain/autolog.py +147 -0
- mlflow/langchain/chat_agent_langgraph.py +340 -0
- mlflow/langchain/constant.py +1 -0
- mlflow/langchain/constants.py +1 -0
- mlflow/langchain/databricks_dependencies.py +444 -0
- mlflow/langchain/langchain_tracer.py +597 -0
- mlflow/langchain/model.py +919 -0
- mlflow/langchain/output_parsers.py +142 -0
- mlflow/langchain/retriever_chain.py +153 -0
- mlflow/langchain/runnables.py +527 -0
- mlflow/langchain/utils/chat.py +402 -0
- mlflow/langchain/utils/logging.py +671 -0
- mlflow/langchain/utils/serialization.py +36 -0
- mlflow/legacy_databricks_cli/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/provider.py +482 -0
- mlflow/litellm/__init__.py +175 -0
- mlflow/llama_index/__init__.py +22 -0
- mlflow/llama_index/autolog.py +55 -0
- mlflow/llama_index/chat.py +43 -0
- mlflow/llama_index/constant.py +1 -0
- mlflow/llama_index/model.py +577 -0
- mlflow/llama_index/pyfunc_wrapper.py +332 -0
- mlflow/llama_index/serialize_objects.py +188 -0
- mlflow/llama_index/tracer.py +561 -0
- mlflow/metrics/__init__.py +479 -0
- mlflow/metrics/base.py +39 -0
- mlflow/metrics/genai/__init__.py +25 -0
- mlflow/metrics/genai/base.py +101 -0
- mlflow/metrics/genai/genai_metric.py +771 -0
- mlflow/metrics/genai/metric_definitions.py +450 -0
- mlflow/metrics/genai/model_utils.py +371 -0
- mlflow/metrics/genai/prompt_template.py +68 -0
- mlflow/metrics/genai/prompts/__init__.py +0 -0
- mlflow/metrics/genai/prompts/v1.py +422 -0
- mlflow/metrics/genai/utils.py +6 -0
- mlflow/metrics/metric_definitions.py +619 -0
- mlflow/mismatch.py +34 -0
- mlflow/mistral/__init__.py +34 -0
- mlflow/mistral/autolog.py +71 -0
- mlflow/mistral/chat.py +135 -0
- mlflow/ml_package_versions.py +452 -0
- mlflow/models/__init__.py +97 -0
- mlflow/models/auth_policy.py +83 -0
- mlflow/models/cli.py +354 -0
- mlflow/models/container/__init__.py +294 -0
- mlflow/models/container/scoring_server/__init__.py +0 -0
- mlflow/models/container/scoring_server/nginx.conf +39 -0
- mlflow/models/dependencies_schemas.py +287 -0
- mlflow/models/display_utils.py +158 -0
- mlflow/models/docker_utils.py +211 -0
- mlflow/models/evaluation/__init__.py +23 -0
- mlflow/models/evaluation/_shap_patch.py +64 -0
- mlflow/models/evaluation/artifacts.py +194 -0
- mlflow/models/evaluation/base.py +1811 -0
- mlflow/models/evaluation/calibration_curve.py +109 -0
- mlflow/models/evaluation/default_evaluator.py +996 -0
- mlflow/models/evaluation/deprecated.py +23 -0
- mlflow/models/evaluation/evaluator_registry.py +80 -0
- mlflow/models/evaluation/evaluators/classifier.py +704 -0
- mlflow/models/evaluation/evaluators/default.py +233 -0
- mlflow/models/evaluation/evaluators/regressor.py +96 -0
- mlflow/models/evaluation/evaluators/shap.py +296 -0
- mlflow/models/evaluation/lift_curve.py +178 -0
- mlflow/models/evaluation/utils/metric.py +123 -0
- mlflow/models/evaluation/utils/trace.py +179 -0
- mlflow/models/evaluation/validation.py +434 -0
- mlflow/models/flavor_backend.py +93 -0
- mlflow/models/flavor_backend_registry.py +53 -0
- mlflow/models/model.py +1639 -0
- mlflow/models/model_config.py +150 -0
- mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
- mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
- mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
- mlflow/models/python_api.py +369 -0
- mlflow/models/rag_signatures.py +128 -0
- mlflow/models/resources.py +321 -0
- mlflow/models/signature.py +662 -0
- mlflow/models/utils.py +2054 -0
- mlflow/models/wheeled_model.py +280 -0
- mlflow/openai/__init__.py +57 -0
- mlflow/openai/_agent_tracer.py +364 -0
- mlflow/openai/api_request_parallel_processor.py +131 -0
- mlflow/openai/autolog.py +509 -0
- mlflow/openai/constant.py +1 -0
- mlflow/openai/model.py +824 -0
- mlflow/openai/utils/chat_schema.py +367 -0
- mlflow/optuna/__init__.py +3 -0
- mlflow/optuna/storage.py +646 -0
- mlflow/plugins/__init__.py +72 -0
- mlflow/plugins/base.py +358 -0
- mlflow/plugins/builtin/__init__.py +24 -0
- mlflow/plugins/builtin/pytorch_plugin.py +150 -0
- mlflow/plugins/builtin/sklearn_plugin.py +158 -0
- mlflow/plugins/builtin/transformers_plugin.py +187 -0
- mlflow/plugins/cli.py +321 -0
- mlflow/plugins/discovery.py +340 -0
- mlflow/plugins/manager.py +465 -0
- mlflow/plugins/registry.py +316 -0
- mlflow/plugins/templates/framework_plugin_template.py +329 -0
- mlflow/prompt/constants.py +20 -0
- mlflow/prompt/promptlab_model.py +197 -0
- mlflow/prompt/registry_utils.py +248 -0
- mlflow/promptflow/__init__.py +495 -0
- mlflow/protos/__init__.py +0 -0
- mlflow/protos/assessments_pb2.py +174 -0
- mlflow/protos/databricks_artifacts_pb2.py +489 -0
- mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
- mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
- mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
- mlflow/protos/databricks_pb2.py +267 -0
- mlflow/protos/databricks_trace_server_pb2.py +374 -0
- mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
- mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
- mlflow/protos/facet_feature_statistics_pb2.py +296 -0
- mlflow/protos/internal_pb2.py +77 -0
- mlflow/protos/mlflow_artifacts_pb2.py +336 -0
- mlflow/protos/model_registry_pb2.py +1073 -0
- mlflow/protos/scalapb/__init__.py +0 -0
- mlflow/protos/scalapb/scalapb_pb2.py +104 -0
- mlflow/protos/service_pb2.py +2600 -0
- mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
- mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
- mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
- mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
- mlflow/py.typed +0 -0
- mlflow/pydantic_ai/__init__.py +57 -0
- mlflow/pydantic_ai/autolog.py +173 -0
- mlflow/pyfunc/__init__.py +3844 -0
- mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
- mlflow/pyfunc/backend.py +523 -0
- mlflow/pyfunc/context.py +78 -0
- mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
- mlflow/pyfunc/loaders/__init__.py +7 -0
- mlflow/pyfunc/loaders/chat_agent.py +117 -0
- mlflow/pyfunc/loaders/chat_model.py +125 -0
- mlflow/pyfunc/loaders/code_model.py +31 -0
- mlflow/pyfunc/loaders/responses_agent.py +112 -0
- mlflow/pyfunc/mlserver.py +46 -0
- mlflow/pyfunc/model.py +1473 -0
- mlflow/pyfunc/scoring_server/__init__.py +604 -0
- mlflow/pyfunc/scoring_server/app.py +7 -0
- mlflow/pyfunc/scoring_server/client.py +146 -0
- mlflow/pyfunc/spark_model_cache.py +48 -0
- mlflow/pyfunc/stdin_server.py +44 -0
- mlflow/pyfunc/utils/__init__.py +3 -0
- mlflow/pyfunc/utils/data_validation.py +224 -0
- mlflow/pyfunc/utils/environment.py +22 -0
- mlflow/pyfunc/utils/input_converter.py +47 -0
- mlflow/pyfunc/utils/serving_data_parser.py +11 -0
- mlflow/pytorch/__init__.py +1171 -0
- mlflow/pytorch/_lightning_autolog.py +580 -0
- mlflow/pytorch/_pytorch_autolog.py +50 -0
- mlflow/pytorch/pickle_module.py +35 -0
- mlflow/rfunc/__init__.py +42 -0
- mlflow/rfunc/backend.py +134 -0
- mlflow/runs.py +89 -0
- mlflow/server/__init__.py +302 -0
- mlflow/server/auth/__init__.py +1224 -0
- mlflow/server/auth/__main__.py +4 -0
- mlflow/server/auth/basic_auth.ini +6 -0
- mlflow/server/auth/cli.py +11 -0
- mlflow/server/auth/client.py +537 -0
- mlflow/server/auth/config.py +34 -0
- mlflow/server/auth/db/__init__.py +0 -0
- mlflow/server/auth/db/cli.py +18 -0
- mlflow/server/auth/db/migrations/__init__.py +0 -0
- mlflow/server/auth/db/migrations/alembic.ini +110 -0
- mlflow/server/auth/db/migrations/env.py +76 -0
- mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
- mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
- mlflow/server/auth/db/models.py +67 -0
- mlflow/server/auth/db/utils.py +37 -0
- mlflow/server/auth/entities.py +165 -0
- mlflow/server/auth/logo.py +14 -0
- mlflow/server/auth/permissions.py +65 -0
- mlflow/server/auth/routes.py +18 -0
- mlflow/server/auth/sqlalchemy_store.py +263 -0
- mlflow/server/graphql/__init__.py +0 -0
- mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
- mlflow/server/graphql/graphql_custom_scalars.py +24 -0
- mlflow/server/graphql/graphql_errors.py +15 -0
- mlflow/server/graphql/graphql_no_batching.py +89 -0
- mlflow/server/graphql/graphql_schema_extensions.py +74 -0
- mlflow/server/handlers.py +3217 -0
- mlflow/server/prometheus_exporter.py +17 -0
- mlflow/server/validation.py +30 -0
- mlflow/shap/__init__.py +691 -0
- mlflow/sklearn/__init__.py +1994 -0
- mlflow/sklearn/utils.py +1041 -0
- mlflow/smolagents/__init__.py +66 -0
- mlflow/smolagents/autolog.py +139 -0
- mlflow/smolagents/chat.py +29 -0
- mlflow/store/__init__.py +10 -0
- mlflow/store/_unity_catalog/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/constants.py +2 -0
- mlflow/store/_unity_catalog/registry/__init__.py +6 -0
- mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
- mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
- mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
- mlflow/store/_unity_catalog/registry/utils.py +121 -0
- mlflow/store/artifact/__init__.py +0 -0
- mlflow/store/artifact/artifact_repo.py +472 -0
- mlflow/store/artifact/artifact_repository_registry.py +154 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
- mlflow/store/artifact/cli.py +141 -0
- mlflow/store/artifact/cloud_artifact_repo.py +332 -0
- mlflow/store/artifact/databricks_artifact_repo.py +729 -0
- mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
- mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
- mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
- mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
- mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
- mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
- mlflow/store/artifact/ftp_artifact_repo.py +132 -0
- mlflow/store/artifact/gcs_artifact_repo.py +296 -0
- mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
- mlflow/store/artifact/http_artifact_repo.py +218 -0
- mlflow/store/artifact/local_artifact_repo.py +142 -0
- mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
- mlflow/store/artifact/models_artifact_repo.py +259 -0
- mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
- mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
- mlflow/store/artifact/r2_artifact_repo.py +70 -0
- mlflow/store/artifact/runs_artifact_repo.py +265 -0
- mlflow/store/artifact/s3_artifact_repo.py +330 -0
- mlflow/store/artifact/sftp_artifact_repo.py +141 -0
- mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
- mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
- mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
- mlflow/store/artifact/utils/__init__.py +0 -0
- mlflow/store/artifact/utils/models.py +148 -0
- mlflow/store/db/__init__.py +0 -0
- mlflow/store/db/base_sql_model.py +3 -0
- mlflow/store/db/db_types.py +10 -0
- mlflow/store/db/utils.py +314 -0
- mlflow/store/db_migrations/__init__.py +0 -0
- mlflow/store/db_migrations/alembic.ini +74 -0
- mlflow/store/db_migrations/env.py +84 -0
- mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
- mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
- mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
- mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
- mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
- mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
- mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
- mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
- mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
- mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
- mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
- mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
- mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
- mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
- mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
- mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
- mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
- mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
- mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
- mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
- mlflow/store/db_migrations/versions/__init__.py +0 -0
- mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
- mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
- mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
- mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
- mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
- mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
- mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
- mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
- mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
- mlflow/store/entities/__init__.py +3 -0
- mlflow/store/entities/paged_list.py +18 -0
- mlflow/store/model_registry/__init__.py +10 -0
- mlflow/store/model_registry/abstract_store.py +1081 -0
- mlflow/store/model_registry/base_rest_store.py +44 -0
- mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
- mlflow/store/model_registry/dbmodels/__init__.py +0 -0
- mlflow/store/model_registry/dbmodels/models.py +206 -0
- mlflow/store/model_registry/file_store.py +1091 -0
- mlflow/store/model_registry/rest_store.py +481 -0
- mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
- mlflow/store/tracking/__init__.py +23 -0
- mlflow/store/tracking/abstract_store.py +816 -0
- mlflow/store/tracking/dbmodels/__init__.py +0 -0
- mlflow/store/tracking/dbmodels/initial_models.py +243 -0
- mlflow/store/tracking/dbmodels/models.py +1073 -0
- mlflow/store/tracking/file_store.py +2438 -0
- mlflow/store/tracking/postgres_managed_identity.py +146 -0
- mlflow/store/tracking/rest_store.py +1131 -0
- mlflow/store/tracking/sqlalchemy_store.py +2785 -0
- mlflow/system_metrics/__init__.py +61 -0
- mlflow/system_metrics/metrics/__init__.py +0 -0
- mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
- mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
- mlflow/system_metrics/metrics/disk_monitor.py +21 -0
- mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
- mlflow/system_metrics/metrics/network_monitor.py +34 -0
- mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
- mlflow/system_metrics/system_metrics_monitor.py +198 -0
- mlflow/tracing/__init__.py +16 -0
- mlflow/tracing/assessment.py +356 -0
- mlflow/tracing/client.py +531 -0
- mlflow/tracing/config.py +125 -0
- mlflow/tracing/constant.py +105 -0
- mlflow/tracing/destination.py +81 -0
- mlflow/tracing/display/__init__.py +40 -0
- mlflow/tracing/display/display_handler.py +196 -0
- mlflow/tracing/export/async_export_queue.py +186 -0
- mlflow/tracing/export/inference_table.py +138 -0
- mlflow/tracing/export/mlflow_v3.py +137 -0
- mlflow/tracing/export/utils.py +70 -0
- mlflow/tracing/fluent.py +1417 -0
- mlflow/tracing/processor/base_mlflow.py +199 -0
- mlflow/tracing/processor/inference_table.py +175 -0
- mlflow/tracing/processor/mlflow_v3.py +47 -0
- mlflow/tracing/processor/otel.py +73 -0
- mlflow/tracing/provider.py +487 -0
- mlflow/tracing/trace_manager.py +200 -0
- mlflow/tracing/utils/__init__.py +616 -0
- mlflow/tracing/utils/artifact_utils.py +28 -0
- mlflow/tracing/utils/copy.py +55 -0
- mlflow/tracing/utils/environment.py +55 -0
- mlflow/tracing/utils/exception.py +21 -0
- mlflow/tracing/utils/once.py +35 -0
- mlflow/tracing/utils/otlp.py +63 -0
- mlflow/tracing/utils/processor.py +54 -0
- mlflow/tracing/utils/search.py +292 -0
- mlflow/tracing/utils/timeout.py +250 -0
- mlflow/tracing/utils/token.py +19 -0
- mlflow/tracing/utils/truncation.py +124 -0
- mlflow/tracing/utils/warning.py +76 -0
- mlflow/tracking/__init__.py +39 -0
- mlflow/tracking/_model_registry/__init__.py +1 -0
- mlflow/tracking/_model_registry/client.py +764 -0
- mlflow/tracking/_model_registry/fluent.py +853 -0
- mlflow/tracking/_model_registry/registry.py +67 -0
- mlflow/tracking/_model_registry/utils.py +251 -0
- mlflow/tracking/_tracking_service/__init__.py +0 -0
- mlflow/tracking/_tracking_service/client.py +883 -0
- mlflow/tracking/_tracking_service/registry.py +56 -0
- mlflow/tracking/_tracking_service/utils.py +275 -0
- mlflow/tracking/artifact_utils.py +179 -0
- mlflow/tracking/client.py +5900 -0
- mlflow/tracking/context/__init__.py +0 -0
- mlflow/tracking/context/abstract_context.py +35 -0
- mlflow/tracking/context/databricks_cluster_context.py +15 -0
- mlflow/tracking/context/databricks_command_context.py +15 -0
- mlflow/tracking/context/databricks_job_context.py +49 -0
- mlflow/tracking/context/databricks_notebook_context.py +41 -0
- mlflow/tracking/context/databricks_repo_context.py +43 -0
- mlflow/tracking/context/default_context.py +51 -0
- mlflow/tracking/context/git_context.py +32 -0
- mlflow/tracking/context/registry.py +98 -0
- mlflow/tracking/context/system_environment_context.py +15 -0
- mlflow/tracking/default_experiment/__init__.py +1 -0
- mlflow/tracking/default_experiment/abstract_context.py +43 -0
- mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
- mlflow/tracking/default_experiment/registry.py +75 -0
- mlflow/tracking/fluent.py +3595 -0
- mlflow/tracking/metric_value_conversion_utils.py +93 -0
- mlflow/tracking/multimedia.py +206 -0
- mlflow/tracking/registry.py +86 -0
- mlflow/tracking/request_auth/__init__.py +0 -0
- mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
- mlflow/tracking/request_auth/registry.py +60 -0
- mlflow/tracking/request_header/__init__.py +0 -0
- mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
- mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
- mlflow/tracking/request_header/default_request_header_provider.py +17 -0
- mlflow/tracking/request_header/registry.py +79 -0
- mlflow/transformers/__init__.py +2982 -0
- mlflow/transformers/flavor_config.py +258 -0
- mlflow/transformers/hub_utils.py +83 -0
- mlflow/transformers/llm_inference_utils.py +468 -0
- mlflow/transformers/model_io.py +301 -0
- mlflow/transformers/peft.py +51 -0
- mlflow/transformers/signature.py +183 -0
- mlflow/transformers/torch_utils.py +55 -0
- mlflow/types/__init__.py +21 -0
- mlflow/types/agent.py +270 -0
- mlflow/types/chat.py +240 -0
- mlflow/types/llm.py +935 -0
- mlflow/types/responses.py +139 -0
- mlflow/types/responses_helpers.py +416 -0
- mlflow/types/schema.py +1505 -0
- mlflow/types/type_hints.py +647 -0
- mlflow/types/utils.py +753 -0
- mlflow/utils/__init__.py +283 -0
- mlflow/utils/_capture_modules.py +256 -0
- mlflow/utils/_capture_transformers_modules.py +75 -0
- mlflow/utils/_spark_utils.py +201 -0
- mlflow/utils/_unity_catalog_oss_utils.py +97 -0
- mlflow/utils/_unity_catalog_utils.py +479 -0
- mlflow/utils/annotations.py +218 -0
- mlflow/utils/arguments_utils.py +16 -0
- mlflow/utils/async_logging/__init__.py +1 -0
- mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
- mlflow/utils/async_logging/async_logging_queue.py +366 -0
- mlflow/utils/async_logging/run_artifact.py +38 -0
- mlflow/utils/async_logging/run_batch.py +58 -0
- mlflow/utils/async_logging/run_operations.py +49 -0
- mlflow/utils/autologging_utils/__init__.py +737 -0
- mlflow/utils/autologging_utils/client.py +432 -0
- mlflow/utils/autologging_utils/config.py +33 -0
- mlflow/utils/autologging_utils/events.py +294 -0
- mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
- mlflow/utils/autologging_utils/metrics_queue.py +71 -0
- mlflow/utils/autologging_utils/safety.py +1104 -0
- mlflow/utils/autologging_utils/versioning.py +95 -0
- mlflow/utils/checkpoint_utils.py +206 -0
- mlflow/utils/class_utils.py +6 -0
- mlflow/utils/cli_args.py +257 -0
- mlflow/utils/conda.py +354 -0
- mlflow/utils/credentials.py +231 -0
- mlflow/utils/data_utils.py +17 -0
- mlflow/utils/databricks_utils.py +1436 -0
- mlflow/utils/docstring_utils.py +477 -0
- mlflow/utils/doctor.py +133 -0
- mlflow/utils/download_cloud_file_chunk.py +43 -0
- mlflow/utils/env_manager.py +16 -0
- mlflow/utils/env_pack.py +131 -0
- mlflow/utils/environment.py +1009 -0
- mlflow/utils/exception_utils.py +14 -0
- mlflow/utils/file_utils.py +978 -0
- mlflow/utils/git_utils.py +77 -0
- mlflow/utils/gorilla.py +797 -0
- mlflow/utils/import_hooks/__init__.py +363 -0
- mlflow/utils/lazy_load.py +51 -0
- mlflow/utils/logging_utils.py +168 -0
- mlflow/utils/mime_type_utils.py +58 -0
- mlflow/utils/mlflow_tags.py +103 -0
- mlflow/utils/model_utils.py +486 -0
- mlflow/utils/name_utils.py +346 -0
- mlflow/utils/nfs_on_spark.py +62 -0
- mlflow/utils/openai_utils.py +164 -0
- mlflow/utils/os.py +12 -0
- mlflow/utils/oss_registry_utils.py +29 -0
- mlflow/utils/plugins.py +17 -0
- mlflow/utils/process.py +182 -0
- mlflow/utils/promptlab_utils.py +146 -0
- mlflow/utils/proto_json_utils.py +743 -0
- mlflow/utils/pydantic_utils.py +54 -0
- mlflow/utils/request_utils.py +279 -0
- mlflow/utils/requirements_utils.py +704 -0
- mlflow/utils/rest_utils.py +673 -0
- mlflow/utils/search_logged_model_utils.py +127 -0
- mlflow/utils/search_utils.py +2111 -0
- mlflow/utils/secure_loading.py +221 -0
- mlflow/utils/security_validation.py +384 -0
- mlflow/utils/server_cli_utils.py +61 -0
- mlflow/utils/spark_utils.py +15 -0
- mlflow/utils/string_utils.py +138 -0
- mlflow/utils/thread_utils.py +63 -0
- mlflow/utils/time.py +54 -0
- mlflow/utils/timeout.py +42 -0
- mlflow/utils/uri.py +572 -0
- mlflow/utils/validation.py +662 -0
- mlflow/utils/virtualenv.py +458 -0
- mlflow/utils/warnings_utils.py +25 -0
- mlflow/utils/yaml_utils.py +179 -0
- mlflow/version.py +24 -0
@@ -0,0 +1,978 @@
|
|
1
|
+
import atexit
|
2
|
+
import codecs
|
3
|
+
import errno
|
4
|
+
import fnmatch
|
5
|
+
import gzip
|
6
|
+
import importlib.util
|
7
|
+
import json
|
8
|
+
import logging
|
9
|
+
import math
|
10
|
+
import os
|
11
|
+
import pathlib
|
12
|
+
import posixpath
|
13
|
+
import shutil
|
14
|
+
import stat
|
15
|
+
import subprocess
|
16
|
+
import sys
|
17
|
+
import tarfile
|
18
|
+
import tempfile
|
19
|
+
import time
|
20
|
+
import urllib.parse
|
21
|
+
import urllib.request
|
22
|
+
import uuid
|
23
|
+
from concurrent.futures import as_completed
|
24
|
+
from contextlib import contextmanager
|
25
|
+
from dataclasses import dataclass
|
26
|
+
from subprocess import CalledProcessError, TimeoutExpired
|
27
|
+
from typing import Any, Optional, Union
|
28
|
+
from urllib.parse import unquote
|
29
|
+
from urllib.request import pathname2url
|
30
|
+
|
31
|
+
from mlflow.entities import FileInfo
|
32
|
+
from mlflow.environment_variables import (
|
33
|
+
_MLFLOW_MPD_NUM_RETRIES,
|
34
|
+
_MLFLOW_MPD_RETRY_INTERVAL_SECONDS,
|
35
|
+
MLFLOW_DOWNLOAD_CHUNK_TIMEOUT,
|
36
|
+
MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR,
|
37
|
+
)
|
38
|
+
from mlflow.exceptions import MlflowException
|
39
|
+
from mlflow.protos.databricks_artifacts_pb2 import ArtifactCredentialType
|
40
|
+
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
|
41
|
+
from mlflow.utils import download_cloud_file_chunk
|
42
|
+
from mlflow.utils.databricks_utils import (
|
43
|
+
get_databricks_local_temp_dir,
|
44
|
+
get_databricks_nfs_temp_dir,
|
45
|
+
)
|
46
|
+
from mlflow.utils.os import is_windows
|
47
|
+
from mlflow.utils.process import cache_return_value_per_process
|
48
|
+
from mlflow.utils.request_utils import cloud_storage_http_request, download_chunk
|
49
|
+
from mlflow.utils.rest_utils import augmented_raise_for_status
|
50
|
+
|
51
|
+
# Text encoding passed to the codecs-based read/write helpers in this module.
ENCODING = "utf-8"
# File size (in bytes) from which `ArtifactProgressBar.chunks` activates its progress bar.
_PROGRESS_BAR_DISPLAY_THRESHOLD = 500_000_000  # 500 MB

# Module-level logger.
_logger = logging.getLogger(__name__)

# This is for backward compatibility with databricks-feature-engineering<=0.10.2
if importlib.util.find_spec("yaml") is not None:
    # Use CSafeDumper when it can be imported; otherwise fall back to SafeDumper.
    try:
        from yaml import CSafeDumper as YamlSafeDumper
    except ImportError:
        from yaml import SafeDumper as YamlSafeDumper  # noqa: F401
|
62
|
+
|
63
|
+
|
64
|
+
class ArtifactProgressBar:
    """Progress tracker for artifact transfers, optionally backed by a tqdm bar.

    The instance always records its configuration; a visible bar (``self.pbar``) only exists
    after `set_pbar` succeeds. While ``self.pbar`` is falsy, `update` and ``__exit__`` are
    no-ops.
    """

    def __init__(self, desc, total, step, **kwargs) -> None:
        """Store the bar configuration without creating the bar.

        Args:
            desc: Description forwarded to tqdm.
            total: Total amount of work (bytes or number of files).
            step: Amount of work accounted for by one `update` call.
            kwargs: Extra keyword arguments forwarded to tqdm.
        """
        self.desc, self.total, self.step = desc, total, step
        self.kwargs = kwargs
        self.pbar = None
        self.progress = 0

    def set_pbar(self):
        """Create the tqdm bar if progress bars are enabled and tqdm can be used."""
        if not MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR.get():
            return
        try:
            from tqdm.auto import tqdm

            # Kept inside the ``try`` so an ImportError raised while constructing the bar
            # is ignored as well.
            self.pbar = tqdm(total=self.total, desc=self.desc, **self.kwargs)
        except ImportError:
            return

    @classmethod
    def chunks(cls, file_size, desc, chunk_size):
        """Build a byte-based tracker; the bar is only activated for large files."""
        tracker = cls(
            desc,
            total=file_size,
            step=chunk_size,
            unit="iB",
            unit_scale=True,
            unit_divisor=1024,
            miniters=1,
        )
        if not file_size < _PROGRESS_BAR_DISPLAY_THRESHOLD:
            tracker.set_pbar()
        return tracker

    @classmethod
    def files(cls, desc, total):
        """Build a file-count tracker that advances one file per `update`."""
        tracker = cls(desc, total=total, step=1)
        tracker.set_pbar()
        return tracker

    def update(self):
        """Advance the bar by one step, never going past ``total``."""
        if not self.pbar:
            return
        increment = min(self.total - self.progress, self.step)
        self.pbar.update(increment)
        self.pbar.refresh()
        self.progress += increment

    def __enter__(self):
        return self

    def __exit__(self, *args):
        """Close the underlying bar, if one was created."""
        if self.pbar:
            self.pbar.close()
|
116
|
+
|
117
|
+
|
118
|
+
def is_directory(name):
    """Return True when `name` refers to an existing directory."""
    return os.path.isdir(name)
|
120
|
+
|
121
|
+
|
122
|
+
def is_file(name):
    """Return True when `name` refers to an existing regular file."""
    return os.path.isfile(name)
|
124
|
+
|
125
|
+
|
126
|
+
def exists(name):
    """Return True when `name` refers to an existing file system entry."""
    return os.path.exists(name)
|
128
|
+
|
129
|
+
|
130
|
+
def list_all(root, filter_func=lambda x: True, full_path=False):
    """List the entries directly under `root` that satisfy `filter_func`.

    Args:
        root: Name of the directory whose direct children are listed.
        filter_func: Callable that receives each child's path (joined with `root`) and
            returns a truthy value for entries to keep.
        full_path: If True, results are paths that include `root`; otherwise bare names.

    Returns:
        List of the files or directories that satisfy the criteria.

    Raises:
        Exception: If `root` is not a directory.
    """
    if not is_directory(root):
        raise Exception(f"Invalid parent directory '{root}'")
    selected = []
    for entry in os.listdir(root):
        candidate = os.path.join(root, entry)
        if filter_func(candidate):
            selected.append(candidate if full_path else entry)
    return selected
|
146
|
+
|
147
|
+
|
148
|
+
def list_subdirs(dir_name, full_path=False):
    """List the directories directly under `dir_name`.

    Equivalent to UNIX command:
    ``find $dir_name -depth 1 -type d``

    Args:
        dir_name: Name of the directory to search.
        full_path: If True, results are paths that include `dir_name`.

    Returns:
        List of all directories directly under `dir_name`.
    """
    return list_all(dir_name, filter_func=os.path.isdir, full_path=full_path)
|
161
|
+
|
162
|
+
|
163
|
+
def list_files(dir_name, full_path=False):
    """List the regular files directly under `dir_name`.

    Equivalent to UNIX command:
    ``find $dir_name -depth 1 -type f``

    Args:
        dir_name: Name of the directory to search.
        full_path: If True, results are paths that include `dir_name`.

    Returns:
        List of all files directly under `dir_name`.
    """
    return list_all(dir_name, filter_func=os.path.isfile, full_path=full_path)
|
176
|
+
|
177
|
+
|
178
|
+
def find(root, name, full_path=False):
    """Look for an entry called `name` directly under `root`. Equivalent to:
    ``find $root -name "$name" -depth 1``

    Args:
        root: Name of the directory to search.
        name: Name of the file or directory to find directly under `root`.
        full_path: If True, results are paths that include `root`.

    Returns:
        List of matching files or directories.
    """
    wanted = os.path.join(root, name)

    def _is_wanted(candidate):
        return candidate == wanted

    return list_all(root, _is_wanted, full_path)
|
192
|
+
|
193
|
+
|
194
|
+
def mkdir(root, name=None):
    """Create the directory "root/name", or just "root" when `name` is None.

    Missing parent directories are created as well, and an already existing directory is
    not an error.

    Args:
        root: Name of the parent directory.
        name: Optional name of the leaf directory.

    Returns:
        Path to the created directory.
    """
    target = root if name is None else os.path.join(root, name)
    try:
        os.makedirs(target, exist_ok=True)
    except OSError as err:
        # Only tolerate "already exists" when the existing entry really is a directory.
        already_a_directory = err.errno == errno.EEXIST and os.path.isdir(target)
        if not already_a_directory:
            raise err
    return target
|
211
|
+
|
212
|
+
|
213
|
+
def make_containing_dirs(path):
    """Create the directory that contains `path` (and any missing parents).

    Nothing happens when the containing directory already exists, or when `path` has no
    directory component (a bare file name), in which case the containing directory is the
    current working directory.

    Args:
        path: File path whose containing directory should exist afterwards.
    """
    dir_name = os.path.dirname(path)
    # An empty dirname means "current directory"; os.makedirs("") would raise.
    if dir_name and not os.path.exists(dir_name):
        # exist_ok guards against another process creating the directory in between.
        os.makedirs(dir_name, exist_ok=True)
|
221
|
+
|
222
|
+
|
223
|
+
def read_parquet_as_pandas_df(data_parquet_path: str):
    """Load the given parquet data as a pandas DataFrame using the pyarrow engine.

    Args:
        data_parquet_path: String, path object (implementing os.PathLike[str]),
            or file-like object implementing a binary read() function. The string
            could be a URL. Valid URL schemes include http, ftp, s3, gs, and file.
            For file URLs, a host is expected. A local file could
            be: file://localhost/path/to/table.parquet. A file URL can also be a path to a
            directory that contains multiple partitioned parquet files. Pyarrow
            supports paths to directories as well as file URLs. A directory
            path could be: file://localhost/path/to/tables or s3://bucket/partition_dir.

    Returns:
        pandas dataframe
    """
    # Imported lazily so pandas is only required when this helper is actually called.
    import pandas

    frame = pandas.read_parquet(data_parquet_path, engine="pyarrow")
    return frame
|
242
|
+
|
243
|
+
|
244
|
+
def write_pandas_df_as_parquet(df, data_parquet_path: str):
    """Serialize a DataFrame to the binary parquet format using the pyarrow engine.

    Args:
        df: pandas data frame.
        data_parquet_path: String, path object (implementing os.PathLike[str]),
            or file-like object implementing a binary write() function.
    """
    writer = df.to_parquet
    writer(data_parquet_path, engine="pyarrow")
|
254
|
+
|
255
|
+
|
256
|
+
class TempDir:
    """Context manager around a freshly created temporary directory.

    Args:
        chdr: If True, make the temporary directory the working directory while inside
            the ``with`` block and restore the previous one on exit.
        remove_on_exit: If True, delete the temporary directory on exit.
    """

    def __init__(self, chdr=False, remove_on_exit=True):
        self._chdr = chdr
        self._remove = remove_on_exit
        # Working directory to restore on exit (only set while chdr is active).
        self._dir = None
        # Absolute path of the temporary directory (set on enter).
        self._path = None

    def __enter__(self):
        self._path = os.path.abspath(create_tmp_dir())
        assert os.path.exists(self._path)
        if not self._chdr:
            return self
        self._dir = os.path.abspath(os.getcwd())
        os.chdir(self._path)
        return self

    def __exit__(self, tp, val, traceback):
        # Leave the temporary directory before trying to delete it.
        if self._chdr and self._dir:
            os.chdir(self._dir)
            self._dir = None
        if self._remove:
            if os.path.exists(self._path):
                shutil.rmtree(self._path)

        assert not self._remove or not os.path.exists(self._path)

    def path(self, *path):
        """Join `path` components onto "./" (when chdr is set) or the temp directory."""
        base = "./" if self._chdr else self._path
        return os.path.join(base, *path)
|
282
|
+
|
283
|
+
|
284
|
+
def read_file_lines(parent_path, file_name):
    """Read a text file (decoded with ``ENCODING``) and return its lines.

    Args:
        parent_path: Full path to the directory that contains the file.
        file_name: Leaf file name.

    Returns:
        All lines in the file as a list, one element per line.
    """
    target = os.path.join(parent_path, file_name)
    with codecs.open(target, mode="r", encoding=ENCODING) as handle:
        lines = handle.readlines()
    return lines
|
298
|
+
|
299
|
+
|
300
|
+
def read_file(parent_path, file_name):
    """Read a text file (decoded with ``ENCODING``) and return its whole content.

    Args:
        parent_path: Full path to the directory that contains the file.
        file_name: Leaf file name.

    Returns:
        The contents of the file.
    """
    target = os.path.join(parent_path, file_name)
    with codecs.open(target, mode="r", encoding=ENCODING) as handle:
        content = handle.read()
    return content
|
314
|
+
|
315
|
+
|
316
|
+
def get_file_info(path, rel_path):
    """Build the `FileInfo` metadata record for a local path.

    Args:
        path: Path to the artifact on the local file system.
        rel_path: Relative path stored in the returned `FileInfo`.

    Returns:
        `FileInfo` object; directories carry no size, files carry their size in bytes.
    """
    if not is_directory(path):
        return FileInfo(rel_path, False, os.path.getsize(path))
    return FileInfo(rel_path, True, None)
|
330
|
+
|
331
|
+
|
332
|
+
def get_relative_path(root_path, target_path):
    """Strip the prefix shared with `root_path` and return `target_path` relative to it.

    Args:
        root_path: Root path.
        target_path: Path from which the common prefix is removed.

    Returns:
        Path relative to the common prefix of `root_path` and `target_path`.

    Raises:
        Exception: If `root_path` is longer than `target_path`.
    """
    if len(target_path) < len(root_path):
        raise Exception(f"Root path '{root_path}' longer than target path '{target_path}'")
    shared = os.path.commonprefix([root_path, target_path])
    return os.path.relpath(target_path, shared)
|
346
|
+
|
347
|
+
|
348
|
+
def mv(target, new_parent):
    """Move `target` to `new_parent` via ``shutil.move``."""
    shutil.move(target, new_parent)
|
350
|
+
|
351
|
+
|
352
|
+
def write_to(filename, data):
    """Write text `data` to `filename` encoded with ``ENCODING``, replacing prior content."""
    sink = codecs.open(filename, mode="w", encoding=ENCODING)
    with sink:
        sink.write(data)
|
355
|
+
|
356
|
+
|
357
|
+
def append_to(filename, data):
    """Append text `data` to `filename`, creating the file if it does not exist.

    The file is opened with ``ENCODING`` so appended text uses the same encoding as text
    written by `write_to`, rather than the platform's locale-dependent default.

    Args:
        filename: Path of the file to append to.
        data: Text to append.
    """
    with open(filename, "a", encoding=ENCODING) as handle:
        handle.write(data)
|
360
|
+
|
361
|
+
|
362
|
+
def make_tarfile(output_filename, source_dir, archive_name, custom_filter=None):
    """Create a reproducible gzipped tar archive of `source_dir`.

    Member modification times, the gzip header timestamp and the gzip header file name
    are all blanked so that archiving identical content yields identical bytes.

    Args:
        output_filename: Path of the ``.tar.gz`` file to create.
        source_dir: Directory (or file) to add to the archive.
        archive_name: Name under which `source_dir` is stored inside the archive.
        custom_filter: Optional callable applied to each ``TarInfo`` after its mtime has
            been reset; return the (possibly modified) ``TarInfo`` to keep the member or
            ``None`` to exclude it.
    """

    # Helper for filtering out modification timestamps
    def _filter_timestamps(tar_info):
        tar_info.mtime = 0
        return tar_info if custom_filter is None else custom_filter(tar_info)

    unzipped_file_handle, unzipped_filename = tempfile.mkstemp()
    # Only the name is needed; the tar is re-opened by path below.
    os.close(unzipped_file_handle)
    try:
        with tarfile.open(unzipped_filename, "w") as tar:
            tar.add(source_dir, arcname=archive_name, filter=_filter_timestamps)
        # When gzipping the tar, don't include the tar's filename or modification time in the
        # zipped archive (see https://docs.python.org/3/library/gzip.html#gzip.GzipFile)
        with (
            # GzipFile does not close a fileobj it was handed, so manage it explicitly.
            open(output_filename, "wb") as raw_output,
            gzip.GzipFile(filename="", fileobj=raw_output, mode="wb", mtime=0) as gzipped_tar,
            open(unzipped_filename, "rb") as tar,
        ):
            # Stream in blocks instead of loading the whole tar into memory.
            shutil.copyfileobj(tar, gzipped_tar)
    finally:
        # Best-effort cleanup of the intermediate tar; never mask the real outcome.
        try:
            os.remove(unzipped_filename)
        except OSError:
            pass
|
383
|
+
|
384
|
+
|
385
|
+
def _copy_project(src_path, dst_path=""):
    """Internal function used to copy MLflow project during development.

    The whole directory tree is copied, except entries matching the patterns listed in
    the project's ``.dockerignore`` (if present). The project is expected to be available
    as a local directory containing a ``pyproject.toml``.

    Args:
        src_path: Path to the original MLflow project
        dst_path: MLflow will be copied here

    Returns:
        Name of the MLflow project directory.
    """

    def _build_ignore(project_root):
        # Returns a shutil.copytree-style ignore callable, or None when there are no rules.
        ignore_file = os.path.join(project_root, ".dockerignore")
        if not os.path.exists(ignore_file):
            return None
        with open(ignore_file) as handle:
            rules = [line.strip() for line in handle.readlines()]
        if not rules:
            return None

        def _ignored(_, names):
            hits = {name for rule in rules for name in fnmatch.filter(names, rule)}
            return list(hits)

        return _ignored

    mlflow_dir = "mlflow-project"
    # check if we have project root
    marker = os.path.join(src_path, "pyproject.toml")
    assert os.path.isfile(marker), "file not found " + str(os.path.abspath(marker))
    destination = os.path.join(dst_path, mlflow_dir)
    shutil.copytree(src_path, destination, ignore=_build_ignore(src_path))
    return mlflow_dir
|
421
|
+
|
422
|
+
|
423
|
+
def _copy_file_or_tree(src, dst, dst_dir=None):
    """Copy a file or a directory tree into `dst`, optionally below the sub-path `dst_dir`.

    Directory trees are copied without their ``__pycache__`` directories.

    Args:
        src: Path of the file or directory to copy.
        dst: Destination root directory.
        dst_dir: Optional sub-directory of `dst` under which the copy is placed.

    Returns:
        The path to the copied artifacts, relative to `dst`.
    """
    leaf = os.path.basename(os.path.abspath(src))
    relative_target = leaf if dst_dir is None else os.path.join(dst_dir, leaf)
    full_target = os.path.join(dst, relative_target)

    if not os.path.isfile(src):
        shutil.copytree(src=src, dst=full_target, ignore=shutil.ignore_patterns("__pycache__"))
        return relative_target

    # Single file: make sure the directory that will hold it exists first.
    parent = os.path.dirname(full_target)
    if not os.path.exists(parent):
        os.makedirs(parent)
    shutil.copy(src=src, dst=full_target)
    return relative_target
|
440
|
+
|
441
|
+
|
442
|
+
def _get_local_project_dir_size(project_path):
    """Report the total size of the files under a local project directory.

    Used for cli logging to stdout before the project is copied to its destination.

    Args:
        project_path: local path of the project directory

    Returns:
        Combined size of all files (recursively) in KB, rounded to one decimal place.
    """
    byte_count = sum(
        os.path.getsize(os.path.join(folder, file_name))
        for folder, _, file_names in os.walk(project_path)
        for file_name in file_names
    )
    return round(byte_count / 1024.0, 1)
|
459
|
+
|
460
|
+
|
461
|
+
def _get_local_file_size(file):
    """Return the size of a local file in KB, rounded to one decimal place."""
    size_in_bytes = os.path.getsize(file)
    return round(size_in_bytes / 1024.0, 1)
|
466
|
+
|
467
|
+
|
468
|
+
def get_parent_dir(path):
    """Return the absolute path of the directory one level above `path`."""
    one_level_up = os.path.join(path, os.pardir)
    return os.path.abspath(one_level_up)
|
470
|
+
|
471
|
+
|
472
|
+
def relative_path_to_artifact_path(path):
    """Convert a relative local path into a POSIX-style artifact path.

    When the platform's path module is already ``posixpath`` the input is returned
    unchanged; otherwise the path is converted through ``pathname2url`` and unquoted.

    Raises:
        Exception: If `path` is absolute (only checked on non-POSIX platforms).
    """
    if os.path == posixpath:
        return path
    is_absolute = os.path.abspath(path) == path
    if is_absolute:
        raise Exception("This method only works with relative paths.")
    as_url = pathname2url(path)
    return unquote(as_url)
|
478
|
+
|
479
|
+
|
480
|
+
def path_to_local_file_uri(path):
    """Convert a local filesystem path to a ``file:`` URI (the path is made absolute first)."""
    absolute = pathlib.Path(os.path.abspath(path))
    return absolute.as_uri()
|
485
|
+
|
486
|
+
|
487
|
+
def path_to_local_sqlite_uri(path):
    """Convert a local filesystem path to a ``sqlite:`` URI (the path is made absolute first)."""
    url_path = pathname2url(os.path.abspath(path))
    normalized = posixpath.abspath(url_path)
    if sys.platform == "win32":
        scheme_prefix = "sqlite://"
    else:
        scheme_prefix = "sqlite:///"
    return scheme_prefix + normalized
|
494
|
+
|
495
|
+
|
496
|
+
def local_file_uri_to_path(uri):
    """Convert a ``file:`` URI to a local filesystem path.

    Input that does not start with ``file:`` skips URI parsing and is passed directly
    through ``url2pathname``.
    """
    if not uri.startswith("file:"):
        return urllib.request.url2pathname(uri)

    parsed = urllib.parse.urlparse(uri)
    uri_path = parsed.path
    # Fix for retaining server name in UNC path.
    if is_windows() and parsed.netloc:
        return urllib.request.url2pathname(rf"\\{parsed.netloc}{uri_path}")
    return urllib.request.url2pathname(uri_path)
|
509
|
+
|
510
|
+
|
511
|
+
def get_local_path_or_none(path_or_uri):
    """Return the local path for a local reference, or None for anything else.

    A reference counts as local when it has no scheme, or when it uses the ``file``
    scheme without a host (``file:///...``).
    """
    parsed = urllib.parse.urlparse(path_or_uri)
    has_no_scheme = len(parsed.scheme) == 0
    is_hostless_file_uri = parsed.scheme == "file" and len(parsed.netloc) == 0
    if has_no_scheme or is_hostless_file_uri:
        return local_file_uri_to_path(path_or_uri)
    return None
|
520
|
+
|
521
|
+
|
522
|
+
def yield_file_in_chunks(file, chunk_size=100000000):
    """Yield the binary content of `file` as successive pieces of at most `chunk_size` bytes."""
    with open(file, "rb") as stream:
        while piece := stream.read(chunk_size):
            yield piece
|
533
|
+
|
534
|
+
|
535
|
+
def download_file_using_http_uri(http_uri, download_path, chunk_size=100000000, headers=None):
    """
    Stream the file at `http_uri` into the local file `download_path`. The response body is
    consumed `chunk_size` bytes at a time so that a large file does not have to be held in
    memory.

    Note : This function is meant to download files using presigned urls from various cloud
    providers.
    """
    request_headers = {} if headers is None else headers
    with cloud_storage_http_request(
        "get", http_uri, stream=True, headers=request_headers
    ) as response:
        augmented_raise_for_status(response)
        with open(download_path, "wb") as sink:
            for piece in response.iter_content(chunk_size=chunk_size):
                # An empty piece ends the transfer.
                if not piece:
                    break
                sink.write(piece)
|
552
|
+
|
553
|
+
|
554
|
+
@dataclass(frozen=True)
class _Chunk:
    """Immutable description of one byte range of a file to download."""

    # Position of the chunk within the file (0-based, as produced by `_yield_chunks`).
    index: int
    # Offset of the first byte of the range.
    start: int
    # Offset of the last byte of the range (inclusive).
    end: int
    # Path of the file the chunk belongs to; used in log messages.
    path: str
|
560
|
+
|
561
|
+
|
562
|
+
def _yield_chunks(path, file_size, chunk_size):
    """Yield consecutive `_Chunk` byte ranges that together cover `file_size` bytes.

    Every range spans `chunk_size` bytes except possibly the last one, which is clipped to
    the end of the file. Range ends are inclusive.
    """
    total_chunks = int(math.ceil(file_size / float(chunk_size)))
    last_byte = file_size - 1
    for index in range(total_chunks):
        first = index * chunk_size
        yield _Chunk(index, first, min(first + chunk_size - 1, last_byte), path)
|
568
|
+
|
569
|
+
|
570
|
+
def _captured_output_as_text(output):
    """Return captured subprocess output as stripped text, tolerating ``None`` and ``bytes``."""
    if output is None:
        return ""
    if isinstance(output, bytes):
        output = output.decode("utf-8", errors="replace")
    return output.strip()


def parallelized_download_file_using_http_uri(
    thread_pool_executor,
    http_uri,
    download_path,
    remote_file_path,
    file_size,
    uri_type,
    chunk_size,
    env,
    headers=None,
):
    """
    Downloads a file specified using the `http_uri` to a local `download_path`. This function
    sends multiple requests in parallel each specifying its own desired byte range as a header,
    then reconstructs the file from the downloaded chunks. This allows for downloads of large files
    without OOM risk.

    Each chunk submitted to `thread_pool_executor` is downloaded by a separate Python
    subprocess running the `download_cloud_file_chunk` script with the environment `env`.

    Note : This function is meant to download files using presigned urls from various cloud
    providers.

    Args:
        thread_pool_executor: Executor used to run the per-chunk downloads.
        http_uri: URI to download from.
        download_path: Local path of the file to write; created or truncated up front.
        remote_file_path: Path recorded on each chunk and used in log messages.
        file_size: Size of the file in bytes.
        uri_type: Credential type of the URI; for ``GCP_SIGNED_URL`` or ``None`` the first
            chunk is downloaded in-process to detect responses that ignore the range header.
        chunk_size: Number of bytes per chunk.
        env: Environment passed to the download subprocesses.
        headers: Optional request headers.

    Returns:
        A dict mapping each `_Chunk` whose download failed to the exception that was raised.
        The dict is empty when every chunk succeeded, when the first request already
        returned more than `chunk_size` bytes, or when there was nothing to download.
    """

    def run_download(chunk: _Chunk):
        try:
            subprocess.run(
                [
                    sys.executable,
                    download_cloud_file_chunk.__file__,
                    "--range-start",
                    str(chunk.start),
                    "--range-end",
                    str(chunk.end),
                    "--headers",
                    json.dumps(headers or {}),
                    "--download-path",
                    download_path,
                    "--http-uri",
                    http_uri,
                ],
                text=True,
                check=True,
                capture_output=True,
                timeout=MLFLOW_DOWNLOAD_CHUNK_TIMEOUT.get(),
                env=env,
            )
        except (TimeoutExpired, CalledProcessError) as e:
            # On timeout the captured streams can be None or bytes even with text=True, so
            # normalize them instead of calling .strip() on them directly.
            raise MlflowException(
                f"""
----- stdout -----
{_captured_output_as_text(e.stdout)}

----- stderr -----
{_captured_output_as_text(e.stderr)}
"""
            ) from e

    chunks = _yield_chunks(remote_file_path, file_size, chunk_size)
    # Create file if it doesn't exist or erase the contents if it does. We should do this here
    # before sending to the workers so they can each individually seek to their respective positions
    # and write chunks without overwriting.
    with open(download_path, "w"):
        pass
    if uri_type == ArtifactCredentialType.GCP_SIGNED_URL or uri_type is None:
        chunk = next(chunks, None)
        if chunk is None:
            # No chunks at all (empty file): the empty file created above is the result.
            return {}
        # GCP files could be transcoded, in which case the range header is ignored.
        # Test if this is the case by downloading one chunk and seeing if it's larger than the
        # requested size. If yes, let that be the file; if not, continue downloading more chunks.
        download_chunk(
            range_start=chunk.start,
            range_end=chunk.end,
            headers=headers,
            download_path=download_path,
            http_uri=http_uri,
        )
        downloaded_size = os.path.getsize(download_path)
        # If downloaded size was equal to the chunk size it would have been downloaded serially,
        # so we don't need to consider this here
        if downloaded_size > chunk_size:
            return {}

    futures = {thread_pool_executor.submit(run_download, chunk): chunk for chunk in chunks}
    failed_downloads = {}
    with ArtifactProgressBar.chunks(file_size, f"Downloading {download_path}", chunk_size) as pbar:
        for future in as_completed(futures):
            chunk = futures[future]
            try:
                future.result()
            except Exception as e:
                _logger.debug(
                    f"Failed to download chunk {chunk.index} for {chunk.path}: {e}. "
                    f"The download of this chunk will be retried later."
                )
                failed_downloads[chunk] = future.exception()
            else:
                pbar.update()

    return failed_downloads
|
667
|
+
|
668
|
+
|
669
|
+
def download_chunk_retries(*, chunks, http_uri, headers, download_path):
    """Download each of `chunks` in-process, retrying failed attempts.

    The number of attempts per chunk and the pause between attempts are read from
    ``_MLFLOW_MPD_NUM_RETRIES`` and ``_MLFLOW_MPD_RETRY_INTERVAL_SECONDS``. The exception of
    the final attempt for a chunk is propagated to the caller.

    Args:
        chunks: Iterable of `_Chunk` objects to download.
        http_uri: URI to download from.
        headers: Request headers.
        download_path: Local path of the file the chunks are written into.
    """
    max_attempts = _MLFLOW_MPD_NUM_RETRIES.get()
    pause_seconds = _MLFLOW_MPD_RETRY_INTERVAL_SECONDS.get()
    for chunk in chunks:
        _logger.info(f"Retrying download of chunk {chunk.index} for {chunk.path}")
        attempt = 0
        while attempt < max_attempts:
            try:
                download_chunk(
                    range_start=chunk.start,
                    range_end=chunk.end,
                    headers=headers,
                    download_path=download_path,
                    http_uri=http_uri,
                )
                _logger.info(f"Successfully downloaded chunk {chunk.index} for {chunk.path}")
                break
            except Exception:
                # Out of attempts: surface the failure instead of sleeping again.
                if attempt == max_attempts - 1:
                    raise
                time.sleep(pause_seconds)
            attempt += 1
|
689
|
+
|
690
|
+
|
691
|
+
def _handle_readonly_on_windows(func, path, exc_info):
    """
    Error handler meant to be passed as `onerror` to `shutil.rmtree` rather than called
    directly. On Windows, when removing a file or directory failed with a permission error
    (winerror 5), the path is made writable and the removal is attempted once more. Any
    other failure is re-raised.

    References:
    - https://bugs.python.org/issue19643
    - https://bugs.python.org/issue43657
    """
    error_type, error = exc_info[:2]
    if not (
        is_windows()
        and func in (os.unlink, os.rmdir)
        and issubclass(error_type, PermissionError)
        and error.winerror == 5
    ):
        raise error
    os.chmod(path, stat.S_IWRITE)
    func(path)
|
712
|
+
|
713
|
+
|
714
|
+
def _get_tmp_dir():
    """Return the base directory for temporary data inside a Databricks runtime.

    Returns:
        The Databricks local temp dir when it can be obtained; otherwise a
        ``/tmp/repl_tmp_data/<repl id>`` path when a REPL id is available; None outside a
        Databricks runtime or when neither is available.
    """
    from mlflow.utils.databricks_utils import get_repl_id, is_in_databricks_runtime

    if not is_in_databricks_runtime():
        return None

    try:
        return get_databricks_local_temp_dir()
    except Exception:
        # Best effort: fall through to the REPL-id based location below.
        pass

    repl_id = get_repl_id()
    if repl_id:
        return os.path.join("/tmp", "repl_tmp_data", repl_id)
    return None
|
727
|
+
|
728
|
+
|
729
|
+
def create_tmp_dir():
    """Create and return a new temporary directory.

    The directory is created below the location reported by `_get_tmp_dir` when there is
    one, and in the default temporary location otherwise.
    """
    directory = _get_tmp_dir()
    if not directory:
        return tempfile.mkdtemp()
    os.makedirs(directory, exist_ok=True)
    return tempfile.mkdtemp(dir=directory)
|
735
|
+
|
736
|
+
|
737
|
+
@cache_return_value_per_process
def get_or_create_tmp_dir():
    """
    Get or create a temporary directory which will be removed once python process exit.
    """
    from mlflow.utils.databricks_utils import get_repl_id, is_in_databricks_runtime

    attached_to_repl = is_in_databricks_runtime() and get_repl_id() is not None
    if not attached_to_repl:
        tmp_dir = tempfile.mkdtemp()
        # mkdtemp creates a directory with permission 0o700
        # change it to be 0o777 to ensure it can be seen in spark UDF
        os.chmod(tmp_dir, 0o777)
        atexit.register(shutil.rmtree, tmp_dir, ignore_errors=True)
        return tmp_dir

    # Note: For python process attached to databricks notebook, atexit does not work.
    # The directory returned by `get_databricks_local_temp_dir`
    # will be removed once databricks notebook detaches.
    # The temp directory is designed to be used by all kinds of applications,
    # so create a child directory "mlflow" for storing mlflow temp data.
    try:
        repl_local_tmp_dir = get_databricks_local_temp_dir()
    except Exception:
        repl_local_tmp_dir = os.path.join("/tmp", "repl_tmp_data", get_repl_id())

    tmp_dir = os.path.join(repl_local_tmp_dir, "mlflow")
    os.makedirs(tmp_dir, exist_ok=True)
    return tmp_dir
|
765
|
+
|
766
|
+
|
767
|
+
@cache_return_value_per_process
def get_or_create_nfs_tmp_dir():
    """
    Get or create a temporary NFS directory which will be removed once python process exit.
    """
    from mlflow.utils.databricks_utils import get_repl_id, is_in_databricks_runtime
    from mlflow.utils.nfs_on_spark import get_nfs_cache_root_dir

    nfs_root_dir = get_nfs_cache_root_dir()

    attached_to_repl = is_in_databricks_runtime() and get_repl_id() is not None
    if not attached_to_repl:
        tmp_nfs_dir = tempfile.mkdtemp(dir=nfs_root_dir)
        # mkdtemp creates a directory with permission 0o700
        # change it to be 0o777 to ensure it can be seen in spark UDF
        os.chmod(tmp_nfs_dir, 0o777)
        atexit.register(shutil.rmtree, tmp_nfs_dir, ignore_errors=True)
        return tmp_nfs_dir

    # Note: In databricks, atexit hook does not work.
    # The directory returned by `get_databricks_nfs_temp_dir`
    # will be removed once databricks notebook detaches.
    # The temp directory is designed to be used by all kinds of applications,
    # so create a child directory "mlflow" for storing mlflow temp data.
    try:
        repl_nfs_tmp_dir = get_databricks_nfs_temp_dir()
    except Exception:
        repl_nfs_tmp_dir = os.path.join(nfs_root_dir, "repl_tmp_data", get_repl_id())

    tmp_nfs_dir = os.path.join(repl_nfs_tmp_dir, "mlflow")
    os.makedirs(tmp_nfs_dir, exist_ok=True)
    return tmp_nfs_dir
|
798
|
+
|
799
|
+
|
800
|
+
def write_spark_dataframe_to_parquet_on_local_disk(spark_df, output_path):
    """Write a spark dataframe, coalesced to one partition, as parquet to local disk.

    Inside a Databricks runtime the data is first saved to a uniquely named
    ``.mlflow/cache/<uuid>`` location, then copied from the corresponding ``/dbfs/`` path to
    `output_path`, and the intermediate copy is removed.

    Args:
        spark_df: Spark dataframe.
        output_path: Path to write the data to.
    """
    from mlflow.utils.databricks_utils import is_in_databricks_runtime

    if not is_in_databricks_runtime():
        spark_df.coalesce(1).write.format("parquet").save(output_path)
        return

    dbfs_path = os.path.join(".mlflow", "cache", str(uuid.uuid4()))
    spark_df.coalesce(1).write.format("parquet").save(dbfs_path)
    staged_copy = "/dbfs/" + dbfs_path
    shutil.copytree(staged_copy, output_path)
    shutil.rmtree(staged_copy)
|
817
|
+
|
818
|
+
|
819
|
+
def shutil_copytree_without_file_permissions(src_dir, dst_dir):
    """
    Copies the directory src_dir into dst_dir, without preserving filesystem permissions.

    Only directory structure and file contents are reproduced; permission bits and other
    metadata of the source files are not carried over. `dst_dir` is created if missing.

    Args:
        src_dir: Directory whose content is copied.
        dst_dir: Directory that receives the copy.

    Raises:
        FileExistsError: If a sub-directory to be created already exists under `dst_dir`.
    """
    os.makedirs(dst_dir, exist_ok=True)
    for dirpath, dirnames, filenames in os.walk(src_dir):
        for dirname in dirnames:
            relative_dir_path = os.path.relpath(os.path.join(dirpath, dirname), src_dir)
            # For each directory <dirname> immediately under <dirpath>, create an equivalently-named
            # directory under the destination directory
            abs_dir_path = os.path.join(dst_dir, relative_dir_path)
            os.mkdir(abs_dir_path)
        for filename in filenames:
            # For each file with name <filename> immediately under <dirpath>, copy that file to
            # the appropriate location in the destination directory
            file_path = os.path.join(dirpath, filename)
            relative_file_path = os.path.relpath(file_path, src_dir)
            abs_file_path = os.path.join(dst_dir, relative_file_path)
            # copyfile copies content only; copy2 would also copy permission bits and
            # timestamps, which is exactly what this helper must avoid.
            shutil.copyfile(file_path, abs_file_path)
|
837
|
+
|
838
|
+
|
839
|
+
def contains_path_separator(path):
    """
    Tell whether `path` contains the platform's path separator or, where the platform
    defines one, its alternative separator.
    """
    for separator in (os.path.sep, os.path.altsep):
        if separator is not None and separator in path:
            return True
    return False
|
844
|
+
|
845
|
+
|
846
|
+
def contains_percent(path):
    """
    Tell whether `path` contains a percent character.
    """
    has_percent = "%" in path
    return has_percent
|
851
|
+
|
852
|
+
|
853
|
+
def read_chunk(path: os.PathLike, size: int, start_byte: int = 0) -> bytes:
    """Read up to `size` bytes from a file, starting at `start_byte`.

    Args:
        path: Path to the file.
        size: The size of the chunk.
        start_byte: The start byte of the chunk.

    Returns:
        The chunk of bytes.
    """
    with open(path, "rb") as stream:
        # A fresh handle is already positioned at offset 0.
        if start_byte > 0:
            stream.seek(start_byte)
        data = stream.read(size)
    return data
|
869
|
+
|
870
|
+
|
871
|
+
@contextmanager
def remove_on_error(path: os.PathLike, onerror=None):
    """A context manager that deletes `path` when the wrapped block raises an exception.

    The exception is always re-raised after the cleanup attempt, and the outcome of the
    removal is logged as a warning.

    Args:
        path: Path to the file or directory.
        onerror: A callback function that will be called with the captured exception before
            the file or directory is removed. For example, you can use this callback to
            log the exception.
    """
    try:
        yield
    except Exception as exc:
        if onerror:
            onerror(exc)
        if os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
        if os.path.exists(path):
            outcome = f"Failed to remove {path}"
        else:
            outcome = f"Successfully removed {path}"
        _logger.warning(outcome)
        raise
|
897
|
+
|
898
|
+
|
899
|
+
@contextmanager
def chdir(path: str) -> None:
    """Run the body of a ``with`` statement with ``path`` as the current working directory.

    The working directory in effect when the context is entered is recorded first and is
    switched back to when the context exits, whether or not the body raised.

    Args:
        path: The path to use as the temporary working directory.
    """
    previous_dir = os.getcwd()
    try:
        os.chdir(path)
        yield
    finally:
        # Always switch back, even if the body (or the change of directory) failed.
        os.chdir(previous_dir)
|
912
|
+
|
913
|
+
|
914
|
+
def get_total_file_size(path: Union[str, pathlib.Path]) -> Optional[int]:
    """Add up the sizes of every file beneath a directory, descending into subdirectories.

    Args:
        path: The absolute path of a local directory. A ``pathlib.Path`` is converted to
            ``str`` before use.

    Returns:
        The combined size in bytes. ``None`` is returned instead when the path does not
        exist, is not a directory, or any other exception occurs while walking it; the
        failure is logged at INFO level.

    """
    try:
        if isinstance(path, pathlib.Path):
            path = str(path)
        if not os.path.exists(path):
            raise MlflowException(
                message=f"The given {path} does not exist.", error_code=INVALID_PARAMETER_VALUE
            )
        if not os.path.isdir(path):
            raise MlflowException(
                message=f"The given {path} is not a directory.", error_code=INVALID_PARAMETER_VALUE
            )

        byte_count = 0
        for parent, _subdirs, names in os.walk(path):
            for name in names:
                byte_count += os.path.getsize(os.path.join(parent, name))
        return byte_count
    except Exception as e:
        # Validation errors raised above are also caught here and reported as None.
        _logger.info(f"Failed to get the total size of {path} because of error :{e}")
        return None
|
944
|
+
|
945
|
+
|
946
|
+
def write_yaml(
    root: str,
    file_name: str,
    data: dict[str, Any],
    overwrite: bool = False,
    sort_keys: bool = True,
    ensure_yaml_extension: bool = True,
) -> None:
    """
    NEVER TOUCH THIS FUNCTION. KEPT FOR BACKWARD COMPATIBILITY with
    databricks-feature-engineering<=0.10.2

    Serialize ``data`` as YAML into the file ``file_name`` under ``root``.

    Args:
        root: Directory that ``file_name`` is joined onto to form the target path.
        file_name: Name of the file to write. It is used exactly as given.
        data: Mapping to dump with ``yaml.safe_dump``.
        overwrite: Accepted but not read by this function. The file is always opened in
            ``"w"`` mode.
        sort_keys: Forwarded to ``yaml.safe_dump`` as ``sort_keys``.
        ensure_yaml_extension: Accepted but not read by this function. No extension is
            appended to ``file_name``.
    """
    # Function-scope import: ``yaml`` is only imported when this function is called.
    import yaml

    with open(os.path.join(root, file_name), "w") as f:
        yaml.safe_dump(
            data,
            f,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=sort_keys,
        )
|
968
|
+
|
969
|
+
|
970
|
+
def read_yaml(root: str, file_name: str) -> dict[str, Any]:
    """
    NEVER TOUCH THIS FUNCTION. KEPT FOR BACKWARD COMPATIBILITY with
    databricks-feature-engineering<=0.10.2

    Load the YAML file ``file_name`` located under ``root``.

    Args:
        root: Directory that ``file_name`` is joined onto to form the source path.
        file_name: Name of the file to read. It is used exactly as given.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.
    """
    # Function-scope import: ``yaml`` is only imported when this function is called.
    import yaml

    with open(os.path.join(root, file_name)) as f:
        return yaml.safe_load(f)
|