genesis-flow 1.0.0__py3-none-any.whl
This diff shows the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package as it appears in its public registry.
- genesis_flow-1.0.0.dist-info/METADATA +822 -0
- genesis_flow-1.0.0.dist-info/RECORD +645 -0
- genesis_flow-1.0.0.dist-info/WHEEL +5 -0
- genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
- genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
- genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
- mlflow/__init__.py +367 -0
- mlflow/__main__.py +3 -0
- mlflow/ag2/__init__.py +56 -0
- mlflow/ag2/ag2_logger.py +294 -0
- mlflow/anthropic/__init__.py +40 -0
- mlflow/anthropic/autolog.py +129 -0
- mlflow/anthropic/chat.py +144 -0
- mlflow/artifacts/__init__.py +268 -0
- mlflow/autogen/__init__.py +144 -0
- mlflow/autogen/chat.py +142 -0
- mlflow/azure/__init__.py +26 -0
- mlflow/azure/auth_handler.py +257 -0
- mlflow/azure/client.py +319 -0
- mlflow/azure/config.py +120 -0
- mlflow/azure/connection_factory.py +340 -0
- mlflow/azure/exceptions.py +27 -0
- mlflow/azure/stores.py +327 -0
- mlflow/azure/utils.py +183 -0
- mlflow/bedrock/__init__.py +45 -0
- mlflow/bedrock/_autolog.py +202 -0
- mlflow/bedrock/chat.py +122 -0
- mlflow/bedrock/stream.py +160 -0
- mlflow/bedrock/utils.py +43 -0
- mlflow/cli.py +707 -0
- mlflow/client.py +12 -0
- mlflow/config/__init__.py +56 -0
- mlflow/crewai/__init__.py +79 -0
- mlflow/crewai/autolog.py +253 -0
- mlflow/crewai/chat.py +29 -0
- mlflow/data/__init__.py +75 -0
- mlflow/data/artifact_dataset_sources.py +170 -0
- mlflow/data/code_dataset_source.py +40 -0
- mlflow/data/dataset.py +123 -0
- mlflow/data/dataset_registry.py +168 -0
- mlflow/data/dataset_source.py +110 -0
- mlflow/data/dataset_source_registry.py +219 -0
- mlflow/data/delta_dataset_source.py +167 -0
- mlflow/data/digest_utils.py +108 -0
- mlflow/data/evaluation_dataset.py +562 -0
- mlflow/data/filesystem_dataset_source.py +81 -0
- mlflow/data/http_dataset_source.py +145 -0
- mlflow/data/huggingface_dataset.py +258 -0
- mlflow/data/huggingface_dataset_source.py +118 -0
- mlflow/data/meta_dataset.py +104 -0
- mlflow/data/numpy_dataset.py +223 -0
- mlflow/data/pandas_dataset.py +231 -0
- mlflow/data/polars_dataset.py +352 -0
- mlflow/data/pyfunc_dataset_mixin.py +31 -0
- mlflow/data/schema.py +76 -0
- mlflow/data/sources.py +1 -0
- mlflow/data/spark_dataset.py +406 -0
- mlflow/data/spark_dataset_source.py +74 -0
- mlflow/data/spark_delta_utils.py +118 -0
- mlflow/data/tensorflow_dataset.py +350 -0
- mlflow/data/uc_volume_dataset_source.py +81 -0
- mlflow/db.py +27 -0
- mlflow/dspy/__init__.py +17 -0
- mlflow/dspy/autolog.py +197 -0
- mlflow/dspy/callback.py +398 -0
- mlflow/dspy/constant.py +1 -0
- mlflow/dspy/load.py +93 -0
- mlflow/dspy/save.py +393 -0
- mlflow/dspy/util.py +109 -0
- mlflow/dspy/wrapper.py +226 -0
- mlflow/entities/__init__.py +104 -0
- mlflow/entities/_mlflow_object.py +52 -0
- mlflow/entities/assessment.py +545 -0
- mlflow/entities/assessment_error.py +80 -0
- mlflow/entities/assessment_source.py +141 -0
- mlflow/entities/dataset.py +92 -0
- mlflow/entities/dataset_input.py +51 -0
- mlflow/entities/dataset_summary.py +62 -0
- mlflow/entities/document.py +48 -0
- mlflow/entities/experiment.py +109 -0
- mlflow/entities/experiment_tag.py +35 -0
- mlflow/entities/file_info.py +45 -0
- mlflow/entities/input_tag.py +35 -0
- mlflow/entities/lifecycle_stage.py +35 -0
- mlflow/entities/logged_model.py +228 -0
- mlflow/entities/logged_model_input.py +26 -0
- mlflow/entities/logged_model_output.py +32 -0
- mlflow/entities/logged_model_parameter.py +46 -0
- mlflow/entities/logged_model_status.py +74 -0
- mlflow/entities/logged_model_tag.py +33 -0
- mlflow/entities/metric.py +200 -0
- mlflow/entities/model_registry/__init__.py +29 -0
- mlflow/entities/model_registry/_model_registry_entity.py +13 -0
- mlflow/entities/model_registry/model_version.py +243 -0
- mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
- mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
- mlflow/entities/model_registry/model_version_search.py +25 -0
- mlflow/entities/model_registry/model_version_stages.py +25 -0
- mlflow/entities/model_registry/model_version_status.py +35 -0
- mlflow/entities/model_registry/model_version_tag.py +35 -0
- mlflow/entities/model_registry/prompt.py +73 -0
- mlflow/entities/model_registry/prompt_version.py +244 -0
- mlflow/entities/model_registry/registered_model.py +175 -0
- mlflow/entities/model_registry/registered_model_alias.py +35 -0
- mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
- mlflow/entities/model_registry/registered_model_search.py +25 -0
- mlflow/entities/model_registry/registered_model_tag.py +35 -0
- mlflow/entities/multipart_upload.py +74 -0
- mlflow/entities/param.py +49 -0
- mlflow/entities/run.py +97 -0
- mlflow/entities/run_data.py +84 -0
- mlflow/entities/run_info.py +188 -0
- mlflow/entities/run_inputs.py +59 -0
- mlflow/entities/run_outputs.py +43 -0
- mlflow/entities/run_status.py +41 -0
- mlflow/entities/run_tag.py +36 -0
- mlflow/entities/source_type.py +31 -0
- mlflow/entities/span.py +774 -0
- mlflow/entities/span_event.py +96 -0
- mlflow/entities/span_status.py +102 -0
- mlflow/entities/trace.py +317 -0
- mlflow/entities/trace_data.py +71 -0
- mlflow/entities/trace_info.py +220 -0
- mlflow/entities/trace_info_v2.py +162 -0
- mlflow/entities/trace_location.py +173 -0
- mlflow/entities/trace_state.py +39 -0
- mlflow/entities/trace_status.py +68 -0
- mlflow/entities/view_type.py +51 -0
- mlflow/environment_variables.py +866 -0
- mlflow/evaluation/__init__.py +16 -0
- mlflow/evaluation/assessment.py +369 -0
- mlflow/evaluation/evaluation.py +411 -0
- mlflow/evaluation/evaluation_tag.py +61 -0
- mlflow/evaluation/fluent.py +48 -0
- mlflow/evaluation/utils.py +201 -0
- mlflow/exceptions.py +213 -0
- mlflow/experiments.py +140 -0
- mlflow/gemini/__init__.py +81 -0
- mlflow/gemini/autolog.py +186 -0
- mlflow/gemini/chat.py +261 -0
- mlflow/genai/__init__.py +71 -0
- mlflow/genai/datasets/__init__.py +67 -0
- mlflow/genai/datasets/evaluation_dataset.py +131 -0
- mlflow/genai/evaluation/__init__.py +3 -0
- mlflow/genai/evaluation/base.py +411 -0
- mlflow/genai/evaluation/constant.py +23 -0
- mlflow/genai/evaluation/utils.py +244 -0
- mlflow/genai/judges/__init__.py +21 -0
- mlflow/genai/judges/databricks.py +404 -0
- mlflow/genai/label_schemas/__init__.py +153 -0
- mlflow/genai/label_schemas/label_schemas.py +209 -0
- mlflow/genai/labeling/__init__.py +159 -0
- mlflow/genai/labeling/labeling.py +250 -0
- mlflow/genai/optimize/__init__.py +13 -0
- mlflow/genai/optimize/base.py +198 -0
- mlflow/genai/optimize/optimizers/__init__.py +4 -0
- mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
- mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
- mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
- mlflow/genai/optimize/types.py +75 -0
- mlflow/genai/optimize/util.py +30 -0
- mlflow/genai/prompts/__init__.py +206 -0
- mlflow/genai/scheduled_scorers.py +431 -0
- mlflow/genai/scorers/__init__.py +26 -0
- mlflow/genai/scorers/base.py +492 -0
- mlflow/genai/scorers/builtin_scorers.py +765 -0
- mlflow/genai/scorers/scorer_utils.py +138 -0
- mlflow/genai/scorers/validation.py +165 -0
- mlflow/genai/utils/data_validation.py +146 -0
- mlflow/genai/utils/enum_utils.py +23 -0
- mlflow/genai/utils/trace_utils.py +211 -0
- mlflow/groq/__init__.py +42 -0
- mlflow/groq/_groq_autolog.py +74 -0
- mlflow/johnsnowlabs/__init__.py +888 -0
- mlflow/langchain/__init__.py +24 -0
- mlflow/langchain/api_request_parallel_processor.py +330 -0
- mlflow/langchain/autolog.py +147 -0
- mlflow/langchain/chat_agent_langgraph.py +340 -0
- mlflow/langchain/constant.py +1 -0
- mlflow/langchain/constants.py +1 -0
- mlflow/langchain/databricks_dependencies.py +444 -0
- mlflow/langchain/langchain_tracer.py +597 -0
- mlflow/langchain/model.py +919 -0
- mlflow/langchain/output_parsers.py +142 -0
- mlflow/langchain/retriever_chain.py +153 -0
- mlflow/langchain/runnables.py +527 -0
- mlflow/langchain/utils/chat.py +402 -0
- mlflow/langchain/utils/logging.py +671 -0
- mlflow/langchain/utils/serialization.py +36 -0
- mlflow/legacy_databricks_cli/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/provider.py +482 -0
- mlflow/litellm/__init__.py +175 -0
- mlflow/llama_index/__init__.py +22 -0
- mlflow/llama_index/autolog.py +55 -0
- mlflow/llama_index/chat.py +43 -0
- mlflow/llama_index/constant.py +1 -0
- mlflow/llama_index/model.py +577 -0
- mlflow/llama_index/pyfunc_wrapper.py +332 -0
- mlflow/llama_index/serialize_objects.py +188 -0
- mlflow/llama_index/tracer.py +561 -0
- mlflow/metrics/__init__.py +479 -0
- mlflow/metrics/base.py +39 -0
- mlflow/metrics/genai/__init__.py +25 -0
- mlflow/metrics/genai/base.py +101 -0
- mlflow/metrics/genai/genai_metric.py +771 -0
- mlflow/metrics/genai/metric_definitions.py +450 -0
- mlflow/metrics/genai/model_utils.py +371 -0
- mlflow/metrics/genai/prompt_template.py +68 -0
- mlflow/metrics/genai/prompts/__init__.py +0 -0
- mlflow/metrics/genai/prompts/v1.py +422 -0
- mlflow/metrics/genai/utils.py +6 -0
- mlflow/metrics/metric_definitions.py +619 -0
- mlflow/mismatch.py +34 -0
- mlflow/mistral/__init__.py +34 -0
- mlflow/mistral/autolog.py +71 -0
- mlflow/mistral/chat.py +135 -0
- mlflow/ml_package_versions.py +452 -0
- mlflow/models/__init__.py +97 -0
- mlflow/models/auth_policy.py +83 -0
- mlflow/models/cli.py +354 -0
- mlflow/models/container/__init__.py +294 -0
- mlflow/models/container/scoring_server/__init__.py +0 -0
- mlflow/models/container/scoring_server/nginx.conf +39 -0
- mlflow/models/dependencies_schemas.py +287 -0
- mlflow/models/display_utils.py +158 -0
- mlflow/models/docker_utils.py +211 -0
- mlflow/models/evaluation/__init__.py +23 -0
- mlflow/models/evaluation/_shap_patch.py +64 -0
- mlflow/models/evaluation/artifacts.py +194 -0
- mlflow/models/evaluation/base.py +1811 -0
- mlflow/models/evaluation/calibration_curve.py +109 -0
- mlflow/models/evaluation/default_evaluator.py +996 -0
- mlflow/models/evaluation/deprecated.py +23 -0
- mlflow/models/evaluation/evaluator_registry.py +80 -0
- mlflow/models/evaluation/evaluators/classifier.py +704 -0
- mlflow/models/evaluation/evaluators/default.py +233 -0
- mlflow/models/evaluation/evaluators/regressor.py +96 -0
- mlflow/models/evaluation/evaluators/shap.py +296 -0
- mlflow/models/evaluation/lift_curve.py +178 -0
- mlflow/models/evaluation/utils/metric.py +123 -0
- mlflow/models/evaluation/utils/trace.py +179 -0
- mlflow/models/evaluation/validation.py +434 -0
- mlflow/models/flavor_backend.py +93 -0
- mlflow/models/flavor_backend_registry.py +53 -0
- mlflow/models/model.py +1639 -0
- mlflow/models/model_config.py +150 -0
- mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
- mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
- mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
- mlflow/models/python_api.py +369 -0
- mlflow/models/rag_signatures.py +128 -0
- mlflow/models/resources.py +321 -0
- mlflow/models/signature.py +662 -0
- mlflow/models/utils.py +2054 -0
- mlflow/models/wheeled_model.py +280 -0
- mlflow/openai/__init__.py +57 -0
- mlflow/openai/_agent_tracer.py +364 -0
- mlflow/openai/api_request_parallel_processor.py +131 -0
- mlflow/openai/autolog.py +509 -0
- mlflow/openai/constant.py +1 -0
- mlflow/openai/model.py +824 -0
- mlflow/openai/utils/chat_schema.py +367 -0
- mlflow/optuna/__init__.py +3 -0
- mlflow/optuna/storage.py +646 -0
- mlflow/plugins/__init__.py +72 -0
- mlflow/plugins/base.py +358 -0
- mlflow/plugins/builtin/__init__.py +24 -0
- mlflow/plugins/builtin/pytorch_plugin.py +150 -0
- mlflow/plugins/builtin/sklearn_plugin.py +158 -0
- mlflow/plugins/builtin/transformers_plugin.py +187 -0
- mlflow/plugins/cli.py +321 -0
- mlflow/plugins/discovery.py +340 -0
- mlflow/plugins/manager.py +465 -0
- mlflow/plugins/registry.py +316 -0
- mlflow/plugins/templates/framework_plugin_template.py +329 -0
- mlflow/prompt/constants.py +20 -0
- mlflow/prompt/promptlab_model.py +197 -0
- mlflow/prompt/registry_utils.py +248 -0
- mlflow/promptflow/__init__.py +495 -0
- mlflow/protos/__init__.py +0 -0
- mlflow/protos/assessments_pb2.py +174 -0
- mlflow/protos/databricks_artifacts_pb2.py +489 -0
- mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
- mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
- mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
- mlflow/protos/databricks_pb2.py +267 -0
- mlflow/protos/databricks_trace_server_pb2.py +374 -0
- mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
- mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
- mlflow/protos/facet_feature_statistics_pb2.py +296 -0
- mlflow/protos/internal_pb2.py +77 -0
- mlflow/protos/mlflow_artifacts_pb2.py +336 -0
- mlflow/protos/model_registry_pb2.py +1073 -0
- mlflow/protos/scalapb/__init__.py +0 -0
- mlflow/protos/scalapb/scalapb_pb2.py +104 -0
- mlflow/protos/service_pb2.py +2600 -0
- mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
- mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
- mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
- mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
- mlflow/py.typed +0 -0
- mlflow/pydantic_ai/__init__.py +57 -0
- mlflow/pydantic_ai/autolog.py +173 -0
- mlflow/pyfunc/__init__.py +3844 -0
- mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
- mlflow/pyfunc/backend.py +523 -0
- mlflow/pyfunc/context.py +78 -0
- mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
- mlflow/pyfunc/loaders/__init__.py +7 -0
- mlflow/pyfunc/loaders/chat_agent.py +117 -0
- mlflow/pyfunc/loaders/chat_model.py +125 -0
- mlflow/pyfunc/loaders/code_model.py +31 -0
- mlflow/pyfunc/loaders/responses_agent.py +112 -0
- mlflow/pyfunc/mlserver.py +46 -0
- mlflow/pyfunc/model.py +1473 -0
- mlflow/pyfunc/scoring_server/__init__.py +604 -0
- mlflow/pyfunc/scoring_server/app.py +7 -0
- mlflow/pyfunc/scoring_server/client.py +146 -0
- mlflow/pyfunc/spark_model_cache.py +48 -0
- mlflow/pyfunc/stdin_server.py +44 -0
- mlflow/pyfunc/utils/__init__.py +3 -0
- mlflow/pyfunc/utils/data_validation.py +224 -0
- mlflow/pyfunc/utils/environment.py +22 -0
- mlflow/pyfunc/utils/input_converter.py +47 -0
- mlflow/pyfunc/utils/serving_data_parser.py +11 -0
- mlflow/pytorch/__init__.py +1171 -0
- mlflow/pytorch/_lightning_autolog.py +580 -0
- mlflow/pytorch/_pytorch_autolog.py +50 -0
- mlflow/pytorch/pickle_module.py +35 -0
- mlflow/rfunc/__init__.py +42 -0
- mlflow/rfunc/backend.py +134 -0
- mlflow/runs.py +89 -0
- mlflow/server/__init__.py +302 -0
- mlflow/server/auth/__init__.py +1224 -0
- mlflow/server/auth/__main__.py +4 -0
- mlflow/server/auth/basic_auth.ini +6 -0
- mlflow/server/auth/cli.py +11 -0
- mlflow/server/auth/client.py +537 -0
- mlflow/server/auth/config.py +34 -0
- mlflow/server/auth/db/__init__.py +0 -0
- mlflow/server/auth/db/cli.py +18 -0
- mlflow/server/auth/db/migrations/__init__.py +0 -0
- mlflow/server/auth/db/migrations/alembic.ini +110 -0
- mlflow/server/auth/db/migrations/env.py +76 -0
- mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
- mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
- mlflow/server/auth/db/models.py +67 -0
- mlflow/server/auth/db/utils.py +37 -0
- mlflow/server/auth/entities.py +165 -0
- mlflow/server/auth/logo.py +14 -0
- mlflow/server/auth/permissions.py +65 -0
- mlflow/server/auth/routes.py +18 -0
- mlflow/server/auth/sqlalchemy_store.py +263 -0
- mlflow/server/graphql/__init__.py +0 -0
- mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
- mlflow/server/graphql/graphql_custom_scalars.py +24 -0
- mlflow/server/graphql/graphql_errors.py +15 -0
- mlflow/server/graphql/graphql_no_batching.py +89 -0
- mlflow/server/graphql/graphql_schema_extensions.py +74 -0
- mlflow/server/handlers.py +3217 -0
- mlflow/server/prometheus_exporter.py +17 -0
- mlflow/server/validation.py +30 -0
- mlflow/shap/__init__.py +691 -0
- mlflow/sklearn/__init__.py +1994 -0
- mlflow/sklearn/utils.py +1041 -0
- mlflow/smolagents/__init__.py +66 -0
- mlflow/smolagents/autolog.py +139 -0
- mlflow/smolagents/chat.py +29 -0
- mlflow/store/__init__.py +10 -0
- mlflow/store/_unity_catalog/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/constants.py +2 -0
- mlflow/store/_unity_catalog/registry/__init__.py +6 -0
- mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
- mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
- mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
- mlflow/store/_unity_catalog/registry/utils.py +121 -0
- mlflow/store/artifact/__init__.py +0 -0
- mlflow/store/artifact/artifact_repo.py +472 -0
- mlflow/store/artifact/artifact_repository_registry.py +154 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
- mlflow/store/artifact/cli.py +141 -0
- mlflow/store/artifact/cloud_artifact_repo.py +332 -0
- mlflow/store/artifact/databricks_artifact_repo.py +729 -0
- mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
- mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
- mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
- mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
- mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
- mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
- mlflow/store/artifact/ftp_artifact_repo.py +132 -0
- mlflow/store/artifact/gcs_artifact_repo.py +296 -0
- mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
- mlflow/store/artifact/http_artifact_repo.py +218 -0
- mlflow/store/artifact/local_artifact_repo.py +142 -0
- mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
- mlflow/store/artifact/models_artifact_repo.py +259 -0
- mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
- mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
- mlflow/store/artifact/r2_artifact_repo.py +70 -0
- mlflow/store/artifact/runs_artifact_repo.py +265 -0
- mlflow/store/artifact/s3_artifact_repo.py +330 -0
- mlflow/store/artifact/sftp_artifact_repo.py +141 -0
- mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
- mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
- mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
- mlflow/store/artifact/utils/__init__.py +0 -0
- mlflow/store/artifact/utils/models.py +148 -0
- mlflow/store/db/__init__.py +0 -0
- mlflow/store/db/base_sql_model.py +3 -0
- mlflow/store/db/db_types.py +10 -0
- mlflow/store/db/utils.py +314 -0
- mlflow/store/db_migrations/__init__.py +0 -0
- mlflow/store/db_migrations/alembic.ini +74 -0
- mlflow/store/db_migrations/env.py +84 -0
- mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
- mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
- mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
- mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
- mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
- mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
- mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
- mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
- mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
- mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
- mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
- mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
- mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
- mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
- mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
- mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
- mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
- mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
- mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
- mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
- mlflow/store/db_migrations/versions/__init__.py +0 -0
- mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
- mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
- mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
- mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
- mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
- mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
- mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
- mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
- mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
- mlflow/store/entities/__init__.py +3 -0
- mlflow/store/entities/paged_list.py +18 -0
- mlflow/store/model_registry/__init__.py +10 -0
- mlflow/store/model_registry/abstract_store.py +1081 -0
- mlflow/store/model_registry/base_rest_store.py +44 -0
- mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
- mlflow/store/model_registry/dbmodels/__init__.py +0 -0
- mlflow/store/model_registry/dbmodels/models.py +206 -0
- mlflow/store/model_registry/file_store.py +1091 -0
- mlflow/store/model_registry/rest_store.py +481 -0
- mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
- mlflow/store/tracking/__init__.py +23 -0
- mlflow/store/tracking/abstract_store.py +816 -0
- mlflow/store/tracking/dbmodels/__init__.py +0 -0
- mlflow/store/tracking/dbmodels/initial_models.py +243 -0
- mlflow/store/tracking/dbmodels/models.py +1073 -0
- mlflow/store/tracking/file_store.py +2438 -0
- mlflow/store/tracking/postgres_managed_identity.py +146 -0
- mlflow/store/tracking/rest_store.py +1131 -0
- mlflow/store/tracking/sqlalchemy_store.py +2785 -0
- mlflow/system_metrics/__init__.py +61 -0
- mlflow/system_metrics/metrics/__init__.py +0 -0
- mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
- mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
- mlflow/system_metrics/metrics/disk_monitor.py +21 -0
- mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
- mlflow/system_metrics/metrics/network_monitor.py +34 -0
- mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
- mlflow/system_metrics/system_metrics_monitor.py +198 -0
- mlflow/tracing/__init__.py +16 -0
- mlflow/tracing/assessment.py +356 -0
- mlflow/tracing/client.py +531 -0
- mlflow/tracing/config.py +125 -0
- mlflow/tracing/constant.py +105 -0
- mlflow/tracing/destination.py +81 -0
- mlflow/tracing/display/__init__.py +40 -0
- mlflow/tracing/display/display_handler.py +196 -0
- mlflow/tracing/export/async_export_queue.py +186 -0
- mlflow/tracing/export/inference_table.py +138 -0
- mlflow/tracing/export/mlflow_v3.py +137 -0
- mlflow/tracing/export/utils.py +70 -0
- mlflow/tracing/fluent.py +1417 -0
- mlflow/tracing/processor/base_mlflow.py +199 -0
- mlflow/tracing/processor/inference_table.py +175 -0
- mlflow/tracing/processor/mlflow_v3.py +47 -0
- mlflow/tracing/processor/otel.py +73 -0
- mlflow/tracing/provider.py +487 -0
- mlflow/tracing/trace_manager.py +200 -0
- mlflow/tracing/utils/__init__.py +616 -0
- mlflow/tracing/utils/artifact_utils.py +28 -0
- mlflow/tracing/utils/copy.py +55 -0
- mlflow/tracing/utils/environment.py +55 -0
- mlflow/tracing/utils/exception.py +21 -0
- mlflow/tracing/utils/once.py +35 -0
- mlflow/tracing/utils/otlp.py +63 -0
- mlflow/tracing/utils/processor.py +54 -0
- mlflow/tracing/utils/search.py +292 -0
- mlflow/tracing/utils/timeout.py +250 -0
- mlflow/tracing/utils/token.py +19 -0
- mlflow/tracing/utils/truncation.py +124 -0
- mlflow/tracing/utils/warning.py +76 -0
- mlflow/tracking/__init__.py +39 -0
- mlflow/tracking/_model_registry/__init__.py +1 -0
- mlflow/tracking/_model_registry/client.py +764 -0
- mlflow/tracking/_model_registry/fluent.py +853 -0
- mlflow/tracking/_model_registry/registry.py +67 -0
- mlflow/tracking/_model_registry/utils.py +251 -0
- mlflow/tracking/_tracking_service/__init__.py +0 -0
- mlflow/tracking/_tracking_service/client.py +883 -0
- mlflow/tracking/_tracking_service/registry.py +56 -0
- mlflow/tracking/_tracking_service/utils.py +275 -0
- mlflow/tracking/artifact_utils.py +179 -0
- mlflow/tracking/client.py +5900 -0
- mlflow/tracking/context/__init__.py +0 -0
- mlflow/tracking/context/abstract_context.py +35 -0
- mlflow/tracking/context/databricks_cluster_context.py +15 -0
- mlflow/tracking/context/databricks_command_context.py +15 -0
- mlflow/tracking/context/databricks_job_context.py +49 -0
- mlflow/tracking/context/databricks_notebook_context.py +41 -0
- mlflow/tracking/context/databricks_repo_context.py +43 -0
- mlflow/tracking/context/default_context.py +51 -0
- mlflow/tracking/context/git_context.py +32 -0
- mlflow/tracking/context/registry.py +98 -0
- mlflow/tracking/context/system_environment_context.py +15 -0
- mlflow/tracking/default_experiment/__init__.py +1 -0
- mlflow/tracking/default_experiment/abstract_context.py +43 -0
- mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
- mlflow/tracking/default_experiment/registry.py +75 -0
- mlflow/tracking/fluent.py +3595 -0
- mlflow/tracking/metric_value_conversion_utils.py +93 -0
- mlflow/tracking/multimedia.py +206 -0
- mlflow/tracking/registry.py +86 -0
- mlflow/tracking/request_auth/__init__.py +0 -0
- mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
- mlflow/tracking/request_auth/registry.py +60 -0
- mlflow/tracking/request_header/__init__.py +0 -0
- mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
- mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
- mlflow/tracking/request_header/default_request_header_provider.py +17 -0
- mlflow/tracking/request_header/registry.py +79 -0
- mlflow/transformers/__init__.py +2982 -0
- mlflow/transformers/flavor_config.py +258 -0
- mlflow/transformers/hub_utils.py +83 -0
- mlflow/transformers/llm_inference_utils.py +468 -0
- mlflow/transformers/model_io.py +301 -0
- mlflow/transformers/peft.py +51 -0
- mlflow/transformers/signature.py +183 -0
- mlflow/transformers/torch_utils.py +55 -0
- mlflow/types/__init__.py +21 -0
- mlflow/types/agent.py +270 -0
- mlflow/types/chat.py +240 -0
- mlflow/types/llm.py +935 -0
- mlflow/types/responses.py +139 -0
- mlflow/types/responses_helpers.py +416 -0
- mlflow/types/schema.py +1505 -0
- mlflow/types/type_hints.py +647 -0
- mlflow/types/utils.py +753 -0
- mlflow/utils/__init__.py +283 -0
- mlflow/utils/_capture_modules.py +256 -0
- mlflow/utils/_capture_transformers_modules.py +75 -0
- mlflow/utils/_spark_utils.py +201 -0
- mlflow/utils/_unity_catalog_oss_utils.py +97 -0
- mlflow/utils/_unity_catalog_utils.py +479 -0
- mlflow/utils/annotations.py +218 -0
- mlflow/utils/arguments_utils.py +16 -0
- mlflow/utils/async_logging/__init__.py +1 -0
- mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
- mlflow/utils/async_logging/async_logging_queue.py +366 -0
- mlflow/utils/async_logging/run_artifact.py +38 -0
- mlflow/utils/async_logging/run_batch.py +58 -0
- mlflow/utils/async_logging/run_operations.py +49 -0
- mlflow/utils/autologging_utils/__init__.py +737 -0
- mlflow/utils/autologging_utils/client.py +432 -0
- mlflow/utils/autologging_utils/config.py +33 -0
- mlflow/utils/autologging_utils/events.py +294 -0
- mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
- mlflow/utils/autologging_utils/metrics_queue.py +71 -0
- mlflow/utils/autologging_utils/safety.py +1104 -0
- mlflow/utils/autologging_utils/versioning.py +95 -0
- mlflow/utils/checkpoint_utils.py +206 -0
- mlflow/utils/class_utils.py +6 -0
- mlflow/utils/cli_args.py +257 -0
- mlflow/utils/conda.py +354 -0
- mlflow/utils/credentials.py +231 -0
- mlflow/utils/data_utils.py +17 -0
- mlflow/utils/databricks_utils.py +1436 -0
- mlflow/utils/docstring_utils.py +477 -0
- mlflow/utils/doctor.py +133 -0
- mlflow/utils/download_cloud_file_chunk.py +43 -0
- mlflow/utils/env_manager.py +16 -0
- mlflow/utils/env_pack.py +131 -0
- mlflow/utils/environment.py +1009 -0
- mlflow/utils/exception_utils.py +14 -0
- mlflow/utils/file_utils.py +978 -0
- mlflow/utils/git_utils.py +77 -0
- mlflow/utils/gorilla.py +797 -0
- mlflow/utils/import_hooks/__init__.py +363 -0
- mlflow/utils/lazy_load.py +51 -0
- mlflow/utils/logging_utils.py +168 -0
- mlflow/utils/mime_type_utils.py +58 -0
- mlflow/utils/mlflow_tags.py +103 -0
- mlflow/utils/model_utils.py +486 -0
- mlflow/utils/name_utils.py +346 -0
- mlflow/utils/nfs_on_spark.py +62 -0
- mlflow/utils/openai_utils.py +164 -0
- mlflow/utils/os.py +12 -0
- mlflow/utils/oss_registry_utils.py +29 -0
- mlflow/utils/plugins.py +17 -0
- mlflow/utils/process.py +182 -0
- mlflow/utils/promptlab_utils.py +146 -0
- mlflow/utils/proto_json_utils.py +743 -0
- mlflow/utils/pydantic_utils.py +54 -0
- mlflow/utils/request_utils.py +279 -0
- mlflow/utils/requirements_utils.py +704 -0
- mlflow/utils/rest_utils.py +673 -0
- mlflow/utils/search_logged_model_utils.py +127 -0
- mlflow/utils/search_utils.py +2111 -0
- mlflow/utils/secure_loading.py +221 -0
- mlflow/utils/security_validation.py +384 -0
- mlflow/utils/server_cli_utils.py +61 -0
- mlflow/utils/spark_utils.py +15 -0
- mlflow/utils/string_utils.py +138 -0
- mlflow/utils/thread_utils.py +63 -0
- mlflow/utils/time.py +54 -0
- mlflow/utils/timeout.py +42 -0
- mlflow/utils/uri.py +572 -0
- mlflow/utils/validation.py +662 -0
- mlflow/utils/virtualenv.py +458 -0
- mlflow/utils/warnings_utils.py +25 -0
- mlflow/utils/yaml_utils.py +179 -0
- mlflow/version.py +24 -0
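top_level.txt contains a single entry and every module in the RECORD lives under mlflow/, so the wheel installs as a drop-in replacement for the mlflow import path. A minimal smoke test, assuming the genesis-flow wheel is installed in the current environment (the printed values are illustrative):

import mlflow

# If genesis-flow owns the `mlflow` package in this environment, these
# should report the genesis-flow build (e.g. "1.0.0") and its install path.
print(mlflow.__version__)
print(mlflow.__file__)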
mlflow/store/tracking/file_store.py
@@ -0,0 +1,2438 @@
|
|
1
|
+
import hashlib
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
import os
|
5
|
+
import shutil
|
6
|
+
import sys
|
7
|
+
import time
|
8
|
+
import uuid
|
9
|
+
from collections import defaultdict
|
10
|
+
from dataclasses import dataclass
|
11
|
+
from typing import Any, NamedTuple, Optional, TypedDict
|
12
|
+
|
13
|
+
from mlflow.entities import (
|
14
|
+
Dataset,
|
15
|
+
DatasetInput,
|
16
|
+
Experiment,
|
17
|
+
ExperimentTag,
|
18
|
+
InputTag,
|
19
|
+
LoggedModel,
|
20
|
+
LoggedModelInput,
|
21
|
+
LoggedModelOutput,
|
22
|
+
LoggedModelParameter,
|
23
|
+
LoggedModelStatus,
|
24
|
+
LoggedModelTag,
|
25
|
+
Metric,
|
26
|
+
Param,
|
27
|
+
Run,
|
28
|
+
RunData,
|
29
|
+
RunInfo,
|
30
|
+
RunInputs,
|
31
|
+
RunOutputs,
|
32
|
+
RunStatus,
|
33
|
+
RunTag,
|
34
|
+
SourceType,
|
35
|
+
TraceInfo,
|
36
|
+
ViewType,
|
37
|
+
_DatasetSummary,
|
38
|
+
)
|
39
|
+
from mlflow.entities.lifecycle_stage import LifecycleStage
|
40
|
+
from mlflow.entities.run_info import check_run_is_active
|
41
|
+
from mlflow.entities.trace_info_v2 import TraceInfoV2
|
42
|
+
from mlflow.entities.trace_status import TraceStatus
|
43
|
+
from mlflow.environment_variables import MLFLOW_TRACKING_DIR
|
44
|
+
from mlflow.exceptions import MissingConfigException, MlflowException
|
45
|
+
from mlflow.protos import databricks_pb2
|
46
|
+
from mlflow.protos.databricks_pb2 import (
|
47
|
+
INTERNAL_ERROR,
|
48
|
+
INVALID_PARAMETER_VALUE,
|
49
|
+
RESOURCE_DOES_NOT_EXIST,
|
50
|
+
)
|
51
|
+
from mlflow.protos.internal_pb2 import InputVertexType, OutputVertexType
|
52
|
+
from mlflow.store.entities.paged_list import PagedList
|
53
|
+
from mlflow.store.model_registry.file_store import FileStore as ModelRegistryFileStore
|
54
|
+
from mlflow.store.tracking import (
|
55
|
+
DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH,
|
56
|
+
SEARCH_LOGGED_MODEL_MAX_RESULTS_DEFAULT,
|
57
|
+
SEARCH_MAX_RESULTS_DEFAULT,
|
58
|
+
SEARCH_MAX_RESULTS_THRESHOLD,
|
59
|
+
SEARCH_TRACES_DEFAULT_MAX_RESULTS,
|
60
|
+
)
|
61
|
+
from mlflow.store.tracking.abstract_store import AbstractStore
|
62
|
+
from mlflow.tracing.utils import generate_request_id_v2
|
63
|
+
from mlflow.utils import get_results_from_paginated_fn
|
64
|
+
from mlflow.utils.file_utils import (
|
65
|
+
append_to,
|
66
|
+
exists,
|
67
|
+
find,
|
68
|
+
get_parent_dir,
|
69
|
+
is_directory,
|
70
|
+
list_all,
|
71
|
+
list_subdirs,
|
72
|
+
local_file_uri_to_path,
|
73
|
+
make_containing_dirs,
|
74
|
+
mkdir,
|
75
|
+
mv,
|
76
|
+
path_to_local_file_uri,
|
77
|
+
read_file,
|
78
|
+
read_file_lines,
|
79
|
+
write_to,
|
80
|
+
)
|
81
|
+
from mlflow.utils.mlflow_tags import (
|
82
|
+
MLFLOW_ARTIFACT_LOCATION,
|
83
|
+
MLFLOW_DATASET_CONTEXT,
|
84
|
+
MLFLOW_LOGGED_MODELS,
|
85
|
+
MLFLOW_RUN_NAME,
|
86
|
+
_get_run_name_from_tags,
|
87
|
+
)
|
88
|
+
from mlflow.utils.name_utils import _generate_random_name, _generate_unique_integer_id
|
89
|
+
from mlflow.utils.search_utils import (
|
90
|
+
SearchExperimentsUtils,
|
91
|
+
SearchLoggedModelsUtils,
|
92
|
+
SearchTraceUtils,
|
93
|
+
SearchUtils,
|
94
|
+
)
|
95
|
+
from mlflow.utils.string_utils import is_string_type
|
96
|
+
from mlflow.utils.time import get_current_time_millis
|
97
|
+
from mlflow.utils.uri import (
|
98
|
+
append_to_uri_path,
|
99
|
+
resolve_uri_if_local,
|
100
|
+
)
|
101
|
+
from mlflow.utils.validation import (
|
102
|
+
_validate_batch_log_data,
|
103
|
+
_validate_batch_log_limits,
|
104
|
+
_validate_experiment_artifact_location_length,
|
105
|
+
_validate_experiment_id,
|
106
|
+
_validate_experiment_name,
|
107
|
+
_validate_logged_model_name,
|
108
|
+
_validate_metric,
|
109
|
+
_validate_metric_name,
|
110
|
+
_validate_param,
|
111
|
+
_validate_param_keys_unique,
|
112
|
+
_validate_param_name,
|
113
|
+
_validate_run_id,
|
114
|
+
_validate_tag_name,
|
115
|
+
)
|
116
|
+
from mlflow.utils.yaml_utils import overwrite_yaml, read_yaml, write_yaml
|
117
|
+
|
118
|
+
_logger = logging.getLogger(__name__)
|
119
|
+
|
120
|
+
|
121
|
+
def _default_root_dir():
|
122
|
+
return MLFLOW_TRACKING_DIR.get() or os.path.abspath(DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH)
|
123
|
+
|
124
|
+
|
125
|
+
def _read_persisted_experiment_dict(experiment_dict):
|
126
|
+
dict_copy = experiment_dict.copy()
|
127
|
+
|
128
|
+
# 'experiment_id' was changed from int to string, so we must cast to string
|
129
|
+
# when reading legacy experiments
|
130
|
+
if isinstance(dict_copy["experiment_id"], int):
|
131
|
+
dict_copy["experiment_id"] = str(dict_copy["experiment_id"])
|
132
|
+
return Experiment.from_dictionary(dict_copy)
|
133
|
+
|
134
|
+
|
135
|
+
def _make_persisted_run_info_dict(run_info):
|
136
|
+
# 'tags' was moved from RunInfo to RunData, so we must keep storing it in the meta.yaml for
|
137
|
+
# old mlflow versions to read
|
138
|
+
run_info_dict = dict(run_info)
|
139
|
+
run_info_dict["tags"] = []
|
140
|
+
if "status" in run_info_dict:
|
141
|
+
# 'status' is stored as an integer enum in meta file, but RunInfo.status field is a string.
|
142
|
+
# Convert from string to enum/int before storing.
|
143
|
+
run_info_dict["status"] = RunStatus.from_string(run_info.status)
|
144
|
+
else:
|
145
|
+
run_info_dict["status"] = RunStatus.RUNNING
|
146
|
+
run_info_dict["source_type"] = SourceType.LOCAL
|
147
|
+
run_info_dict["source_name"] = ""
|
148
|
+
run_info_dict["entry_point_name"] = ""
|
149
|
+
run_info_dict["source_version"] = ""
|
150
|
+
return run_info_dict
|
151
|
+
|
152
|
+
|
153
|
+
def _read_persisted_run_info_dict(run_info_dict):
|
154
|
+
dict_copy = run_info_dict.copy()
|
155
|
+
if "lifecycle_stage" not in dict_copy:
|
156
|
+
dict_copy["lifecycle_stage"] = LifecycleStage.ACTIVE
|
157
|
+
# 'status' is stored as an integer enum in meta file, but RunInfo.status field is a string.
|
158
|
+
# converting to string before hydrating RunInfo.
|
159
|
+
# If 'status' value not recorded in files, mark it as 'RUNNING' (default)
|
160
|
+
dict_copy["status"] = RunStatus.to_string(run_info_dict.get("status", RunStatus.RUNNING))
|
161
|
+
|
162
|
+
# 'experiment_id' was changed from int to string, so we must cast to string
|
163
|
+
# when reading legacy run_infos
|
164
|
+
if isinstance(dict_copy["experiment_id"], int):
|
165
|
+
dict_copy["experiment_id"] = str(dict_copy["experiment_id"])
|
166
|
+
return RunInfo.from_dictionary(dict_copy)
|
167
|
+
|
168
|
+
|
169
|
+
class DatasetFilter(TypedDict, total=False):
|
170
|
+
"""
|
171
|
+
Dataset filter used for search_logged_models.
|
172
|
+
"""
|
173
|
+
|
174
|
+
dataset_name: str
|
175
|
+
dataset_digest: str
|
176
|
+
|
177
|
+
|
178
|
+
class FileStore(AbstractStore):
|
179
|
+
TRASH_FOLDER_NAME = ".trash"
|
180
|
+
ARTIFACTS_FOLDER_NAME = "artifacts"
|
181
|
+
METRICS_FOLDER_NAME = "metrics"
|
182
|
+
PARAMS_FOLDER_NAME = "params"
|
183
|
+
TAGS_FOLDER_NAME = "tags"
|
184
|
+
EXPERIMENT_TAGS_FOLDER_NAME = "tags"
|
185
|
+
DATASETS_FOLDER_NAME = "datasets"
|
186
|
+
INPUTS_FOLDER_NAME = "inputs"
|
187
|
+
OUTPUTS_FOLDER_NAME = "outputs"
|
188
|
+
META_DATA_FILE_NAME = "meta.yaml"
|
189
|
+
DEFAULT_EXPERIMENT_ID = "0"
|
190
|
+
TRACE_INFO_FILE_NAME = "trace_info.yaml"
|
191
|
+
TRACES_FOLDER_NAME = "traces"
|
192
|
+
TRACE_TAGS_FOLDER_NAME = "tags"
|
193
|
+
# "request_metadata" field is renamed to "trace_metadata" in V3,
|
194
|
+
# but we keep the old name for backward compatibility
|
195
|
+
TRACE_TRACE_METADATA_FOLDER_NAME = "request_metadata"
|
196
|
+
MODELS_FOLDER_NAME = "models"
|
197
|
+
RESERVED_EXPERIMENT_FOLDERS = [
|
198
|
+
EXPERIMENT_TAGS_FOLDER_NAME,
|
199
|
+
DATASETS_FOLDER_NAME,
|
200
|
+
TRACES_FOLDER_NAME,
|
201
|
+
MODELS_FOLDER_NAME,
|
202
|
+
]
|
203
|
+
|
204
|
+
def __init__(self, root_directory=None, artifact_root_uri=None):
|
205
|
+
"""
|
206
|
+
Create a new FileStore with the given root directory and a given default artifact root URI.
|
207
|
+
"""
|
208
|
+
super().__init__()
|
209
|
+
self.root_directory = local_file_uri_to_path(root_directory or _default_root_dir())
|
210
|
+
if not artifact_root_uri:
|
211
|
+
self.artifact_root_uri = path_to_local_file_uri(self.root_directory)
|
212
|
+
else:
|
213
|
+
self.artifact_root_uri = resolve_uri_if_local(artifact_root_uri)
|
214
|
+
self.trash_folder = os.path.join(self.root_directory, FileStore.TRASH_FOLDER_NAME)
|
215
|
+
# Create root directory if needed
|
216
|
+
if not exists(self.root_directory):
|
217
|
+
self._create_default_experiment()
|
218
|
+
# Create trash folder if needed
|
219
|
+
if not exists(self.trash_folder):
|
220
|
+
mkdir(self.trash_folder)
|
221
|
+
|
222
|
+
def _create_default_experiment(self):
|
223
|
+
mkdir(self.root_directory)
|
224
|
+
self._create_experiment_with_id(
|
225
|
+
name=Experiment.DEFAULT_EXPERIMENT_NAME,
|
226
|
+
experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
|
227
|
+
artifact_uri=None,
|
228
|
+
tags=None,
|
229
|
+
)
|
230
|
+
|
231
|
+
def _check_root_dir(self):
|
232
|
+
"""
|
233
|
+
Run checks before running directory operations.
|
234
|
+
"""
|
235
|
+
if not exists(self.root_directory):
|
236
|
+
raise Exception(f"'{self.root_directory}' does not exist.")
|
237
|
+
if not is_directory(self.root_directory):
|
238
|
+
raise Exception(f"'{self.root_directory}' is not a directory.")
|
239
|
+
|
240
|
+
def _get_experiment_path(self, experiment_id, view_type=ViewType.ALL, assert_exists=False):
|
241
|
+
parents = []
|
242
|
+
if view_type == ViewType.ACTIVE_ONLY or view_type == ViewType.ALL:
|
243
|
+
parents.append(self.root_directory)
|
244
|
+
if view_type == ViewType.DELETED_ONLY or view_type == ViewType.ALL:
|
245
|
+
parents.append(self.trash_folder)
|
246
|
+
for parent in parents:
|
247
|
+
exp_list = find(parent, experiment_id, full_path=True)
|
248
|
+
if len(exp_list) > 0:
|
249
|
+
return exp_list[0]
|
250
|
+
if assert_exists:
|
251
|
+
raise MlflowException(
|
252
|
+
f"Experiment {experiment_id} does not exist.",
|
253
|
+
databricks_pb2.RESOURCE_DOES_NOT_EXIST,
|
254
|
+
)
|
255
|
+
return None
|
256
|
+
|
257
|
+
def _get_run_dir(self, experiment_id, run_uuid):
|
258
|
+
_validate_run_id(run_uuid)
|
259
|
+
if not self._has_experiment(experiment_id):
|
260
|
+
return None
|
261
|
+
return os.path.join(self._get_experiment_path(experiment_id, assert_exists=True), run_uuid)
|
262
|
+
|
263
|
+
def _get_metric_path(self, experiment_id, run_uuid, metric_key):
|
264
|
+
_validate_run_id(run_uuid)
|
265
|
+
_validate_metric_name(metric_key, "name")
|
266
|
+
return os.path.join(
|
267
|
+
self._get_run_dir(experiment_id, run_uuid),
|
268
|
+
FileStore.METRICS_FOLDER_NAME,
|
269
|
+
metric_key,
|
270
|
+
)
|
271
|
+
|
272
|
+
def _get_model_metric_path(self, experiment_id: str, model_id: str, metric_key: str) -> str:
|
273
|
+
_validate_metric_name(metric_key)
|
274
|
+
return os.path.join(
|
275
|
+
self._get_model_dir(experiment_id, model_id), FileStore.METRICS_FOLDER_NAME, metric_key
|
276
|
+
)
|
277
|
+
|
278
|
+
def _get_param_path(self, experiment_id, run_uuid, param_name):
|
279
|
+
_validate_run_id(run_uuid)
|
280
|
+
_validate_param_name(param_name)
|
281
|
+
return os.path.join(
|
282
|
+
self._get_run_dir(experiment_id, run_uuid),
|
283
|
+
FileStore.PARAMS_FOLDER_NAME,
|
284
|
+
param_name,
|
285
|
+
)
|
286
|
+
|
287
|
+
def _get_experiment_tag_path(self, experiment_id, tag_name):
|
288
|
+
_validate_experiment_id(experiment_id)
|
289
|
+
_validate_tag_name(tag_name)
|
290
|
+
if not self._has_experiment(experiment_id):
|
291
|
+
return None
|
292
|
+
return os.path.join(
|
293
|
+
self._get_experiment_path(experiment_id, assert_exists=True),
|
294
|
+
FileStore.TAGS_FOLDER_NAME,
|
295
|
+
tag_name,
|
296
|
+
)
|
297
|
+
|
298
|
+
def _get_tag_path(self, experiment_id, run_uuid, tag_name):
|
299
|
+
_validate_run_id(run_uuid)
|
300
|
+
_validate_tag_name(tag_name)
|
301
|
+
return os.path.join(
|
302
|
+
self._get_run_dir(experiment_id, run_uuid),
|
303
|
+
FileStore.TAGS_FOLDER_NAME,
|
304
|
+
tag_name,
|
305
|
+
)
|
306
|
+
|
307
|
+
def _get_artifact_dir(self, experiment_id, run_uuid):
|
308
|
+
_validate_run_id(run_uuid)
|
309
|
+
return append_to_uri_path(
|
310
|
+
self.get_experiment(experiment_id).artifact_location,
|
311
|
+
run_uuid,
|
312
|
+
FileStore.ARTIFACTS_FOLDER_NAME,
|
313
|
+
)
|
314
|
+
|
315
|
+
def _get_active_experiments(self, full_path=False):
|
316
|
+
exp_list = list_subdirs(self.root_directory, full_path)
|
317
|
+
return [
|
318
|
+
exp
|
319
|
+
for exp in exp_list
|
320
|
+
if not exp.endswith(FileStore.TRASH_FOLDER_NAME)
|
321
|
+
and exp != ModelRegistryFileStore.MODELS_FOLDER_NAME
|
322
|
+
]
|
323
|
+
|
324
|
+
def _get_deleted_experiments(self, full_path=False):
|
325
|
+
return list_subdirs(self.trash_folder, full_path)
|
326
|
+
|
327
|
+
def search_experiments(
|
328
|
+
self,
|
329
|
+
view_type=ViewType.ACTIVE_ONLY,
|
330
|
+
max_results=SEARCH_MAX_RESULTS_DEFAULT,
|
331
|
+
filter_string=None,
|
332
|
+
order_by=None,
|
333
|
+
page_token=None,
|
334
|
+
):
|
335
|
+
if not isinstance(max_results, int) or max_results < 1:
|
336
|
+
raise MlflowException(
|
337
|
+
f"Invalid value {max_results} for parameter 'max_results' supplied. It must be "
|
338
|
+
f"a positive integer",
|
339
|
+
INVALID_PARAMETER_VALUE,
|
340
|
+
)
|
341
|
+
if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
|
342
|
+
raise MlflowException(
|
343
|
+
f"Invalid value {max_results} for parameter 'max_results' supplied. It must be at "
|
344
|
+
f"most {SEARCH_MAX_RESULTS_THRESHOLD}",
|
345
|
+
INVALID_PARAMETER_VALUE,
|
346
|
+
)
|
347
|
+
|
348
|
+
self._check_root_dir()
|
349
|
+
experiment_ids = []
|
350
|
+
if view_type == ViewType.ACTIVE_ONLY or view_type == ViewType.ALL:
|
351
|
+
experiment_ids += self._get_active_experiments(full_path=False)
|
352
|
+
if view_type == ViewType.DELETED_ONLY or view_type == ViewType.ALL:
|
353
|
+
experiment_ids += self._get_deleted_experiments(full_path=False)
|
354
|
+
|
355
|
+
experiments = []
|
356
|
+
for exp_id in experiment_ids:
|
357
|
+
try:
|
358
|
+
# trap and warn known issues, will raise unexpected exceptions to caller
|
359
|
+
exp = self._get_experiment(exp_id, view_type)
|
360
|
+
if exp is not None:
|
361
|
+
experiments.append(exp)
|
362
|
+
except MissingConfigException as e:
|
363
|
+
logging.warning(
|
364
|
+
f"Malformed experiment '{exp_id}'. Detailed error {e}",
|
365
|
+
exc_info=True,
|
366
|
+
)
|
367
|
+
filtered = SearchExperimentsUtils.filter(experiments, filter_string)
|
368
|
+
sorted_experiments = SearchExperimentsUtils.sort(
|
369
|
+
filtered, order_by or ["creation_time DESC", "experiment_id ASC"]
|
370
|
+
)
|
371
|
+
experiments, next_page_token = SearchUtils.paginate(
|
372
|
+
sorted_experiments, page_token, max_results
|
373
|
+
)
|
374
|
+
return PagedList(experiments, next_page_token)
|
375
|
+
|
376
|
+
def get_experiment_by_name(self, experiment_name):
|
377
|
+
def pagination_wrapper_func(number_to_get, next_page_token):
|
378
|
+
return self.search_experiments(
|
379
|
+
view_type=ViewType.ALL,
|
380
|
+
max_results=number_to_get,
|
381
|
+
filter_string=f"name = '{experiment_name}'",
|
382
|
+
page_token=next_page_token,
|
383
|
+
)
|
384
|
+
|
385
|
+
experiments = get_results_from_paginated_fn(
|
386
|
+
paginated_fn=pagination_wrapper_func,
|
387
|
+
max_results_per_page=SEARCH_MAX_RESULTS_THRESHOLD,
|
388
|
+
max_results=None,
|
389
|
+
)
|
390
|
+
return experiments[0] if len(experiments) > 0 else None
|
391
|
+
|
392
|
+
def _create_experiment_with_id(self, name, experiment_id, artifact_uri, tags):
|
393
|
+
if not artifact_uri:
|
394
|
+
resolved_artifact_uri = append_to_uri_path(self.artifact_root_uri, str(experiment_id))
|
395
|
+
else:
|
396
|
+
resolved_artifact_uri = resolve_uri_if_local(artifact_uri)
|
397
|
+
meta_dir = mkdir(self.root_directory, str(experiment_id))
|
398
|
+
creation_time = get_current_time_millis()
|
399
|
+
experiment = Experiment(
|
400
|
+
experiment_id,
|
401
|
+
name,
|
402
|
+
resolved_artifact_uri,
|
403
|
+
LifecycleStage.ACTIVE,
|
404
|
+
creation_time=creation_time,
|
405
|
+
last_update_time=creation_time,
|
406
|
+
)
|
407
|
+
experiment_dict = dict(experiment)
|
408
|
+
# tags are added to the file system and are not written to this dict on write
|
409
|
+
# As such, we should not include them in the meta file.
|
410
|
+
del experiment_dict["tags"]
|
411
|
+
write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, experiment_dict)
|
412
|
+
if tags is not None:
|
413
|
+
for tag in tags:
|
414
|
+
self.set_experiment_tag(experiment_id, tag)
|
415
|
+
return experiment_id
|
416
|
+
|
417
|
+
def _validate_experiment_does_not_exist(self, name):
|
418
|
+
experiment = self.get_experiment_by_name(name)
|
419
|
+
if experiment is not None:
|
420
|
+
if experiment.lifecycle_stage == LifecycleStage.DELETED:
|
421
|
+
raise MlflowException(
|
422
|
+
f"Experiment {experiment.name!r} already exists in deleted state. "
|
423
|
+
"You can restore the experiment, or permanently delete the experiment "
|
424
|
+
"from the .trash folder (under tracking server's root folder) in order to "
|
425
|
+
"use this experiment name again.",
|
426
|
+
databricks_pb2.RESOURCE_ALREADY_EXISTS,
|
427
|
+
)
|
428
|
+
else:
|
429
|
+
raise MlflowException(
|
430
|
+
f"Experiment '{experiment.name}' already exists.",
|
431
|
+
databricks_pb2.RESOURCE_ALREADY_EXISTS,
|
432
|
+
)
|
433
|
+
|
434
|
+
def create_experiment(self, name, artifact_location=None, tags=None):
|
435
|
+
self._check_root_dir()
|
436
|
+
_validate_experiment_name(name)
|
437
|
+
|
438
|
+
# Genesis-Flow: Use MLFLOW_ARTIFACT_LOCATION if no artifact location is provided
|
439
|
+
if not artifact_location:
|
440
|
+
from mlflow.environment_variables import MLFLOW_ARTIFACT_LOCATION
|
441
|
+
if MLFLOW_ARTIFACT_LOCATION.defined:
|
442
|
+
artifact_location = MLFLOW_ARTIFACT_LOCATION.get()
|
443
|
+
|
444
|
+
if artifact_location:
|
445
|
+
_validate_experiment_artifact_location_length(artifact_location)
|
446
|
+
|
447
|
+
self._validate_experiment_does_not_exist(name)
|
448
|
+
experiment_id = _generate_unique_integer_id()
|
449
|
+
return self._create_experiment_with_id(name, str(experiment_id), artifact_location, tags)
|
450
|
+
|
451
|
+
def _has_experiment(self, experiment_id):
|
452
|
+
return self._get_experiment_path(experiment_id) is not None
|
453
|
+
|
454
|
+
def _get_experiment(self, experiment_id, view_type=ViewType.ALL):
|
455
|
+
self._check_root_dir()
|
456
|
+
_validate_experiment_id(experiment_id)
|
457
|
+
experiment_dir = self._get_experiment_path(experiment_id, view_type)
|
458
|
+
if experiment_dir is None:
|
459
|
+
raise MlflowException(
|
460
|
+
f"Could not find experiment with ID {experiment_id}",
|
461
|
+
databricks_pb2.RESOURCE_DOES_NOT_EXIST,
|
462
|
+
)
|
463
|
+
meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
|
464
|
+
if meta is None:
|
465
|
+
raise MissingConfigException(
|
466
|
+
f"Experiment {experiment_id} is invalid with empty "
|
467
|
+
f"{FileStore.META_DATA_FILE_NAME} in directory '{experiment_dir}'."
|
468
|
+
)
|
469
|
+
|
470
|
+
meta["tags"] = self.get_all_experiment_tags(experiment_id)
|
471
|
+
experiment = _read_persisted_experiment_dict(meta)
|
472
|
+
if experiment_id != experiment.experiment_id:
|
473
|
+
logging.warning(
|
474
|
+
"Experiment ID mismatch for exp %s. ID recorded as '%s' in meta data. "
|
475
|
+
"Experiment will be ignored.",
|
476
|
+
experiment_id,
|
477
|
+
experiment.experiment_id,
|
478
|
+
exc_info=True,
|
479
|
+
)
|
480
|
+
return None
|
481
|
+
return experiment
|
482
|
+
|
483
|
+
def get_experiment(self, experiment_id):
|
484
|
+
"""
|
485
|
+
Fetch the experiment.
|
486
|
+
Note: This API will search for active as well as deleted experiments.
|
487
|
+
|
488
|
+
Args:
|
489
|
+
experiment_id: Integer id for the experiment
|
490
|
+
|
491
|
+
Returns:
|
492
|
+
A single Experiment object if it exists, otherwise raises an Exception.
|
493
|
+
"""
|
494
|
+
experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None else experiment_id
|
495
|
+
experiment = self._get_experiment(experiment_id)
|
496
|
+
if experiment is None:
|
497
|
+
raise MlflowException(
|
498
|
+
f"Experiment '{experiment_id}' does not exist.",
|
499
|
+
databricks_pb2.RESOURCE_DOES_NOT_EXIST,
|
500
|
+
)
|
501
|
+
return experiment
|
502
|
+
|
503
|
+
def delete_experiment(self, experiment_id):
|
504
|
+
if str(experiment_id) == str(FileStore.DEFAULT_EXPERIMENT_ID):
|
505
|
+
raise MlflowException(
|
506
|
+
"Cannot delete the default experiment "
|
507
|
+
f"'{FileStore.DEFAULT_EXPERIMENT_ID}'. This is an internally "
|
508
|
+
f"reserved experiment."
|
509
|
+
)
|
510
|
+
experiment_dir = self._get_experiment_path(experiment_id, ViewType.ACTIVE_ONLY)
|
511
|
+
if experiment_dir is None:
|
512
|
+
raise MlflowException(
|
513
|
+
f"Could not find experiment with ID {experiment_id}",
|
514
|
+
databricks_pb2.RESOURCE_DOES_NOT_EXIST,
|
515
|
+
)
|
516
|
+
experiment = self._get_experiment(experiment_id)
|
517
|
+
experiment._lifecycle_stage = LifecycleStage.DELETED
|
518
|
+
deletion_time = get_current_time_millis()
|
519
|
+
experiment._set_last_update_time(deletion_time)
|
520
|
+
runs = self._list_run_infos(experiment_id, view_type=ViewType.ACTIVE_ONLY)
|
521
|
+
for run_info in runs:
|
522
|
+
if run_info is not None:
|
523
|
+
new_info = run_info._copy_with_overrides(lifecycle_stage=LifecycleStage.DELETED)
|
524
|
+
self._overwrite_run_info(new_info, deleted_time=deletion_time)
|
525
|
+
else:
|
526
|
+
logging.warning("Run metadata is in invalid state.")
|
527
|
+
meta_dir = os.path.join(self.root_directory, experiment_id)
|
528
|
+
overwrite_yaml(
|
529
|
+
root=meta_dir,
|
530
|
+
file_name=FileStore.META_DATA_FILE_NAME,
|
531
|
+
data=dict(experiment),
|
532
|
+
)
|
533
|
+
mv(experiment_dir, self.trash_folder)
|
534
|
+
|
535
|
+
def _hard_delete_experiment(self, experiment_id):
|
536
|
+
"""
|
537
|
+
Permanently delete an experiment.
|
538
|
+
This is used by the ``mlflow gc`` command line and is not intended to be used elsewhere.
|
539
|
+
"""
|
540
|
+
experiment_dir = self._get_experiment_path(experiment_id, ViewType.DELETED_ONLY)
|
541
|
+
shutil.rmtree(experiment_dir)
|
542
|
+
|
543
|
+
def restore_experiment(self, experiment_id):
|
544
|
+
experiment_dir = self._get_experiment_path(experiment_id, ViewType.DELETED_ONLY)
|
545
|
+
if experiment_dir is None:
|
546
|
+
raise MlflowException(
|
547
|
+
f"Could not find deleted experiment with ID {experiment_id}",
|
548
|
+
databricks_pb2.RESOURCE_DOES_NOT_EXIST,
|
549
|
+
)
|
550
|
+
conflict_experiment = self._get_experiment_path(experiment_id, ViewType.ACTIVE_ONLY)
|
551
|
+
if conflict_experiment is not None:
|
552
|
+
raise MlflowException(
|
553
|
+
f"Cannot restore experiment with ID {experiment_id}. "
|
554
|
+
"An experiment with same ID already exists.",
|
555
|
+
databricks_pb2.RESOURCE_ALREADY_EXISTS,
|
556
|
+
)
|
557
|
+
mv(experiment_dir, self.root_directory)
|
558
|
+
experiment = self._get_experiment(experiment_id)
|
559
|
+
meta_dir = os.path.join(self.root_directory, experiment_id)
|
560
|
+
experiment._lifecycle_stage = LifecycleStage.ACTIVE
|
561
|
+
experiment._set_last_update_time(get_current_time_millis())
|
562
|
+
runs = self._list_run_infos(experiment_id, view_type=ViewType.DELETED_ONLY)
|
563
|
+
for run_info in runs:
|
564
|
+
if run_info is not None:
|
565
|
+
new_info = run_info._copy_with_overrides(lifecycle_stage=LifecycleStage.ACTIVE)
|
566
|
+
self._overwrite_run_info(new_info, deleted_time=None)
|
567
|
+
else:
|
568
|
+
logging.warning("Run metadata is in invalid state.")
|
569
|
+
overwrite_yaml(
|
570
|
+
root=meta_dir,
|
571
|
+
file_name=FileStore.META_DATA_FILE_NAME,
|
572
|
+
data=dict(experiment),
|
573
|
+
)
|
574
|
+
|
575
|
+
def rename_experiment(self, experiment_id, new_name):
|
576
|
+
_validate_experiment_name(new_name)
|
577
|
+
meta_dir = os.path.join(self.root_directory, experiment_id)
|
578
|
+
# if experiment is malformed, will raise error
|
579
|
+
experiment = self._get_experiment(experiment_id)
|
580
|
+
if experiment is None:
|
581
|
+
raise MlflowException(
|
582
|
+
f"Experiment '{experiment_id}' does not exist.",
|
583
|
+
databricks_pb2.RESOURCE_DOES_NOT_EXIST,
|
584
|
+
)
|
585
|
+
self._validate_experiment_does_not_exist(new_name)
|
586
|
+
experiment._set_name(new_name)
|
587
|
+
experiment._set_last_update_time(get_current_time_millis())
|
588
|
+
if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
|
589
|
+
raise Exception(
|
590
|
+
"Cannot rename experiment in non-active lifecycle stage."
|
591
|
+
f" Current stage: {experiment.lifecycle_stage}"
|
592
|
+
)
|
593
|
+
overwrite_yaml(
|
594
|
+
root=meta_dir,
|
595
|
+
file_name=FileStore.META_DATA_FILE_NAME,
|
596
|
+
data=dict(experiment),
|
597
|
+
)
|
598
|
+
|
599
|
+
def delete_run(self, run_id):
|
600
|
+
run_info = self._get_run_info(run_id)
|
601
|
+
if run_info is None:
|
602
|
+
raise MlflowException(
|
603
|
+
f"Run '{run_id}' metadata is in invalid state.",
|
604
|
+
databricks_pb2.INVALID_STATE,
|
605
|
+
)
|
606
|
+
new_info = run_info._copy_with_overrides(lifecycle_stage=LifecycleStage.DELETED)
|
607
|
+
self._overwrite_run_info(new_info, deleted_time=get_current_time_millis())
|
608
|
+
|
609
|
+
    def _hard_delete_run(self, run_id):
        """
        Permanently delete a run (metadata, metrics, tags, and parameters).
        This is used by the ``mlflow gc`` command line and is not intended to be used elsewhere.
        """
        _, run_dir = self._find_run_root(run_id)
        shutil.rmtree(run_dir)

    def _get_deleted_runs(self, older_than=0):
        """
        Get all deleted run ids.

        Args:
            older_than: Only return runs deleted at least this many milliseconds ago.
                Defaults to 0 to return all deleted runs.
        """
        current_time = get_current_time_millis()
        experiment_ids = self._get_active_experiments() + self._get_deleted_experiments()
        deleted_runs = self.search_runs(
            experiment_ids=experiment_ids,
            filter_string="",
            run_view_type=ViewType.DELETED_ONLY,
        )
        deleted_run_ids = []
        for deleted_run in deleted_runs:
            _, run_dir = self._find_run_root(deleted_run.info.run_id)
            meta = read_yaml(run_dir, FileStore.META_DATA_FILE_NAME)
            if "deleted_time" not in meta or current_time - int(meta["deleted_time"]) >= older_than:
                deleted_run_ids.append(deleted_run.info.run_id)

        return deleted_run_ids
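    # Illustrative sketch (not part of the package source): the ``mlflow gc``-style flow
    # these two helpers serve, assuming runs were soft-deleted earlier; the 30-day
    # cutoff is a hypothetical value.
    #
    #     store = FileStore("./mlruns")
    #     for run_id in store._get_deleted_runs(older_than=30 * 24 * 3600 * 1000):
    #         store._hard_delete_run(run_id)   # permanently removes the run directory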
    def restore_run(self, run_id):
        run_info = self._get_run_info(run_id)
        if run_info is None:
            raise MlflowException(
                f"Run '{run_id}' metadata is in invalid state.",
                databricks_pb2.INVALID_STATE,
            )
        new_info = run_info._copy_with_overrides(lifecycle_stage=LifecycleStage.ACTIVE)
        self._overwrite_run_info(new_info, deleted_time=None)

    def _find_experiment_folder(self, run_path):
        """
        Given a run path, return the parent directory for its experiment.
        """
        parent = get_parent_dir(run_path)
        if os.path.basename(parent) == FileStore.TRASH_FOLDER_NAME:
            return get_parent_dir(parent)
        return parent

    def _find_run_root(self, run_uuid):
        _validate_run_id(run_uuid)
        self._check_root_dir()
        all_experiments = self._get_active_experiments(True) + self._get_deleted_experiments(True)
        for experiment_dir in all_experiments:
            runs = find(experiment_dir, run_uuid, full_path=True)
            if len(runs) == 0:
                continue
            return os.path.basename(os.path.abspath(experiment_dir)), runs[0]
        return None, None
    def update_run_info(self, run_id, run_status, end_time, run_name):
        _validate_run_id(run_id)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)
        new_info = run_info._copy_with_overrides(run_status, end_time, run_name=run_name)
        if run_name:
            self._set_run_tag(run_info, RunTag(MLFLOW_RUN_NAME, run_name))
        self._overwrite_run_info(new_info)
        return new_info
    def create_run(self, experiment_id, user_id, start_time, tags, run_name):
        """
        Creates a run with the specified attributes.
        """
        experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None else experiment_id
        experiment = self.get_experiment(experiment_id)
        if experiment is None:
            raise MlflowException(
                f"Could not create run under experiment with ID {experiment_id} - no such "
                "experiment exists.",
                databricks_pb2.RESOURCE_DOES_NOT_EXIST,
            )
        if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
            raise MlflowException(
                f"Could not create run under non-active experiment with ID {experiment_id}.",
                databricks_pb2.INVALID_STATE,
            )
        tags = tags or []
        run_name_tag = _get_run_name_from_tags(tags)
        if run_name and run_name_tag and run_name != run_name_tag:
            raise MlflowException(
                "Both 'run_name' argument and 'mlflow.runName' tag are specified, but with "
                f"different values (run_name='{run_name}', mlflow.runName='{run_name_tag}').",
                INVALID_PARAMETER_VALUE,
            )
        run_name = run_name or run_name_tag or _generate_random_name()
        if not run_name_tag:
            tags.append(RunTag(key=MLFLOW_RUN_NAME, value=run_name))
        run_uuid = uuid.uuid4().hex
        artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
        run_info = RunInfo(
            run_id=run_uuid,
            run_name=run_name,
            experiment_id=experiment_id,
            artifact_uri=artifact_uri,
            user_id=user_id,
            status=RunStatus.to_string(RunStatus.RUNNING),
            start_time=start_time,
            end_time=None,
            lifecycle_stage=LifecycleStage.ACTIVE,
        )
        # Persist run metadata and create directories for logging metrics, parameters, artifacts
        run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
        mkdir(run_dir)
        run_info_dict = _make_persisted_run_info_dict(run_info)
        run_info_dict["deleted_time"] = None
        write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict)
        mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
        mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
        mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
        for tag in tags:
            self.set_tag(run_uuid, tag)
        return self.get_run(run_id=run_uuid)
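    # Illustrative sketch (not part of the package source): the directory tree
    # create_run() lays out for a hypothetical run under experiment "0"; the tags/
    # folder is created lazily by set_tag().
    #
    #     mlruns/0/<run_uuid>/
    #         meta.yaml      # persisted RunInfo plus a null deleted_time
    #         metrics/
    #         params/
    #         artifacts/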
    def get_run(self, run_id):
        """
        Note: Will get both active and deleted runs.
        """
        _validate_run_id(run_id)
        run_info = self._get_run_info(run_id)
        if run_info is None:
            raise MlflowException(
                f"Run '{run_id}' metadata is in invalid state.",
                databricks_pb2.INVALID_STATE,
            )
        return self._get_run_from_info(run_info)

    def _get_run_from_info(self, run_info):
        metrics = self._get_all_metrics(run_info)
        params = self._get_all_params(run_info)
        tags = self._get_all_tags(run_info)
        inputs: RunInputs = self._get_all_inputs(run_info)
        outputs: RunOutputs = self._get_all_outputs(run_info)
        if not run_info.run_name:
            run_name = _get_run_name_from_tags(tags)
            if run_name:
                run_info._set_run_name(run_name)
        return Run(run_info, RunData(metrics, params, tags), inputs, outputs)
    def _get_run_info(self, run_uuid):
        """
        Note: Will get both active and deleted runs.
        """
        exp_id, run_dir = self._find_run_root(run_uuid)
        if run_dir is None:
            raise MlflowException(
                f"Run '{run_uuid}' not found", databricks_pb2.RESOURCE_DOES_NOT_EXIST
            )
        run_info = self._get_run_info_from_dir(run_dir)
        if run_info.experiment_id != exp_id:
            raise MlflowException(
                f"Run '{run_uuid}' metadata is in invalid state.",
                databricks_pb2.INVALID_STATE,
            )
        return run_info

    def _get_run_info_from_dir(self, run_dir):
        meta = FileStore._read_yaml(run_dir, FileStore.META_DATA_FILE_NAME)
        return _read_persisted_run_info_dict(meta)
    def _get_run_files(self, run_info, resource_type):
        run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
        # run_dir exists since run validity has been confirmed above.
        if resource_type == "metric":
            subfolder_name = FileStore.METRICS_FOLDER_NAME
        elif resource_type == "param":
            subfolder_name = FileStore.PARAMS_FOLDER_NAME
        elif resource_type == "tag":
            subfolder_name = FileStore.TAGS_FOLDER_NAME
        else:
            raise Exception("Looking for unknown resource under run.")
        return self._get_resource_files(run_dir, subfolder_name)

    def _get_experiment_files(self, experiment_id):
        _validate_experiment_id(experiment_id)
        experiment_dir = self._get_experiment_path(experiment_id, assert_exists=True)
        return self._get_resource_files(experiment_dir, FileStore.EXPERIMENT_TAGS_FOLDER_NAME)

    def _get_resource_files(self, root_dir, subfolder_name):
        source_dirs = find(root_dir, subfolder_name, full_path=True)
        if len(source_dirs) == 0:
            return root_dir, []
        file_names = []
        for root, _, files in os.walk(source_dirs[0]):
            for name in files:
                abspath = os.path.join(root, name)
                file_names.append(os.path.relpath(abspath, source_dirs[0]))
        if sys.platform == "win32":
            # Turn the metric relative path into a metric name.
            # Metrics can have '/' in the name. On Windows, '/' is interpreted as a separator,
            # and when the metric is read back the path will use '\' as the separator.
            # We need to translate the path into a POSIX path.
            from mlflow.utils.file_utils import relative_path_to_artifact_path

            file_names = [relative_path_to_artifact_path(x) for x in file_names]
        return source_dirs[0], file_names
    @staticmethod
    def _get_metric_from_file(
        parent_path: str, metric_name: str, run_id: str, exp_id: str
    ) -> Metric:
        _validate_metric_name(metric_name)
        metric_objs = [
            FileStore._get_metric_from_line(run_id, metric_name, line, exp_id)
            for line in read_file_lines(parent_path, metric_name)
        ]
        if len(metric_objs) == 0:
            raise ValueError(f"Metric '{metric_name}' is malformed. No data found.")
        # Python performs element-wise comparison of equal-length tuples, ordering them
        # based on their first differing element. Therefore, we use the max() operator to
        # find the largest value at the largest timestamp. For more information, see
        # https://docs.python.org/3/reference/expressions.html#value-comparisons
        return max(metric_objs, key=lambda m: (m.step, m.timestamp, m.value))

    def get_all_metrics(self, run_uuid):
        _validate_run_id(run_uuid)
        run_info = self._get_run_info(run_uuid)
        return self._get_all_metrics(run_info)

    def _get_all_metrics(self, run_info):
        parent_path, metric_files = self._get_run_files(run_info, "metric")
        metrics = []
        for metric_file in metric_files:
            metrics.append(
                self._get_metric_from_file(
                    parent_path, metric_file, run_info.run_id, run_info.experiment_id
                )
            )
        return metrics
    @staticmethod
    def _get_metric_from_line(
        run_id: str, metric_name: str, metric_line: str, exp_id: str
    ) -> Metric:
        metric_parts = metric_line.strip().split(" ")
        if len(metric_parts) != 2 and len(metric_parts) != 3 and len(metric_parts) != 5:
            raise MlflowException(
                f"Metric '{metric_name}' is malformed; persisted metric data contained "
                f"{len(metric_parts)} fields. Expected 2, 3, or 5 fields. "
                f"Experiment id: {exp_id}",
                databricks_pb2.INTERNAL_ERROR,
            )
        ts = int(metric_parts[0])
        val = float(metric_parts[1])
        # Both the 3-field and the 5-field formats carry the step in the third field
        # (see _log_run_metric, which writes these lines).
        step = int(metric_parts[2]) if len(metric_parts) in (3, 5) else 0
        dataset_name = str(metric_parts[3]) if len(metric_parts) == 5 else None
        dataset_digest = str(metric_parts[4]) if len(metric_parts) == 5 else None
        return Metric(
            key=metric_name,
            value=val,
            timestamp=ts,
            step=step,
            dataset_name=dataset_name,
            dataset_digest=dataset_digest,
            run_id=run_id,
        )
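    # Illustrative sketch (not part of the package source): the three on-disk line
    # shapes _get_metric_from_line() accepts, one metric point per line; values here
    # are hypothetical.
    #
    #     1700000000000 0.95                    # timestamp value
    #     1700000000000 0.95 3                  # timestamp value step
    #     1700000000000 0.95 3 train abc123     # ... plus dataset name and digest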
    def get_metric_history(self, run_id, metric_key, max_results=None, page_token=None):
        """
        Return all logged values for a given metric.

        Args:
            run_id: Unique identifier for the run.
            metric_key: Metric name within the run.
            max_results: The maximum number of metric values to return per page; if None,
                all values from the requested offset onward are returned.
            page_token: Token indicating the page of metric history to fetch.

        Returns:
            A :py:class:`mlflow.store.entities.paged_list.PagedList` of
            :py:class:`mlflow.entities.Metric` entities if ``metric_key`` values
            have been logged to the ``run_id``, else an empty list.
        """
        _validate_run_id(run_id)
        _validate_metric_name(metric_key)
        run_info = self._get_run_info(run_id)

        parent_path, metric_files = self._get_run_files(run_info, "metric")
        if metric_key not in metric_files:
            return PagedList([], None)

        all_lines = read_file_lines(parent_path, metric_key)

        all_metrics = [
            FileStore._get_metric_from_line(run_id, metric_key, line, run_info.experiment_id)
            for line in all_lines
        ]

        if max_results is None:
            # If no max_results is specified, return all metrics but honor page_token if provided.
            offset = SearchUtils.parse_start_offset_from_page_token(page_token)
            metrics = all_metrics[offset:]
            next_page_token = None
        else:
            metrics, next_page_token = SearchUtils.paginate(all_metrics, page_token, max_results)

        return PagedList(metrics, next_page_token)
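    # Illustrative usage sketch (not part of the package source), with a hypothetical
    # run ID; when max_results is given, the returned PagedList carries a token for
    # the next page.
    #
    #     page = store.get_metric_history("run123", "loss", max_results=100)
    #     while page.token:
    #         page = store.get_metric_history("run123", "loss", max_results=100,
    #                                         page_token=page.token)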
    @staticmethod
    def _get_param_from_file(parent_path, param_name):
        _validate_param_name(param_name)
        value = read_file(parent_path, param_name)
        return Param(param_name, value)

    def get_all_params(self, run_uuid):
        _validate_run_id(run_uuid)
        run_info = self._get_run_info(run_uuid)
        return self._get_all_params(run_info)

    def _get_all_params(self, run_info):
        parent_path, param_files = self._get_run_files(run_info, "param")
        params = []
        for param_file in param_files:
            params.append(self._get_param_from_file(parent_path, param_file))
        return params

    @staticmethod
    def _get_experiment_tag_from_file(parent_path, tag_name):
        _validate_tag_name(tag_name)
        tag_data = read_file(parent_path, tag_name)
        return ExperimentTag(tag_name, tag_data)

    def get_all_experiment_tags(self, exp_id):
        parent_path, tag_files = self._get_experiment_files(exp_id)
        tags = []
        for tag_file in tag_files:
            tags.append(self._get_experiment_tag_from_file(parent_path, tag_file))
        return tags

    @staticmethod
    def _get_tag_from_file(parent_path, tag_name):
        _validate_tag_name(tag_name)
        tag_data = read_file(parent_path, tag_name)
        return RunTag(tag_name, tag_data)

    def get_all_tags(self, run_uuid):
        _validate_run_id(run_uuid)
        run_info = self._get_run_info(run_uuid)
        return self._get_all_tags(run_info)

    def _get_all_tags(self, run_info):
        parent_path, tag_files = self._get_run_files(run_info, "tag")
        tags = []
        for tag_file in tag_files:
            tags.append(self._get_tag_from_file(parent_path, tag_file))
        return tags
    def _list_run_infos(self, experiment_id, view_type):
        self._check_root_dir()
        if not self._has_experiment(experiment_id):
            return []
        experiment_dir = self._get_experiment_path(experiment_id, assert_exists=True)
        run_dirs = list_all(
            experiment_dir,
            filter_func=lambda x: all(
                os.path.basename(os.path.normpath(x)) != reservedFolderName
                for reservedFolderName in FileStore.RESERVED_EXPERIMENT_FOLDERS
            )
            and os.path.isdir(x),
            full_path=True,
        )
        run_infos = []
        for r_dir in run_dirs:
            try:
                # Trap and warn on known issues; unexpected exceptions are raised to the caller.
                run_info = self._get_run_info_from_dir(r_dir)
                if run_info.experiment_id != experiment_id:
                    logging.warning(
                        "Wrong experiment ID (%s) recorded for run '%s'. "
                        "It should be %s. Run will be ignored.",
                        str(run_info.experiment_id),
                        str(run_info.run_id),
                        str(experiment_id),
                        exc_info=True,
                    )
                    continue
                if LifecycleStage.matches_view_type(view_type, run_info.lifecycle_stage):
                    run_infos.append(run_info)
            except MissingConfigException as rnfe:
                # Trap the malformed-run exception and log it. This is at debug level
                # because, if the same store is used for artifact storage, it is common
                # for a folder not to be a run folder.
                r_id = os.path.basename(r_dir)
                logging.debug(
                    "Malformed run '%s'. Detailed error %s",
                    r_id,
                    str(rnfe),
                    exc_info=True,
                )
        return run_infos
    def _search_runs(
        self,
        experiment_ids,
        filter_string,
        run_view_type,
        max_results,
        order_by,
        page_token,
    ):
        if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
            raise MlflowException(
                "Invalid value for request parameter max_results. It must be at "
                f"most {SEARCH_MAX_RESULTS_THRESHOLD}, but got value {max_results}",
                databricks_pb2.INVALID_PARAMETER_VALUE,
            )
        runs = []
        for experiment_id in experiment_ids:
            run_infos = self._list_run_infos(experiment_id, run_view_type)
            runs.extend(self._get_run_from_info(r) for r in run_infos)
        filtered = SearchUtils.filter(runs, filter_string)
        sorted_runs = SearchUtils.sort(filtered, order_by)
        runs, next_page_token = SearchUtils.paginate(sorted_runs, page_token, max_results)
        return runs, next_page_token
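    # Illustrative sketch (not part of the package source): _search_runs loads every
    # run in the given experiments, then filters, sorts, and paginates in memory.
    # The filter syntax is the standard MLflow search grammar, e.g.:
    #
    #     runs, token = store._search_runs(
    #         experiment_ids=["0"],
    #         filter_string="metrics.loss < 0.5 and tags.stage = 'prod'",
    #         run_view_type=ViewType.ACTIVE_ONLY,
    #         max_results=100,
    #         order_by=["metrics.loss ASC"],
    #         page_token=None,
    #     )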
    def log_metric(self, run_id: str, metric: Metric):
        _validate_run_id(run_id)
        _validate_metric(metric.key, metric.value, metric.timestamp, metric.step)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)
        self._log_run_metric(run_info, metric)
        if metric.model_id is not None:
            self._log_model_metric(
                experiment_id=run_info.experiment_id,
                model_id=metric.model_id,
                run_id=run_id,
                metric=metric,
            )

    def _log_run_metric(self, run_info, metric):
        metric_path = self._get_metric_path(run_info.experiment_id, run_info.run_id, metric.key)
        make_containing_dirs(metric_path)
        if metric.dataset_name is not None and metric.dataset_digest is not None:
            append_to(
                metric_path,
                f"{metric.timestamp} {metric.value} {metric.step} {metric.dataset_name} "
                f"{metric.dataset_digest}\n",
            )
        else:
            append_to(metric_path, f"{metric.timestamp} {metric.value} {metric.step}\n")

    def _log_model_metric(self, experiment_id: str, model_id: str, run_id: str, metric: Metric):
        metric_path = self._get_model_metric_path(
            experiment_id=experiment_id, model_id=model_id, metric_key=metric.key
        )
        make_containing_dirs(metric_path)
        if metric.dataset_name is not None and metric.dataset_digest is not None:
            append_to(
                metric_path,
                f"{metric.timestamp} {metric.value} {metric.step} {run_id} {metric.dataset_name} "
                f"{metric.dataset_digest}\n",
            )
        else:
            append_to(metric_path, f"{metric.timestamp} {metric.value} {metric.step} {run_id}\n")
    def _writeable_value(self, tag_value):
        if tag_value is None:
            return ""
        elif is_string_type(tag_value):
            return tag_value
        else:
            return str(tag_value)

    def log_param(self, run_id, param):
        _validate_run_id(run_id)
        param = _validate_param(param.key, param.value)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)
        self._log_run_param(run_info, param)

    def _log_run_param(self, run_info, param):
        param_path = self._get_param_path(run_info.experiment_id, run_info.run_id, param.key)
        writeable_param_value = self._writeable_value(param.value)
        if os.path.exists(param_path):
            self._validate_new_param_value(
                param_path=param_path,
                param_key=param.key,
                run_id=run_info.run_id,
                new_value=writeable_param_value,
            )
        make_containing_dirs(param_path)
        write_to(param_path, writeable_param_value)

    def _validate_new_param_value(self, param_path, param_key, run_id, new_value):
        """
        When logging a parameter with a key that already exists, this function is used to
        enforce immutability by verifying that the specified parameter value matches the
        existing value.

        :raises: :py:class:`mlflow.exceptions.MlflowException` if the specified new parameter
            value does not match the existing parameter value.
        """
        with open(param_path) as param_file:
            current_value = param_file.read()
        if current_value != new_value:
            raise MlflowException(
                f"Changing param values is not allowed. Param with key='{param_key}' was already"
                f" logged with value='{current_value}' for run ID='{run_id}'. Attempted logging"
                f" new value '{new_value}'.",
                databricks_pb2.INVALID_PARAMETER_VALUE,
            )
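    # Illustrative sketch (not part of the package source): params are immutable, so
    # re-logging a key with a different value raises INVALID_PARAMETER_VALUE.
    #
    #     store.log_param(run_id, Param("lr", "0.01"))
    #     store.log_param(run_id, Param("lr", "0.01"))   # accepted: value is unchanged
    #     store.log_param(run_id, Param("lr", "0.02"))   # raises MlflowException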
    def set_experiment_tag(self, experiment_id, tag):
        """
        Set a tag for the specified experiment.

        Args:
            experiment_id: String ID of the experiment.
            tag: ExperimentTag instance to log.
        """
        _validate_tag_name(tag.key)
        experiment = self.get_experiment(experiment_id)
        if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
            raise MlflowException(
                f"The experiment {experiment.experiment_id} must be in the 'active' "
                "lifecycle_stage to set tags",
                error_code=databricks_pb2.INVALID_PARAMETER_VALUE,
            )
        tag_path = self._get_experiment_tag_path(experiment_id, tag.key)
        make_containing_dirs(tag_path)
        write_to(tag_path, self._writeable_value(tag.value))

    def set_tag(self, run_id, tag):
        _validate_run_id(run_id)
        _validate_tag_name(tag.key)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)
        self._set_run_tag(run_info, tag)
        if tag.key == MLFLOW_RUN_NAME:
            run_status = RunStatus.from_string(run_info.status)
            self.update_run_info(run_id, run_status, run_info.end_time, tag.value)

    def _set_run_tag(self, run_info, tag):
        tag_path = self._get_tag_path(run_info.experiment_id, run_info.run_id, tag.key)
        make_containing_dirs(tag_path)
        # Don't add trailing newline
        write_to(tag_path, self._writeable_value(tag.value))

    def delete_tag(self, run_id, key):
        """
        Delete a tag from a run. This is irreversible.

        Args:
            run_id: String ID of the run.
            key: Name of the tag.
        """
        _validate_run_id(run_id)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)
        tag_path = self._get_tag_path(run_info.experiment_id, run_id, key)
        if not exists(tag_path):
            raise MlflowException(
                f"No tag with name: {key} in run with id {run_id}",
                error_code=RESOURCE_DOES_NOT_EXIST,
            )
        os.remove(tag_path)
    def _overwrite_run_info(self, run_info, deleted_time=None):
        run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
        run_info_dict = _make_persisted_run_info_dict(run_info)
        if deleted_time is not None:
            run_info_dict["deleted_time"] = deleted_time
        write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict, overwrite=True)

    def log_batch(self, run_id, metrics, params, tags):
        _validate_run_id(run_id)
        metrics, params, tags = _validate_batch_log_data(metrics, params, tags)
        _validate_batch_log_limits(metrics, params, tags)
        _validate_param_keys_unique(params)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)
        try:
            for param in params:
                self._log_run_param(run_info, param)
            for metric in metrics:
                self._log_run_metric(run_info, metric)
                if metric.model_id is not None:
                    self._log_model_metric(
                        experiment_id=run_info.experiment_id,
                        model_id=metric.model_id,
                        run_id=run_id,
                        metric=metric,
                    )
            for tag in tags:
                # NB: If the tag run name value is set, update the run info to assure
                # synchronization.
                if tag.key == MLFLOW_RUN_NAME:
                    run_status = RunStatus.from_string(run_info.status)
                    self.update_run_info(run_id, run_status, run_info.end_time, tag.value)
                self._set_run_tag(run_info, tag)
        except Exception as e:
            raise MlflowException(e, INTERNAL_ERROR)
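    # Illustrative usage sketch (not part of the package source), assuming entities
    # imported from mlflow.entities and a hypothetical timestamp_ms; any failure inside
    # the loops is re-raised as an INTERNAL_ERROR MlflowException.
    #
    #     store.log_batch(
    #         run_id,
    #         metrics=[Metric("loss", 0.4, timestamp_ms, step=1)],
    #         params=[Param("lr", "0.01")],
    #         tags=[RunTag("stage", "dev")],
    #     )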
    def record_logged_model(self, run_id, mlflow_model):
        from mlflow.models import Model

        if not isinstance(mlflow_model, Model):
            raise TypeError(
                f"Argument 'mlflow_model' should be mlflow.models.Model, got '{type(mlflow_model)}'"
            )
        _validate_run_id(run_id)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)
        model_dict = mlflow_model.get_tags_dict()
        path = self._get_tag_path(run_info.experiment_id, run_info.run_id, MLFLOW_LOGGED_MODELS)
        if os.path.exists(path):
            with open(path) as f:
                model_list = json.loads(f.read())
        else:
            model_list = []
        tag = RunTag(MLFLOW_LOGGED_MODELS, json.dumps(model_list + [model_dict]))

        try:
            self._set_run_tag(run_info, tag)
        except Exception as e:
            raise MlflowException(e, INTERNAL_ERROR)
    def log_inputs(
        self,
        run_id: str,
        datasets: Optional[list[DatasetInput]] = None,
        models: Optional[list[LoggedModelInput]] = None,
    ):
        """
        Log inputs, such as datasets and models, to the specified run.

        Args:
            run_id: String id for the run
            datasets: List of :py:class:`mlflow.entities.DatasetInput` instances to log
                as inputs to the run.
            models: List of :py:class:`mlflow.entities.LoggedModelInput` instances to log
                as inputs to the run.

        Returns:
            None.
        """
        _validate_run_id(run_id)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)

        if datasets is None and models is None:
            return

        experiment_dir = self._get_experiment_path(run_info.experiment_id, assert_exists=True)
        run_dir = self._get_run_dir(run_info.experiment_id, run_id)

        for dataset_input in datasets or []:
            dataset = dataset_input.dataset
            dataset_id = FileStore._get_dataset_id(
                dataset_name=dataset.name, dataset_digest=dataset.digest
            )
            dataset_dir = os.path.join(experiment_dir, FileStore.DATASETS_FOLDER_NAME, dataset_id)
            if not os.path.exists(dataset_dir):
                os.makedirs(dataset_dir, exist_ok=True)
                write_yaml(dataset_dir, FileStore.META_DATA_FILE_NAME, dict(dataset))

            input_id = FileStore._get_dataset_input_id(dataset_id=dataset_id, run_id=run_id)
            input_dir = os.path.join(run_dir, FileStore.INPUTS_FOLDER_NAME, input_id)
            if not os.path.exists(input_dir):
                os.makedirs(input_dir, exist_ok=True)
                fs_input = FileStore._FileStoreInput(
                    source_type=InputVertexType.DATASET,
                    source_id=dataset_id,
                    destination_type=InputVertexType.RUN,
                    destination_id=run_id,
                    tags={tag.key: tag.value for tag in dataset_input.tags},
                )
                fs_input.write_yaml(input_dir, FileStore.META_DATA_FILE_NAME)

        for model_input in models or []:
            model_id = model_input.model_id
            input_id = FileStore._get_model_input_id(model_id=model_id, run_id=run_id)
            input_dir = os.path.join(run_dir, FileStore.INPUTS_FOLDER_NAME, input_id)
            if not os.path.exists(input_dir):
                os.makedirs(input_dir, exist_ok=True)
                fs_input = FileStore._FileStoreInput(
                    source_type=InputVertexType.MODEL,
                    source_id=model_id,
                    destination_type=InputVertexType.RUN,
                    destination_id=run_id,
                    tags={},
                )
                fs_input.write_yaml(input_dir, FileStore.META_DATA_FILE_NAME)
    def log_outputs(self, run_id: str, models: list[LoggedModelOutput]):
        """
        Log outputs, such as models, to the specified run.

        Args:
            run_id: String id for the run
            models: List of :py:class:`mlflow.entities.LoggedModelOutput` instances to log
                as outputs of the run.

        Returns:
            None.
        """
        _validate_run_id(run_id)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)

        if models is None:
            return

        run_dir = self._get_run_dir(run_info.experiment_id, run_id)

        for model_output in models:
            model_id = model_output.model_id
            output_dir = os.path.join(run_dir, FileStore.OUTPUTS_FOLDER_NAME, model_id)
            if not os.path.exists(output_dir):
                os.makedirs(output_dir, exist_ok=True)
                fs_output = FileStore._FileStoreOutput(
                    source_type=OutputVertexType.RUN_OUTPUT,
                    source_id=model_id,
                    destination_type=OutputVertexType.MODEL_OUTPUT,
                    destination_id=run_id,
                    tags={},
                    step=model_output.step,
                )
                fs_output.write_yaml(output_dir, FileStore.META_DATA_FILE_NAME)
    @staticmethod
    def _get_dataset_id(dataset_name: str, dataset_digest: str) -> str:
        md5 = hashlib.md5(dataset_name.encode("utf-8"), usedforsecurity=False)
        md5.update(dataset_digest.encode("utf-8"))
        return md5.hexdigest()

    @staticmethod
    def _get_dataset_input_id(dataset_id: str, run_id: str) -> str:
        md5 = hashlib.md5(dataset_id.encode("utf-8"), usedforsecurity=False)
        md5.update(run_id.encode("utf-8"))
        return md5.hexdigest()

    @staticmethod
    def _get_model_input_id(model_id: str, run_id: str) -> str:
        md5 = hashlib.md5(model_id.encode("utf-8"), usedforsecurity=False)
        md5.update(run_id.encode("utf-8"))
        return md5.hexdigest()
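    # Illustrative sketch (not part of the package source): these IDs are plain MD5
    # digests over the concatenated fields, so an equivalent value can be computed
    # externally; the name and digest below are hypothetical.
    #
    #     import hashlib
    #     dataset_id = hashlib.md5(b"train" + b"abc123", usedforsecurity=False).hexdigest()
    #     # equivalent to FileStore._get_dataset_id("train", "abc123")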
    class _FileStoreInput(NamedTuple):
        source_type: int
        source_id: str
        destination_type: int
        destination_id: str
        tags: dict[str, str]

        def write_yaml(self, root: str, file_name: str):
            dict_for_yaml = {
                "source_type": InputVertexType.Name(self.source_type),
                "source_id": self.source_id,
                "destination_type": InputVertexType.Name(self.destination_type),
                "destination_id": self.source_id,
                "tags": self.tags,
            }
            write_yaml(root, file_name, dict_for_yaml)

        @classmethod
        def from_yaml(cls, root, file_name):
            dict_from_yaml = FileStore._read_yaml(root, file_name)
            return cls(
                source_type=InputVertexType.Value(dict_from_yaml["source_type"]),
                source_id=dict_from_yaml["source_id"],
                destination_type=InputVertexType.Value(dict_from_yaml["destination_type"]),
                destination_id=dict_from_yaml["destination_id"],
                tags=dict_from_yaml["tags"],
            )
    class _FileStoreOutput(NamedTuple):
        source_type: int
        source_id: str
        destination_type: int
        destination_id: str
        tags: dict[str, str]
        step: int

        def write_yaml(self, root: str, file_name: str):
            dict_for_yaml = {
                "source_type": OutputVertexType.Name(self.source_type),
                "source_id": self.source_id,
                "destination_type": OutputVertexType.Name(self.destination_type),
                "destination_id": self.source_id,
                "tags": self.tags,
                "step": self.step,
            }
            write_yaml(root, file_name, dict_for_yaml)

        @classmethod
        def from_yaml(cls, root, file_name):
            dict_from_yaml = FileStore._read_yaml(root, file_name)
            return cls(
                source_type=OutputVertexType.Value(dict_from_yaml["source_type"]),
                source_id=dict_from_yaml["source_id"],
                destination_type=OutputVertexType.Value(dict_from_yaml["destination_type"]),
                destination_id=dict_from_yaml["destination_id"],
                tags=dict_from_yaml["tags"],
                step=dict_from_yaml["step"],
            )
    def _get_all_inputs(self, run_info: RunInfo) -> RunInputs:
        run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
        inputs_parent_path = os.path.join(run_dir, FileStore.INPUTS_FOLDER_NAME)
        if not os.path.exists(inputs_parent_path):
            return RunInputs(dataset_inputs=[], model_inputs=[])

        experiment_dir = self._get_experiment_path(run_info.experiment_id, assert_exists=True)
        dataset_inputs = self._get_dataset_inputs(run_info, inputs_parent_path, experiment_dir)
        model_inputs = self._get_model_inputs(inputs_parent_path, experiment_dir)
        return RunInputs(dataset_inputs=dataset_inputs, model_inputs=model_inputs)

    def _get_dataset_inputs(
        self, run_info: RunInfo, inputs_parent_path: str, experiment_dir_path: str
    ) -> list[DatasetInput]:
        datasets_parent_path = os.path.join(experiment_dir_path, FileStore.DATASETS_FOLDER_NAME)
        if not os.path.exists(datasets_parent_path):
            return []

        dataset_dirs = os.listdir(datasets_parent_path)
        dataset_inputs = []
        for input_dir in os.listdir(inputs_parent_path):
            input_dir_full_path = os.path.join(inputs_parent_path, input_dir)
            fs_input = FileStore._FileStoreInput.from_yaml(
                input_dir_full_path, FileStore.META_DATA_FILE_NAME
            )
            if fs_input.source_type != InputVertexType.DATASET:
                continue

            matching_dataset_dirs = [d for d in dataset_dirs if d == fs_input.source_id]
            if not matching_dataset_dirs:
                logging.warning(
                    f"Failed to find dataset with ID '{fs_input.source_id}' referenced as an input"
                    f" of the run with ID '{run_info.run_id}'. Skipping."
                )
                continue
            elif len(matching_dataset_dirs) > 1:
                logging.warning(
                    f"Found multiple datasets with ID '{fs_input.source_id}'. Using the first one."
                )

            dataset_dir = matching_dataset_dirs[0]
            dataset = FileStore._get_dataset_from_dir(datasets_parent_path, dataset_dir)
            dataset_input = DatasetInput(
                dataset=dataset,
                tags=[InputTag(key=key, value=value) for key, value in fs_input.tags.items()],
            )
            dataset_inputs.append(dataset_input)

        return dataset_inputs
    def _get_model_inputs(
        self, inputs_parent_path: str, experiment_dir_path: str
    ) -> list[LoggedModelInput]:
        model_inputs = []
        for input_dir in os.listdir(inputs_parent_path):
            input_dir_full_path = os.path.join(inputs_parent_path, input_dir)
            fs_input = FileStore._FileStoreInput.from_yaml(
                input_dir_full_path, FileStore.META_DATA_FILE_NAME
            )
            if fs_input.source_type != InputVertexType.MODEL:
                continue

            model_input = LoggedModelInput(model_id=fs_input.source_id)
            model_inputs.append(model_input)

        return model_inputs

    def _get_all_outputs(self, run_info: RunInfo) -> RunOutputs:
        run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
        outputs_parent_path = os.path.join(run_dir, FileStore.OUTPUTS_FOLDER_NAME)
        if not os.path.exists(outputs_parent_path):
            return RunOutputs(model_outputs=[])

        experiment_dir = self._get_experiment_path(run_info.experiment_id, assert_exists=True)
        model_outputs = self._get_model_outputs(outputs_parent_path, experiment_dir)
        return RunOutputs(model_outputs=model_outputs)

    def _get_model_outputs(
        self, outputs_parent_path: str, experiment_dir: str
    ) -> list[LoggedModelOutput]:
        model_outputs = []
        for output_dir in os.listdir(outputs_parent_path):
            output_dir_full_path = os.path.join(outputs_parent_path, output_dir)
            fs_output = FileStore._FileStoreOutput.from_yaml(
                output_dir_full_path, FileStore.META_DATA_FILE_NAME
            )
            if fs_output.destination_type != OutputVertexType.MODEL_OUTPUT:
                continue

            model_output = LoggedModelOutput(model_id=fs_output.destination_id, step=fs_output.step)
            model_outputs.append(model_output)

        return model_outputs
    def _search_datasets(self, experiment_ids) -> list[_DatasetSummary]:
        """
        Return all dataset summaries associated with the given experiments.

        Args:
            experiment_ids: List of experiment ids to scope the search.

        Returns:
            A list of :py:class:`mlflow.entities.DatasetSummary` entities.
        """

        @dataclass(frozen=True)
        class _SummaryTuple:
            experiment_id: str
            name: str
            digest: str
            context: str

        MAX_DATASET_SUMMARIES_RESULTS = 1000
        summaries = set()
        for experiment_id in experiment_ids:
            experiment_dir = self._get_experiment_path(experiment_id, assert_exists=True)
            run_dirs = list_all(
                experiment_dir,
                filter_func=lambda x: all(
                    os.path.basename(os.path.normpath(x)) != reservedFolderName
                    for reservedFolderName in FileStore.RESERVED_EXPERIMENT_FOLDERS
                )
                and os.path.isdir(x),
                full_path=True,
            )
            for run_dir in run_dirs:
                run_info = self._get_run_info_from_dir(run_dir)
                run_inputs = self._get_all_inputs(run_info)
                for dataset_input in run_inputs.dataset_inputs:
                    context = None
                    for input_tag in dataset_input.tags:
                        if input_tag.key == MLFLOW_DATASET_CONTEXT:
                            context = input_tag.value
                            break
                    dataset = dataset_input.dataset
                    summaries.add(
                        _SummaryTuple(experiment_id, dataset.name, dataset.digest, context)
                    )
                    # If we reached MAX_DATASET_SUMMARIES_RESULTS entries, return right away.
                    if len(summaries) == MAX_DATASET_SUMMARIES_RESULTS:
                        return [
                            _DatasetSummary(
                                experiment_id=summary.experiment_id,
                                name=summary.name,
                                digest=summary.digest,
                                context=summary.context,
                            )
                            for summary in summaries
                        ]

        return [
            _DatasetSummary(
                experiment_id=summary.experiment_id,
                name=summary.name,
                digest=summary.digest,
                context=summary.context,
            )
            for summary in summaries
        ]
    @staticmethod
    def _get_dataset_from_dir(parent_path, dataset_dir) -> Dataset:
        dataset_dict = FileStore._read_yaml(
            os.path.join(parent_path, dataset_dir), FileStore.META_DATA_FILE_NAME
        )
        return Dataset.from_dictionary(dataset_dict)

    @staticmethod
    def _read_yaml(root, file_name, retries=2):
        """
        Read data from yaml file and return as dictionary, retrying up to
        a specified number of times if the file contents are unexpectedly
        empty due to a concurrent write.

        Args:
            root: Directory name.
            file_name: File name. Expects to have '.yaml' extension.
            retries: The number of times to retry for unexpected empty content.

        Returns:
            Data in yaml file as dictionary.
        """

        def _read_helper(root, file_name, attempts_remaining=2):
            result = read_yaml(root, file_name)
            if result is not None or attempts_remaining == 0:
                return result
            else:
                time.sleep(0.1 * (3 - attempts_remaining))
                return _read_helper(root, file_name, attempts_remaining - 1)

        return _read_helper(root, file_name, attempts_remaining=retries)
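    # Illustrative note (not part of the package source): with the default retries=2,
    # a read that keeps returning empty content is attempted three times in total,
    # sleeping 0.1s and then 0.2s between attempts before giving up.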
    def _get_traces_artifact_dir(self, experiment_id, trace_id):
        return append_to_uri_path(
            self.get_experiment(experiment_id).artifact_location,
            FileStore.TRACES_FOLDER_NAME,
            trace_id,
            FileStore.ARTIFACTS_FOLDER_NAME,
        )

    def _save_trace_info(self, trace_info: TraceInfo, trace_dir, overwrite=False):
        """
        TraceInfo is saved into the `traces` folder under the experiment; each trace
        is saved in a folder named by its trace_id.
        The `request_metadata` and `tags` folders store their key-value pairs such that
        each key is the file name and the value is written as the file's string content.
        The detailed directory structure is as below:

        | - experiment_id
            | - traces
                | - trace_id1
                    | - trace_info.yaml
                    | - request_metadata
                        | - key
                    | - tags
                | - trace_id2
                    | - ...
            | - run_id1 ...
            | - run_id2 ...
        """
        # Save basic trace info to TRACE_INFO_FILE_NAME
        trace_info_dict = self._convert_trace_info_to_dict(trace_info)
        write_yaml(
            trace_dir,
            FileStore.TRACE_INFO_FILE_NAME,
            trace_info_dict,
            overwrite=overwrite,
        )
        # Save trace_metadata to its own folder
        self._write_dict_to_trace_sub_folder(
            trace_dir,
            FileStore.TRACE_TRACE_METADATA_FOLDER_NAME,
            trace_info.trace_metadata,
        )
        # Save tags to its own folder
        self._write_dict_to_trace_sub_folder(
            trace_dir, FileStore.TRACE_TAGS_FOLDER_NAME, trace_info.tags
        )
    def _convert_trace_info_to_dict(self, trace_info: TraceInfo):
        """
        Convert trace info to a dictionary for persistence.
        Drop request_metadata and tags, as they are saved into separate files.
        """
        trace_info_dict = trace_info.to_dict()
        trace_info_dict.pop("trace_metadata", None)
        trace_info_dict.pop("tags", None)
        return trace_info_dict

    def _write_dict_to_trace_sub_folder(self, trace_dir, sub_folder, dictionary):
        mkdir(trace_dir, sub_folder)
        for key, value in dictionary.items():
            # Always validate as a tag name to make sure the file name is valid.
            _validate_tag_name(key)
            tag_path = os.path.join(trace_dir, sub_folder, key)
            # Values are written as strings.
            write_to(tag_path, self._writeable_value(value))

    def _get_dict_from_trace_sub_folder(self, trace_dir, sub_folder):
        parent_path, files = self._get_resource_files(trace_dir, sub_folder)
        dictionary = {}
        for file_name in files:
            _validate_tag_name(file_name)
            value = read_file(parent_path, file_name)
            dictionary[file_name] = value
        return dictionary
    def start_trace(self, trace_info: TraceInfo) -> TraceInfo:
        """
        Create a trace using the V3 API format with a complete Trace object.

        Args:
            trace_info: The TraceInfo object to create in the backend.

        Returns:
            The created TraceInfo object from the backend.
        """
        _validate_experiment_id(trace_info.experiment_id)
        experiment_dir = self._get_experiment_path(
            trace_info.experiment_id, view_type=ViewType.ACTIVE_ONLY, assert_exists=True
        )

        # Create traces directory structure
        mkdir(experiment_dir, FileStore.TRACES_FOLDER_NAME)
        traces_dir = os.path.join(experiment_dir, FileStore.TRACES_FOLDER_NAME)
        mkdir(traces_dir, trace_info.trace_id)
        trace_dir = os.path.join(traces_dir, trace_info.trace_id)

        # Add artifact location to tags
        artifact_uri = self._get_traces_artifact_dir(trace_info.experiment_id, trace_info.trace_id)
        tags = dict(trace_info.tags)
        tags[MLFLOW_ARTIFACT_LOCATION] = artifact_uri

        # Create updated TraceInfo with artifact location tag
        trace_info.tags.update(tags)
        self._save_trace_info(trace_info, trace_dir)
        return trace_info

    def get_trace_info(self, trace_id: str) -> TraceInfo:
        """
        Get the trace matching the `trace_id`.

        Args:
            trace_id: String id of the trace to fetch.

        Returns:
            The fetched trace, of type ``mlflow.entities.TraceInfo``.
        """
        return self._get_trace_info_and_dir(trace_id)[0]

    def _get_trace_info_and_dir(self, trace_id: str) -> tuple[TraceInfo, str]:
        trace_dir = self._find_trace_dir(trace_id, assert_exists=True)
        trace_info = self._get_trace_info_from_dir(trace_dir)
        if trace_info and trace_info.trace_id != trace_id:
            raise MlflowException(
                f"Trace with ID '{trace_id}' metadata is in invalid state.",
                databricks_pb2.INVALID_STATE,
            )
        return trace_info, trace_dir
    def _find_trace_dir(self, trace_id, assert_exists=False):
        self._check_root_dir()
        all_experiments = self._get_active_experiments(True) + self._get_deleted_experiments(True)
        for experiment_dir in all_experiments:
            traces_dir = os.path.join(experiment_dir, FileStore.TRACES_FOLDER_NAME)
            if exists(traces_dir):
                if traces := find(traces_dir, trace_id, full_path=True):
                    return traces[0]
        if assert_exists:
            raise MlflowException(
                f"Trace with ID '{trace_id}' not found",
                RESOURCE_DOES_NOT_EXIST,
            )

    def _get_trace_info_from_dir(self, trace_dir) -> Optional[TraceInfo]:
        if not os.path.exists(os.path.join(trace_dir, FileStore.TRACE_INFO_FILE_NAME)):
            return None
        trace_info_dict = FileStore._read_yaml(trace_dir, FileStore.TRACE_INFO_FILE_NAME)
        trace_info = TraceInfo.from_dict(trace_info_dict)
        trace_info.trace_metadata = self._get_dict_from_trace_sub_folder(
            trace_dir, FileStore.TRACE_TRACE_METADATA_FOLDER_NAME
        )
        trace_info.tags = self._get_dict_from_trace_sub_folder(
            trace_dir, FileStore.TRACE_TAGS_FOLDER_NAME
        )
        return trace_info
    def set_trace_tag(self, trace_id: str, key: str, value: str):
        """
        Set a tag on the trace with the given trace_id.

        Args:
            trace_id: The ID of the trace.
            key: The string key of the tag.
            value: The string value of the tag.
        """
        trace_dir = self._find_trace_dir(trace_id, assert_exists=True)
        self._write_dict_to_trace_sub_folder(
            trace_dir, FileStore.TRACE_TAGS_FOLDER_NAME, {key: value}
        )

    def delete_trace_tag(self, trace_id: str, key: str):
        """
        Delete a tag on the trace with the given trace_id.

        Args:
            trace_id: The ID of the trace.
            key: The string key of the tag.
        """
        _validate_tag_name(key)
        trace_dir = self._find_trace_dir(trace_id, assert_exists=True)
        tag_path = os.path.join(trace_dir, FileStore.TRACE_TAGS_FOLDER_NAME, key)
        if not exists(tag_path):
            raise MlflowException(
                f"No tag with name: {key} in trace with ID {trace_id}.",
                RESOURCE_DOES_NOT_EXIST,
            )
        os.remove(tag_path)
    def _delete_traces(
        self,
        experiment_id: str,
        max_timestamp_millis: Optional[int] = None,
        max_traces: Optional[int] = None,
        trace_ids: Optional[list[str]] = None,
    ) -> int:
        """
        Delete traces based on the specified criteria.

        - Either `max_timestamp_millis` or `trace_ids` must be specified, but not both.
        - `max_traces` can't be specified if `trace_ids` is specified.

        Args:
            experiment_id: ID of the associated experiment.
            max_timestamp_millis: The maximum timestamp in milliseconds since the UNIX epoch for
                deleting traces. Traces older than or equal to this timestamp will be deleted.
            max_traces: The maximum number of traces to delete. If max_traces is specified, and
                it is less than the number of traces that would be deleted based on the
                max_timestamp_millis, the oldest traces will be deleted first.
            trace_ids: A set of trace IDs to delete.

        Returns:
            The number of traces deleted.
        """
        experiment_path = self._get_experiment_path(experiment_id, assert_exists=True)
        traces_path = os.path.join(experiment_path, FileStore.TRACES_FOLDER_NAME)
        deleted_traces = 0
        if max_timestamp_millis:
            trace_paths = list_all(traces_path, lambda x: os.path.isdir(x), full_path=True)
            trace_info_and_paths = []
            for trace_path in trace_paths:
                try:
                    trace_info = self._get_trace_info_from_dir(trace_path)
                    if trace_info and trace_info.timestamp_ms <= max_timestamp_millis:
                        trace_info_and_paths.append((trace_info, trace_path))
                except MissingConfigException as e:
                    # Trap the malformed-trace exception and log a warning.
                    trace_id = os.path.basename(trace_path)
                    _logger.warning(
                        f"Malformed trace with ID '{trace_id}'. Detailed error {e}",
                        exc_info=_logger.isEnabledFor(logging.DEBUG),
                    )
            trace_info_and_paths.sort(key=lambda x: x[0].timestamp_ms)
            # If max_traces is not None, it must be > 0.
            deleted_traces = min(len(trace_info_and_paths), max_traces or len(trace_info_and_paths))
            trace_info_and_paths = trace_info_and_paths[:deleted_traces]
            for _, trace_path in trace_info_and_paths:
                shutil.rmtree(trace_path)
            return deleted_traces
        if trace_ids:
            for trace_id in trace_ids:
                trace_path = os.path.join(traces_path, trace_id)
                # Do not throw if the trace doesn't exist
                if exists(trace_path):
                    shutil.rmtree(trace_path)
                    deleted_traces += 1
        return deleted_traces
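    # Illustrative usage sketch (not part of the package source): delete all traces in
    # an experiment older than a cutoff, capped at 100, or delete specific trace IDs;
    # cutoff_ms and the trace IDs are hypothetical.
    #
    #     n = store._delete_traces("0", max_timestamp_millis=cutoff_ms, max_traces=100)
    #     n = store._delete_traces("0", trace_ids=["tr-abc", "tr-def"])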
    def search_traces(
        self,
        experiment_ids: list[str],
        filter_string: Optional[str] = None,
        max_results: int = SEARCH_TRACES_DEFAULT_MAX_RESULTS,
        order_by: Optional[list[str]] = None,
        page_token: Optional[str] = None,
        model_id: Optional[str] = None,
        sql_warehouse_id: Optional[str] = None,
    ) -> tuple[list[TraceInfo], Optional[str]]:
        """
        Return traces that match the given list of search expressions within the experiments.

        Args:
            experiment_ids: List of experiment ids to scope the search.
            filter_string: A search filter string. Supported filter keys are `name`,
                `status`, `timestamp_ms` and `tags`.
            max_results: Maximum number of traces desired.
            order_by: List of order_by clauses. The supported sort key is `timestamp_ms`;
                by default, results are sorted by timestamp_ms DESC.
            page_token: Token specifying the next page of results. It should be obtained from
                a ``search_traces`` call.
            model_id: If specified, return traces associated with the model ID.
            sql_warehouse_id: Only used in Databricks. The ID of the SQL warehouse to use for
                searching traces in inference tables.

        Returns:
            A tuple of a list of :py:class:`TraceInfo <mlflow.entities.TraceInfo>` objects that
            satisfy the search expressions and a pagination token for the next page of results.
            If the underlying tracking store supports pagination, the token for the next page
            may be obtained via the ``token`` attribute of the returned object; some store
            implementations do not support pagination, in which case the returned token is
            not meaningful.
        """
        if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
            raise MlflowException(
                "Invalid value for request parameter max_results. It must be at "
                f"most {SEARCH_MAX_RESULTS_THRESHOLD}, but got value {max_results}",
                INVALID_PARAMETER_VALUE,
            )
        traces = []
        for experiment_id in experiment_ids:
            trace_infos = self._list_trace_infos(experiment_id)
            traces.extend(trace_infos)
        filtered = SearchTraceUtils.filter(traces, filter_string)
        sorted_traces = SearchTraceUtils.sort(filtered, order_by)
        traces, next_page_token = SearchTraceUtils.paginate(sorted_traces, page_token, max_results)
        return traces, next_page_token
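    # Illustrative usage sketch (not part of the package source): like run search,
    # trace search filters, sorts, and paginates in memory over the listed TraceInfos.
    #
    #     infos, token = store.search_traces(
    #         experiment_ids=["0"],
    #         filter_string="status = 'OK'",
    #         max_results=50,
    #     )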
    def _list_trace_infos(self, experiment_id):
        experiment_path = self._get_experiment_path(experiment_id, assert_exists=True)
        traces_path = os.path.join(experiment_path, FileStore.TRACES_FOLDER_NAME)
        if not os.path.exists(traces_path):
            return []
        trace_paths = list_all(traces_path, lambda x: os.path.isdir(x), full_path=True)
        trace_infos = []
        for trace_path in trace_paths:
            try:
                if trace_info := self._get_trace_info_from_dir(trace_path):
                    trace_infos.append(trace_info)
            except MissingConfigException as e:
                # Trap the malformed-trace exception and log a warning.
                trace_id = os.path.basename(trace_path)
                logging.warning(
                    f"Malformed trace with ID '{trace_id}'. Detailed error {e}",
                    exc_info=_logger.isEnabledFor(logging.DEBUG),
                )
        return trace_infos
    def create_logged_model(
        self,
        experiment_id: str = DEFAULT_EXPERIMENT_ID,
        name: Optional[str] = None,
        source_run_id: Optional[str] = None,
        tags: Optional[list[LoggedModelTag]] = None,
        params: Optional[list[LoggedModelParameter]] = None,
        model_type: Optional[str] = None,
    ) -> LoggedModel:
        """
        Create a new logged model.

        Args:
            experiment_id: ID of the experiment to which the model belongs.
            name: Name of the model. If not specified, a random name will be generated.
            source_run_id: ID of the run that produced the model.
            tags: Tags to set on the model.
            params: Parameters to set on the model.
            model_type: Type of the model.

        Returns:
            The created model.
        """
        _validate_logged_model_name(name)
        experiment = self.get_experiment(experiment_id)
        if experiment is None:
            raise MlflowException(
                f"Could not create model under experiment with ID {experiment_id} - no such "
                "experiment exists.",
                databricks_pb2.RESOURCE_DOES_NOT_EXIST,
            )
        if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
            raise MlflowException(
                f"Could not create model under non-active experiment with ID {experiment_id}.",
                databricks_pb2.INVALID_STATE,
            )
        for param in params or []:
            _validate_param(param.key, param.value)

        name = name or _generate_random_name()
        model_id = f"m-{uuid.uuid4().hex}"
        artifact_location = self._get_model_artifact_dir(experiment_id, model_id)
        creation_timestamp = int(time.time() * 1000)
        model = LoggedModel(
            experiment_id=experiment_id,
            model_id=model_id,
            name=name,
            artifact_location=artifact_location,
            creation_timestamp=creation_timestamp,
            last_updated_timestamp=creation_timestamp,
            source_run_id=source_run_id,
            status=LoggedModelStatus.PENDING,
            tags=tags,
            params=params,
            model_type=model_type,
        )

        # Persist model metadata and create directories for logging metrics, params, and tags
        model_dir = self._get_model_dir(experiment_id, model_id)
        mkdir(model_dir)
        model_info_dict: dict[str, Any] = self._make_persisted_model_dict(model)
        model_info_dict["lifecycle_stage"] = LifecycleStage.ACTIVE
        write_yaml(model_dir, FileStore.META_DATA_FILE_NAME, model_info_dict)
        mkdir(model_dir, FileStore.METRICS_FOLDER_NAME)
        mkdir(model_dir, FileStore.PARAMS_FOLDER_NAME)
        self.log_logged_model_params(model_id=model_id, params=params or [])
        self.set_logged_model_tags(model_id=model_id, tags=tags or [])

        return self.get_logged_model(model_id=model_id)

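    # Illustrative usage sketch (not part of the original module). The experiment
    # ID, parameter, and tag values below are assumptions for the example:
    #
    #     from mlflow.entities import LoggedModelParameter, LoggedModelTag
    #
    #     model = store.create_logged_model(
    #         experiment_id="0",
    #         params=[LoggedModelParameter(key="lr", value="0.01")],
    #         tags=[LoggedModelTag(key="stage", value="dev")],
    #         model_type="sklearn",
    #     )
    #     assert model.status == LoggedModelStatus.PENDING  # finalized later
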
    def log_logged_model_params(self, model_id: str, params: list[LoggedModelParameter]):
        """
        Set parameters on the specified logged model.

        Args:
            model_id: ID of the model.
            params: Parameters to set on the model.

        Returns:
            None
        """
        for param in params or []:
            _validate_param(param.key, param.value)

        model = self.get_logged_model(model_id)
        for param in params:
            param_path = os.path.join(
                self._get_model_dir(model.experiment_id, model.model_id),
                FileStore.PARAMS_FOLDER_NAME,
                param.key,
            )
            make_containing_dirs(param_path)
            # Don't add a trailing newline
            write_to(param_path, self._writeable_value(param.value))

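    # On-disk layout sketch (illustrative): each parameter is persisted as its own
    # file under the model's params folder, with the file name being the param key
    # and the file contents being the value (no trailing newline), e.g.:
    #
    #     <experiment>/models/m-<uuid>/params/lr  ->  "0.01"
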
    def finalize_logged_model(self, model_id: str, status: LoggedModelStatus) -> LoggedModel:
        """
        Finalize a model by updating its status.

        Args:
            model_id: ID of the model to finalize.
            status: Final status to set on the model.

        Returns:
            The updated model.
        """
        model_dict = self._get_model_dict(model_id)
        model = LoggedModel.from_dictionary(model_dict)
        model.status = status
        model.last_updated_timestamp = int(time.time() * 1000)
        model_dir = self._get_model_dir(model.experiment_id, model.model_id)
        model_info_dict = self._make_persisted_model_dict(model)
        write_yaml(model_dir, FileStore.META_DATA_FILE_NAME, model_info_dict, overwrite=True)
        return self.get_logged_model(model_id)

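    # Lifecycle sketch (illustrative): a model starts as PENDING from
    # create_logged_model and is later finalized, e.g. to READY:
    #
    #     model = store.finalize_logged_model(model.model_id, LoggedModelStatus.READY)
    #     assert model.status == LoggedModelStatus.READY
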
    def set_logged_model_tags(self, model_id: str, tags: list[LoggedModelTag]) -> None:
        """
        Set tags on the specified logged model.

        Args:
            model_id: ID of the model.
            tags: Tags to set on the model.

        Returns:
            None
        """
        model = self.get_logged_model(model_id)
        for tag in tags:
            _validate_tag_name(tag.key)
            tag_path = os.path.join(
                self._get_model_dir(model.experiment_id, model.model_id),
                FileStore.TAGS_FOLDER_NAME,
                tag.key,
            )
            make_containing_dirs(tag_path)
            # Don't add a trailing newline
            write_to(tag_path, self._writeable_value(tag.value))

    def delete_logged_model_tag(self, model_id: str, key: str) -> None:
        """
        Delete a tag on the specified logged model.

        Args:
            model_id: ID of the model.
            key: The string key of the tag.

        Returns:
            None
        """
        _validate_tag_name(key)
        model = self.get_logged_model(model_id)
        tag_path = os.path.join(
            self._get_model_dir(model.experiment_id, model.model_id),
            FileStore.TAGS_FOLDER_NAME,
            key,
        )
        if not exists(tag_path):
            raise MlflowException(
                f"No tag with key {key!r} found for model with ID {model_id!r}.",
                RESOURCE_DOES_NOT_EXIST,
            )
        os.remove(tag_path)

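    # Round-trip sketch (illustrative): setting and then deleting a tag. Deleting a
    # tag that does not exist raises RESOURCE_DOES_NOT_EXIST, so callers needing
    # idempotent cleanup should catch MlflowException:
    #
    #     store.set_logged_model_tags(model.model_id, [LoggedModelTag(key="stage", value="dev")])
    #     store.delete_logged_model_tag(model.model_id, "stage")
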
    def get_logged_model(self, model_id: str) -> LoggedModel:
        """
        Fetch the logged model with the specified ID.

        Args:
            model_id: ID of the model to fetch.

        Returns:
            The fetched model.
        """
        return LoggedModel.from_dictionary(self._get_model_dict(model_id))

    def delete_logged_model(self, model_id: str) -> None:
        model = self.get_logged_model(model_id)
        model_dict = self._make_persisted_model_dict(model)
        model_dict["lifecycle_stage"] = LifecycleStage.DELETED
        model_dir = self._get_model_dir(model.experiment_id, model.model_id)
        write_yaml(
            model_dir,
            FileStore.META_DATA_FILE_NAME,
            model_dict,
            overwrite=True,
        )

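    # Note: deletion is a soft delete. The model directory is kept and only
    # `lifecycle_stage` in meta.yaml is flipped to `deleted`; subsequent
    # get_logged_model / _get_model_dict calls then raise RESOURCE_DOES_NOT_EXIST
    # because deleted models are filtered out when metadata is read back.
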
    def _get_model_artifact_dir(self, experiment_id: str, model_id: str) -> str:
        return append_to_uri_path(
            self.get_experiment(experiment_id).artifact_location,
            FileStore.MODELS_FOLDER_NAME,
            model_id,
            FileStore.ARTIFACTS_FOLDER_NAME,
        )

    def _make_persisted_model_dict(self, model: LoggedModel) -> dict[str, Any]:
        model_dict = model.to_dictionary()
        for field in ("tags", "params", "metrics"):
            model_dict.pop(field, None)
        return model_dict

    def _get_model_dict(self, model_id: str) -> dict[str, Any]:
        exp_id, model_dir = self._find_model_root(model_id)
        if model_dir is None:
            raise MlflowException(
                f"Model '{model_id}' not found", databricks_pb2.RESOURCE_DOES_NOT_EXIST
            )
        model_dict: dict[str, Any] = self._get_model_info_from_dir(model_dir)
        if model_dict.get("lifecycle_stage") == LifecycleStage.DELETED:
            raise MlflowException(
                f"Model '{model_id}' not found", databricks_pb2.RESOURCE_DOES_NOT_EXIST
            )

        if model_dict["experiment_id"] != exp_id:
            raise MlflowException(
                f"Model '{model_id}' metadata is in invalid state.", databricks_pb2.INVALID_STATE
            )
        return model_dict

    def _get_model_dir(self, experiment_id: str, model_id: str) -> Optional[str]:
        if not self._has_experiment(experiment_id):
            return None
        return os.path.join(
            self._get_experiment_path(experiment_id, assert_exists=True),
            FileStore.MODELS_FOLDER_NAME,
            model_id,
        )

    def _find_model_root(self, model_id):
        self._check_root_dir()
        all_experiments = self._get_active_experiments(False) + self._get_deleted_experiments(False)
        for experiment_dir in all_experiments:
            models_dir_path = os.path.join(
                self.root_directory, experiment_dir, FileStore.MODELS_FOLDER_NAME
            )
            if not os.path.exists(models_dir_path):
                continue
            models = find(models_dir_path, model_id, full_path=True)
            if len(models) == 0:
                continue
            return os.path.basename(os.path.dirname(os.path.abspath(models_dir_path))), models[0]
        return None, None

    def _get_model_from_dir(self, model_dir: str) -> LoggedModel:
        return LoggedModel.from_dictionary(self._get_model_info_from_dir(model_dir))

    def _get_model_info_from_dir(self, model_dir: str) -> dict[str, Any]:
        model_dict = FileStore._read_yaml(model_dir, FileStore.META_DATA_FILE_NAME)
        model_dict["tags"] = self._get_all_model_tags(model_dir)
        model_dict["params"] = {p.key: p.value for p in self._get_all_model_params(model_dir)}
        model_dict["metrics"] = self._get_all_model_metrics(
            model_id=model_dict["model_id"], model_dir=model_dir
        )
        return model_dict

    def _get_all_model_tags(self, model_dir: str) -> list[LoggedModelTag]:
        parent_path, tag_files = self._get_resource_files(model_dir, FileStore.TAGS_FOLDER_NAME)
        tags = []
        for tag_file in tag_files:
            tags.append(self._get_tag_from_file(parent_path, tag_file))
        return tags

    def _get_all_model_params(self, model_dir: str) -> list[LoggedModelParameter]:
        parent_path, param_files = self._get_resource_files(model_dir, FileStore.PARAMS_FOLDER_NAME)
        return [self._get_param_from_file(parent_path, param_file) for param_file in param_files]

    def _get_all_model_metrics(self, model_id: str, model_dir: str) -> list[Metric]:
        parent_path, metric_files = self._get_resource_files(
            model_dir, FileStore.METRICS_FOLDER_NAME
        )
        metrics = []
        for metric_file in metric_files:
            metrics.extend(
                FileStore._get_model_metrics_from_file(
                    model_id=model_id, parent_path=parent_path, metric_name=metric_file
                )
            )
        return metrics

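    # Directory layout sketch assumed by the helpers above (illustrative):
    #
    #     <root>/<experiment_id>/models/<model_id>/
    #         meta.yaml       # persisted model dict plus lifecycle_stage
    #         params/<key>    # one file per param
    #         tags/<key>      # one file per tag
    #         metrics/<name>  # one file per metric, one line per logged value
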
    @staticmethod
    def _get_model_metrics_from_file(
        model_id: str, parent_path: str, metric_name: str
    ) -> list[Metric]:
        _validate_metric_name(metric_name)
        metric_objs = [
            FileStore._get_model_metric_from_line(model_id, metric_name, line)
            for line in read_file_lines(parent_path, metric_name)
        ]
        if len(metric_objs) == 0:
            raise ValueError(f"Metric '{metric_name}' is malformed. No data found.")

        # Group metrics by (dataset_name, dataset_digest)
        grouped_metrics = defaultdict(list)
        for metric in metric_objs:
            key = (metric.dataset_name, metric.dataset_digest)
            grouped_metrics[key].append(metric)

        # Keep only the maximal entry for each group, ordered by (step, timestamp, value)
        return [
            max(group, key=lambda m: (m.step, m.timestamp, m.value))
            for group in grouped_metrics.values()
        ]

    @staticmethod
    def _get_model_metric_from_line(model_id: str, metric_name: str, metric_line: str) -> Metric:
        metric_parts = metric_line.strip().split(" ")
        if len(metric_parts) not in [4, 6]:
            raise MlflowException(
                f"Metric '{metric_name}' is malformed; persisted metric data contained "
                f"{len(metric_parts)} fields. Expected 4 or 6 fields.",
                databricks_pb2.INTERNAL_ERROR,
            )
        ts = int(metric_parts[0])
        val = float(metric_parts[1])
        step = int(metric_parts[2])
        run_id = str(metric_parts[3])
        dataset_name = str(metric_parts[4]) if len(metric_parts) == 6 else None
        dataset_digest = str(metric_parts[5]) if len(metric_parts) == 6 else None
        return Metric(
            key=metric_name,
            value=val,
            timestamp=ts,
            step=step,
            model_id=model_id,
            dataset_name=dataset_name,
            dataset_digest=dataset_digest,
            run_id=run_id,
        )

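    # Metric file line format (derived from the parser above): space-separated
    # fields, either 4 or 6 per line:
    #
    #     <timestamp_ms> <value> <step> <run_id> [<dataset_name> <dataset_digest>]
    #
    # e.g. "1714000000000 0.95 3 abc123" parses to a Metric with value=0.95 and
    # step=3; within each (dataset_name, dataset_digest) group, only the entry
    # with the greatest (step, timestamp, value) is returned.
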
    def search_logged_models(
        self,
        experiment_ids: list[str],
        filter_string: Optional[str] = None,
        datasets: Optional[list[DatasetFilter]] = None,
        max_results: Optional[int] = None,
        order_by: Optional[list[dict[str, Any]]] = None,
        page_token: Optional[str] = None,
    ) -> PagedList[LoggedModel]:
        """
        Search for logged models that match the specified search criteria.

        Args:
            experiment_ids: List of experiment ids to scope the search.
            filter_string: A search filter string.
            datasets: List of dictionaries to specify datasets on which to apply metrics
                filters. The following fields are supported:

                dataset_name (str): Required. Name of the dataset.
                dataset_digest (str): Optional. Digest of the dataset.
            max_results: Maximum number of logged models desired. Default is 100.
            order_by: List of dictionaries to specify the ordering of the search results.
                The following fields are supported:

                field_name (str): Required. Name of the field to order by,
                    e.g. "metrics.accuracy".
                ascending (bool): Optional. Whether the order is ascending or not.
                dataset_name (str): Optional. If ``field_name`` refers to a metric, this field
                    specifies the name of the dataset associated with the metric. Only metrics
                    associated with the specified dataset name will be considered for ordering.
                    This field may only be set if ``field_name`` refers to a metric.
                dataset_digest (str): Optional. If ``field_name`` refers to a metric, this field
                    specifies the digest of the dataset associated with the metric. Only metrics
                    associated with the specified dataset name and digest will be considered for
                    ordering. This field may only be set if ``dataset_name`` is also set.
            page_token: Token specifying the next page of results.

        Returns:
            A :py:class:`PagedList <mlflow.store.entities.PagedList>` of
            :py:class:`LoggedModel <mlflow.entities.LoggedModel>` objects.
        """
        if datasets and not all(d.get("dataset_name") for d in datasets):
            raise MlflowException(
                "`dataset_name` in the `datasets` clause must be specified.",
                INVALID_PARAMETER_VALUE,
            )
        max_results = max_results or SEARCH_LOGGED_MODEL_MAX_RESULTS_DEFAULT
        all_models = []
        for experiment_id in experiment_ids:
            models = self._list_models(experiment_id)
            all_models.extend(models)
        filtered = SearchLoggedModelsUtils.filter_logged_models(all_models, filter_string, datasets)
        sorted_logged_models = SearchLoggedModelsUtils.sort(filtered, order_by)
        logged_models, next_page_token = SearchLoggedModelsUtils.paginate(
            sorted_logged_models, page_token, max_results
        )
        return PagedList(logged_models, next_page_token)

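    # Illustrative usage sketch (not part of the original module): ordering by a
    # metric computed on a named dataset. The dataset name and metric key are
    # assumptions for the example:
    #
    #     page = store.search_logged_models(
    #         experiment_ids=["0"],
    #         filter_string="metrics.accuracy > 0.9",
    #         datasets=[{"dataset_name": "validation"}],
    #         order_by=[{"field_name": "metrics.accuracy", "ascending": False}],
    #     )
    #     best = page[0] if page else None
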
    def _list_models(self, experiment_id: str) -> list[LoggedModel]:
        self._check_root_dir()
        if not self._has_experiment(experiment_id):
            return []
        experiment_dir = self._get_experiment_path(experiment_id, assert_exists=True)
        models_folder = os.path.join(experiment_dir, FileStore.MODELS_FOLDER_NAME)
        if not exists(models_folder):
            return []
        model_dirs = list_all(
            models_folder,
            filter_func=lambda x: all(
                os.path.basename(os.path.normpath(x)) != reserved_folder_name
                for reserved_folder_name in FileStore.RESERVED_EXPERIMENT_FOLDERS
            )
            and os.path.isdir(x),
            full_path=True,
        )
        models = []
        for m_dir in model_dirs:
            try:
                # Trap and warn on known issues; unexpected exceptions are raised to the caller
                m_dict = self._get_model_info_from_dir(m_dir)
                if m_dict.get("lifecycle_stage") == LifecycleStage.DELETED:
                    continue
                model = LoggedModel.from_dictionary(m_dict)
                if model.experiment_id != experiment_id:
                    _logger.warning(
                        "Wrong experiment ID (%s) recorded for model '%s'. "
                        "It should be %s. Model will be ignored.",
                        str(model.experiment_id),
                        str(model.model_id),
                        str(experiment_id),
                    )
                    continue
                models.append(model)
            except MissingConfigException as exc:
                # Trap the malformed-model exception and log it. This is at debug level
                # because if the same store is used for artifact storage, it is common
                # for a folder here not to be a model folder.
                m_id = os.path.basename(m_dir)
                _logger.debug(
                    "Malformed model '%s'. Detailed error %s", m_id, str(exc), exc_info=True
                )
        return models

    #######################################################################################
    # Below are legacy V2 Tracing APIs. DO NOT USE. Use the V3 APIs instead.
    #######################################################################################
    def deprecated_start_trace_v2(
        self,
        experiment_id: str,
        timestamp_ms: int,
        request_metadata: dict[str, str],
        tags: dict[str, str],
    ) -> TraceInfoV2:
        """
        DEPRECATED. DO NOT USE.

        Start an initial TraceInfo object in the backend store.

        Args:
            experiment_id: String id of the experiment for this trace.
            timestamp_ms: Start time of the trace, in milliseconds since the UNIX epoch.
            request_metadata: Metadata of the trace.
            tags: Tags of the trace.

        Returns:
            The created TraceInfo object.
        """
        request_id = generate_request_id_v2()
        _validate_experiment_id(experiment_id)
        experiment_dir = self._get_experiment_path(
            experiment_id, view_type=ViewType.ACTIVE_ONLY, assert_exists=True
        )
        mkdir(experiment_dir, FileStore.TRACES_FOLDER_NAME)
        traces_dir = os.path.join(experiment_dir, FileStore.TRACES_FOLDER_NAME)
        mkdir(traces_dir, request_id)
        trace_dir = os.path.join(traces_dir, request_id)
        artifact_uri = self._get_traces_artifact_dir(experiment_id, request_id)
        tags.update({MLFLOW_ARTIFACT_LOCATION: artifact_uri})
        trace_info = TraceInfoV2(
            request_id=request_id,
            experiment_id=experiment_id,
            timestamp_ms=timestamp_ms,
            execution_time_ms=None,
            status=TraceStatus.IN_PROGRESS,
            request_metadata=request_metadata,
            tags=tags,
        )
        self._save_trace_info(trace_info.to_v3(), trace_dir)
        return trace_info

    def deprecated_end_trace_v2(
        self,
        request_id: str,
        timestamp_ms: int,
        status: TraceStatus,
        request_metadata: dict[str, str],
        tags: dict[str, str],
    ) -> TraceInfoV2:
        """
        DEPRECATED. DO NOT USE.

        Update the TraceInfo object in the backend store with the completed trace info.

        Args:
            request_id: Unique string identifier of the trace.
            timestamp_ms: End time of the trace, in milliseconds. The execution time field
                in the TraceInfo will be calculated by subtracting the start time from this.
            status: Status of the trace.
            request_metadata: Metadata of the trace. This will be merged with the existing
                metadata logged during the start_trace call.
            tags: Tags of the trace. This will be merged with the existing tags logged
                during the start_trace or set_trace_tag calls.

        Returns:
            The updated TraceInfo object.
        """
        trace_info, trace_dir = self._get_trace_info_and_dir(request_id)
        trace_info.execution_duration = timestamp_ms - trace_info.request_time
        trace_info.state = status.to_state()
        trace_info.trace_metadata.update(request_metadata)
        trace_info.tags.update(tags)
        self._save_trace_info(trace_info, trace_dir, overwrite=True)
        return TraceInfoV2.from_v3(trace_info)