genesis-flow 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genesis_flow-1.0.0.dist-info/METADATA +822 -0
- genesis_flow-1.0.0.dist-info/RECORD +645 -0
- genesis_flow-1.0.0.dist-info/WHEEL +5 -0
- genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
- genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
- genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
- mlflow/__init__.py +367 -0
- mlflow/__main__.py +3 -0
- mlflow/ag2/__init__.py +56 -0
- mlflow/ag2/ag2_logger.py +294 -0
- mlflow/anthropic/__init__.py +40 -0
- mlflow/anthropic/autolog.py +129 -0
- mlflow/anthropic/chat.py +144 -0
- mlflow/artifacts/__init__.py +268 -0
- mlflow/autogen/__init__.py +144 -0
- mlflow/autogen/chat.py +142 -0
- mlflow/azure/__init__.py +26 -0
- mlflow/azure/auth_handler.py +257 -0
- mlflow/azure/client.py +319 -0
- mlflow/azure/config.py +120 -0
- mlflow/azure/connection_factory.py +340 -0
- mlflow/azure/exceptions.py +27 -0
- mlflow/azure/stores.py +327 -0
- mlflow/azure/utils.py +183 -0
- mlflow/bedrock/__init__.py +45 -0
- mlflow/bedrock/_autolog.py +202 -0
- mlflow/bedrock/chat.py +122 -0
- mlflow/bedrock/stream.py +160 -0
- mlflow/bedrock/utils.py +43 -0
- mlflow/cli.py +707 -0
- mlflow/client.py +12 -0
- mlflow/config/__init__.py +56 -0
- mlflow/crewai/__init__.py +79 -0
- mlflow/crewai/autolog.py +253 -0
- mlflow/crewai/chat.py +29 -0
- mlflow/data/__init__.py +75 -0
- mlflow/data/artifact_dataset_sources.py +170 -0
- mlflow/data/code_dataset_source.py +40 -0
- mlflow/data/dataset.py +123 -0
- mlflow/data/dataset_registry.py +168 -0
- mlflow/data/dataset_source.py +110 -0
- mlflow/data/dataset_source_registry.py +219 -0
- mlflow/data/delta_dataset_source.py +167 -0
- mlflow/data/digest_utils.py +108 -0
- mlflow/data/evaluation_dataset.py +562 -0
- mlflow/data/filesystem_dataset_source.py +81 -0
- mlflow/data/http_dataset_source.py +145 -0
- mlflow/data/huggingface_dataset.py +258 -0
- mlflow/data/huggingface_dataset_source.py +118 -0
- mlflow/data/meta_dataset.py +104 -0
- mlflow/data/numpy_dataset.py +223 -0
- mlflow/data/pandas_dataset.py +231 -0
- mlflow/data/polars_dataset.py +352 -0
- mlflow/data/pyfunc_dataset_mixin.py +31 -0
- mlflow/data/schema.py +76 -0
- mlflow/data/sources.py +1 -0
- mlflow/data/spark_dataset.py +406 -0
- mlflow/data/spark_dataset_source.py +74 -0
- mlflow/data/spark_delta_utils.py +118 -0
- mlflow/data/tensorflow_dataset.py +350 -0
- mlflow/data/uc_volume_dataset_source.py +81 -0
- mlflow/db.py +27 -0
- mlflow/dspy/__init__.py +17 -0
- mlflow/dspy/autolog.py +197 -0
- mlflow/dspy/callback.py +398 -0
- mlflow/dspy/constant.py +1 -0
- mlflow/dspy/load.py +93 -0
- mlflow/dspy/save.py +393 -0
- mlflow/dspy/util.py +109 -0
- mlflow/dspy/wrapper.py +226 -0
- mlflow/entities/__init__.py +104 -0
- mlflow/entities/_mlflow_object.py +52 -0
- mlflow/entities/assessment.py +545 -0
- mlflow/entities/assessment_error.py +80 -0
- mlflow/entities/assessment_source.py +141 -0
- mlflow/entities/dataset.py +92 -0
- mlflow/entities/dataset_input.py +51 -0
- mlflow/entities/dataset_summary.py +62 -0
- mlflow/entities/document.py +48 -0
- mlflow/entities/experiment.py +109 -0
- mlflow/entities/experiment_tag.py +35 -0
- mlflow/entities/file_info.py +45 -0
- mlflow/entities/input_tag.py +35 -0
- mlflow/entities/lifecycle_stage.py +35 -0
- mlflow/entities/logged_model.py +228 -0
- mlflow/entities/logged_model_input.py +26 -0
- mlflow/entities/logged_model_output.py +32 -0
- mlflow/entities/logged_model_parameter.py +46 -0
- mlflow/entities/logged_model_status.py +74 -0
- mlflow/entities/logged_model_tag.py +33 -0
- mlflow/entities/metric.py +200 -0
- mlflow/entities/model_registry/__init__.py +29 -0
- mlflow/entities/model_registry/_model_registry_entity.py +13 -0
- mlflow/entities/model_registry/model_version.py +243 -0
- mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
- mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
- mlflow/entities/model_registry/model_version_search.py +25 -0
- mlflow/entities/model_registry/model_version_stages.py +25 -0
- mlflow/entities/model_registry/model_version_status.py +35 -0
- mlflow/entities/model_registry/model_version_tag.py +35 -0
- mlflow/entities/model_registry/prompt.py +73 -0
- mlflow/entities/model_registry/prompt_version.py +244 -0
- mlflow/entities/model_registry/registered_model.py +175 -0
- mlflow/entities/model_registry/registered_model_alias.py +35 -0
- mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
- mlflow/entities/model_registry/registered_model_search.py +25 -0
- mlflow/entities/model_registry/registered_model_tag.py +35 -0
- mlflow/entities/multipart_upload.py +74 -0
- mlflow/entities/param.py +49 -0
- mlflow/entities/run.py +97 -0
- mlflow/entities/run_data.py +84 -0
- mlflow/entities/run_info.py +188 -0
- mlflow/entities/run_inputs.py +59 -0
- mlflow/entities/run_outputs.py +43 -0
- mlflow/entities/run_status.py +41 -0
- mlflow/entities/run_tag.py +36 -0
- mlflow/entities/source_type.py +31 -0
- mlflow/entities/span.py +774 -0
- mlflow/entities/span_event.py +96 -0
- mlflow/entities/span_status.py +102 -0
- mlflow/entities/trace.py +317 -0
- mlflow/entities/trace_data.py +71 -0
- mlflow/entities/trace_info.py +220 -0
- mlflow/entities/trace_info_v2.py +162 -0
- mlflow/entities/trace_location.py +173 -0
- mlflow/entities/trace_state.py +39 -0
- mlflow/entities/trace_status.py +68 -0
- mlflow/entities/view_type.py +51 -0
- mlflow/environment_variables.py +866 -0
- mlflow/evaluation/__init__.py +16 -0
- mlflow/evaluation/assessment.py +369 -0
- mlflow/evaluation/evaluation.py +411 -0
- mlflow/evaluation/evaluation_tag.py +61 -0
- mlflow/evaluation/fluent.py +48 -0
- mlflow/evaluation/utils.py +201 -0
- mlflow/exceptions.py +213 -0
- mlflow/experiments.py +140 -0
- mlflow/gemini/__init__.py +81 -0
- mlflow/gemini/autolog.py +186 -0
- mlflow/gemini/chat.py +261 -0
- mlflow/genai/__init__.py +71 -0
- mlflow/genai/datasets/__init__.py +67 -0
- mlflow/genai/datasets/evaluation_dataset.py +131 -0
- mlflow/genai/evaluation/__init__.py +3 -0
- mlflow/genai/evaluation/base.py +411 -0
- mlflow/genai/evaluation/constant.py +23 -0
- mlflow/genai/evaluation/utils.py +244 -0
- mlflow/genai/judges/__init__.py +21 -0
- mlflow/genai/judges/databricks.py +404 -0
- mlflow/genai/label_schemas/__init__.py +153 -0
- mlflow/genai/label_schemas/label_schemas.py +209 -0
- mlflow/genai/labeling/__init__.py +159 -0
- mlflow/genai/labeling/labeling.py +250 -0
- mlflow/genai/optimize/__init__.py +13 -0
- mlflow/genai/optimize/base.py +198 -0
- mlflow/genai/optimize/optimizers/__init__.py +4 -0
- mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
- mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
- mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
- mlflow/genai/optimize/types.py +75 -0
- mlflow/genai/optimize/util.py +30 -0
- mlflow/genai/prompts/__init__.py +206 -0
- mlflow/genai/scheduled_scorers.py +431 -0
- mlflow/genai/scorers/__init__.py +26 -0
- mlflow/genai/scorers/base.py +492 -0
- mlflow/genai/scorers/builtin_scorers.py +765 -0
- mlflow/genai/scorers/scorer_utils.py +138 -0
- mlflow/genai/scorers/validation.py +165 -0
- mlflow/genai/utils/data_validation.py +146 -0
- mlflow/genai/utils/enum_utils.py +23 -0
- mlflow/genai/utils/trace_utils.py +211 -0
- mlflow/groq/__init__.py +42 -0
- mlflow/groq/_groq_autolog.py +74 -0
- mlflow/johnsnowlabs/__init__.py +888 -0
- mlflow/langchain/__init__.py +24 -0
- mlflow/langchain/api_request_parallel_processor.py +330 -0
- mlflow/langchain/autolog.py +147 -0
- mlflow/langchain/chat_agent_langgraph.py +340 -0
- mlflow/langchain/constant.py +1 -0
- mlflow/langchain/constants.py +1 -0
- mlflow/langchain/databricks_dependencies.py +444 -0
- mlflow/langchain/langchain_tracer.py +597 -0
- mlflow/langchain/model.py +919 -0
- mlflow/langchain/output_parsers.py +142 -0
- mlflow/langchain/retriever_chain.py +153 -0
- mlflow/langchain/runnables.py +527 -0
- mlflow/langchain/utils/chat.py +402 -0
- mlflow/langchain/utils/logging.py +671 -0
- mlflow/langchain/utils/serialization.py +36 -0
- mlflow/legacy_databricks_cli/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/provider.py +482 -0
- mlflow/litellm/__init__.py +175 -0
- mlflow/llama_index/__init__.py +22 -0
- mlflow/llama_index/autolog.py +55 -0
- mlflow/llama_index/chat.py +43 -0
- mlflow/llama_index/constant.py +1 -0
- mlflow/llama_index/model.py +577 -0
- mlflow/llama_index/pyfunc_wrapper.py +332 -0
- mlflow/llama_index/serialize_objects.py +188 -0
- mlflow/llama_index/tracer.py +561 -0
- mlflow/metrics/__init__.py +479 -0
- mlflow/metrics/base.py +39 -0
- mlflow/metrics/genai/__init__.py +25 -0
- mlflow/metrics/genai/base.py +101 -0
- mlflow/metrics/genai/genai_metric.py +771 -0
- mlflow/metrics/genai/metric_definitions.py +450 -0
- mlflow/metrics/genai/model_utils.py +371 -0
- mlflow/metrics/genai/prompt_template.py +68 -0
- mlflow/metrics/genai/prompts/__init__.py +0 -0
- mlflow/metrics/genai/prompts/v1.py +422 -0
- mlflow/metrics/genai/utils.py +6 -0
- mlflow/metrics/metric_definitions.py +619 -0
- mlflow/mismatch.py +34 -0
- mlflow/mistral/__init__.py +34 -0
- mlflow/mistral/autolog.py +71 -0
- mlflow/mistral/chat.py +135 -0
- mlflow/ml_package_versions.py +452 -0
- mlflow/models/__init__.py +97 -0
- mlflow/models/auth_policy.py +83 -0
- mlflow/models/cli.py +354 -0
- mlflow/models/container/__init__.py +294 -0
- mlflow/models/container/scoring_server/__init__.py +0 -0
- mlflow/models/container/scoring_server/nginx.conf +39 -0
- mlflow/models/dependencies_schemas.py +287 -0
- mlflow/models/display_utils.py +158 -0
- mlflow/models/docker_utils.py +211 -0
- mlflow/models/evaluation/__init__.py +23 -0
- mlflow/models/evaluation/_shap_patch.py +64 -0
- mlflow/models/evaluation/artifacts.py +194 -0
- mlflow/models/evaluation/base.py +1811 -0
- mlflow/models/evaluation/calibration_curve.py +109 -0
- mlflow/models/evaluation/default_evaluator.py +996 -0
- mlflow/models/evaluation/deprecated.py +23 -0
- mlflow/models/evaluation/evaluator_registry.py +80 -0
- mlflow/models/evaluation/evaluators/classifier.py +704 -0
- mlflow/models/evaluation/evaluators/default.py +233 -0
- mlflow/models/evaluation/evaluators/regressor.py +96 -0
- mlflow/models/evaluation/evaluators/shap.py +296 -0
- mlflow/models/evaluation/lift_curve.py +178 -0
- mlflow/models/evaluation/utils/metric.py +123 -0
- mlflow/models/evaluation/utils/trace.py +179 -0
- mlflow/models/evaluation/validation.py +434 -0
- mlflow/models/flavor_backend.py +93 -0
- mlflow/models/flavor_backend_registry.py +53 -0
- mlflow/models/model.py +1639 -0
- mlflow/models/model_config.py +150 -0
- mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
- mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
- mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
- mlflow/models/python_api.py +369 -0
- mlflow/models/rag_signatures.py +128 -0
- mlflow/models/resources.py +321 -0
- mlflow/models/signature.py +662 -0
- mlflow/models/utils.py +2054 -0
- mlflow/models/wheeled_model.py +280 -0
- mlflow/openai/__init__.py +57 -0
- mlflow/openai/_agent_tracer.py +364 -0
- mlflow/openai/api_request_parallel_processor.py +131 -0
- mlflow/openai/autolog.py +509 -0
- mlflow/openai/constant.py +1 -0
- mlflow/openai/model.py +824 -0
- mlflow/openai/utils/chat_schema.py +367 -0
- mlflow/optuna/__init__.py +3 -0
- mlflow/optuna/storage.py +646 -0
- mlflow/plugins/__init__.py +72 -0
- mlflow/plugins/base.py +358 -0
- mlflow/plugins/builtin/__init__.py +24 -0
- mlflow/plugins/builtin/pytorch_plugin.py +150 -0
- mlflow/plugins/builtin/sklearn_plugin.py +158 -0
- mlflow/plugins/builtin/transformers_plugin.py +187 -0
- mlflow/plugins/cli.py +321 -0
- mlflow/plugins/discovery.py +340 -0
- mlflow/plugins/manager.py +465 -0
- mlflow/plugins/registry.py +316 -0
- mlflow/plugins/templates/framework_plugin_template.py +329 -0
- mlflow/prompt/constants.py +20 -0
- mlflow/prompt/promptlab_model.py +197 -0
- mlflow/prompt/registry_utils.py +248 -0
- mlflow/promptflow/__init__.py +495 -0
- mlflow/protos/__init__.py +0 -0
- mlflow/protos/assessments_pb2.py +174 -0
- mlflow/protos/databricks_artifacts_pb2.py +489 -0
- mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
- mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
- mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
- mlflow/protos/databricks_pb2.py +267 -0
- mlflow/protos/databricks_trace_server_pb2.py +374 -0
- mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
- mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
- mlflow/protos/facet_feature_statistics_pb2.py +296 -0
- mlflow/protos/internal_pb2.py +77 -0
- mlflow/protos/mlflow_artifacts_pb2.py +336 -0
- mlflow/protos/model_registry_pb2.py +1073 -0
- mlflow/protos/scalapb/__init__.py +0 -0
- mlflow/protos/scalapb/scalapb_pb2.py +104 -0
- mlflow/protos/service_pb2.py +2600 -0
- mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
- mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
- mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
- mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
- mlflow/py.typed +0 -0
- mlflow/pydantic_ai/__init__.py +57 -0
- mlflow/pydantic_ai/autolog.py +173 -0
- mlflow/pyfunc/__init__.py +3844 -0
- mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
- mlflow/pyfunc/backend.py +523 -0
- mlflow/pyfunc/context.py +78 -0
- mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
- mlflow/pyfunc/loaders/__init__.py +7 -0
- mlflow/pyfunc/loaders/chat_agent.py +117 -0
- mlflow/pyfunc/loaders/chat_model.py +125 -0
- mlflow/pyfunc/loaders/code_model.py +31 -0
- mlflow/pyfunc/loaders/responses_agent.py +112 -0
- mlflow/pyfunc/mlserver.py +46 -0
- mlflow/pyfunc/model.py +1473 -0
- mlflow/pyfunc/scoring_server/__init__.py +604 -0
- mlflow/pyfunc/scoring_server/app.py +7 -0
- mlflow/pyfunc/scoring_server/client.py +146 -0
- mlflow/pyfunc/spark_model_cache.py +48 -0
- mlflow/pyfunc/stdin_server.py +44 -0
- mlflow/pyfunc/utils/__init__.py +3 -0
- mlflow/pyfunc/utils/data_validation.py +224 -0
- mlflow/pyfunc/utils/environment.py +22 -0
- mlflow/pyfunc/utils/input_converter.py +47 -0
- mlflow/pyfunc/utils/serving_data_parser.py +11 -0
- mlflow/pytorch/__init__.py +1171 -0
- mlflow/pytorch/_lightning_autolog.py +580 -0
- mlflow/pytorch/_pytorch_autolog.py +50 -0
- mlflow/pytorch/pickle_module.py +35 -0
- mlflow/rfunc/__init__.py +42 -0
- mlflow/rfunc/backend.py +134 -0
- mlflow/runs.py +89 -0
- mlflow/server/__init__.py +302 -0
- mlflow/server/auth/__init__.py +1224 -0
- mlflow/server/auth/__main__.py +4 -0
- mlflow/server/auth/basic_auth.ini +6 -0
- mlflow/server/auth/cli.py +11 -0
- mlflow/server/auth/client.py +537 -0
- mlflow/server/auth/config.py +34 -0
- mlflow/server/auth/db/__init__.py +0 -0
- mlflow/server/auth/db/cli.py +18 -0
- mlflow/server/auth/db/migrations/__init__.py +0 -0
- mlflow/server/auth/db/migrations/alembic.ini +110 -0
- mlflow/server/auth/db/migrations/env.py +76 -0
- mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
- mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
- mlflow/server/auth/db/models.py +67 -0
- mlflow/server/auth/db/utils.py +37 -0
- mlflow/server/auth/entities.py +165 -0
- mlflow/server/auth/logo.py +14 -0
- mlflow/server/auth/permissions.py +65 -0
- mlflow/server/auth/routes.py +18 -0
- mlflow/server/auth/sqlalchemy_store.py +263 -0
- mlflow/server/graphql/__init__.py +0 -0
- mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
- mlflow/server/graphql/graphql_custom_scalars.py +24 -0
- mlflow/server/graphql/graphql_errors.py +15 -0
- mlflow/server/graphql/graphql_no_batching.py +89 -0
- mlflow/server/graphql/graphql_schema_extensions.py +74 -0
- mlflow/server/handlers.py +3217 -0
- mlflow/server/prometheus_exporter.py +17 -0
- mlflow/server/validation.py +30 -0
- mlflow/shap/__init__.py +691 -0
- mlflow/sklearn/__init__.py +1994 -0
- mlflow/sklearn/utils.py +1041 -0
- mlflow/smolagents/__init__.py +66 -0
- mlflow/smolagents/autolog.py +139 -0
- mlflow/smolagents/chat.py +29 -0
- mlflow/store/__init__.py +10 -0
- mlflow/store/_unity_catalog/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/constants.py +2 -0
- mlflow/store/_unity_catalog/registry/__init__.py +6 -0
- mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
- mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
- mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
- mlflow/store/_unity_catalog/registry/utils.py +121 -0
- mlflow/store/artifact/__init__.py +0 -0
- mlflow/store/artifact/artifact_repo.py +472 -0
- mlflow/store/artifact/artifact_repository_registry.py +154 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
- mlflow/store/artifact/cli.py +141 -0
- mlflow/store/artifact/cloud_artifact_repo.py +332 -0
- mlflow/store/artifact/databricks_artifact_repo.py +729 -0
- mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
- mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
- mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
- mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
- mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
- mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
- mlflow/store/artifact/ftp_artifact_repo.py +132 -0
- mlflow/store/artifact/gcs_artifact_repo.py +296 -0
- mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
- mlflow/store/artifact/http_artifact_repo.py +218 -0
- mlflow/store/artifact/local_artifact_repo.py +142 -0
- mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
- mlflow/store/artifact/models_artifact_repo.py +259 -0
- mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
- mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
- mlflow/store/artifact/r2_artifact_repo.py +70 -0
- mlflow/store/artifact/runs_artifact_repo.py +265 -0
- mlflow/store/artifact/s3_artifact_repo.py +330 -0
- mlflow/store/artifact/sftp_artifact_repo.py +141 -0
- mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
- mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
- mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
- mlflow/store/artifact/utils/__init__.py +0 -0
- mlflow/store/artifact/utils/models.py +148 -0
- mlflow/store/db/__init__.py +0 -0
- mlflow/store/db/base_sql_model.py +3 -0
- mlflow/store/db/db_types.py +10 -0
- mlflow/store/db/utils.py +314 -0
- mlflow/store/db_migrations/__init__.py +0 -0
- mlflow/store/db_migrations/alembic.ini +74 -0
- mlflow/store/db_migrations/env.py +84 -0
- mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
- mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
- mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
- mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
- mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
- mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
- mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
- mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
- mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
- mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
- mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
- mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
- mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
- mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
- mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
- mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
- mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
- mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
- mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
- mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
- mlflow/store/db_migrations/versions/__init__.py +0 -0
- mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
- mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
- mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
- mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
- mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
- mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
- mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
- mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
- mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
- mlflow/store/entities/__init__.py +3 -0
- mlflow/store/entities/paged_list.py +18 -0
- mlflow/store/model_registry/__init__.py +10 -0
- mlflow/store/model_registry/abstract_store.py +1081 -0
- mlflow/store/model_registry/base_rest_store.py +44 -0
- mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
- mlflow/store/model_registry/dbmodels/__init__.py +0 -0
- mlflow/store/model_registry/dbmodels/models.py +206 -0
- mlflow/store/model_registry/file_store.py +1091 -0
- mlflow/store/model_registry/rest_store.py +481 -0
- mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
- mlflow/store/tracking/__init__.py +23 -0
- mlflow/store/tracking/abstract_store.py +816 -0
- mlflow/store/tracking/dbmodels/__init__.py +0 -0
- mlflow/store/tracking/dbmodels/initial_models.py +243 -0
- mlflow/store/tracking/dbmodels/models.py +1073 -0
- mlflow/store/tracking/file_store.py +2438 -0
- mlflow/store/tracking/postgres_managed_identity.py +146 -0
- mlflow/store/tracking/rest_store.py +1131 -0
- mlflow/store/tracking/sqlalchemy_store.py +2785 -0
- mlflow/system_metrics/__init__.py +61 -0
- mlflow/system_metrics/metrics/__init__.py +0 -0
- mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
- mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
- mlflow/system_metrics/metrics/disk_monitor.py +21 -0
- mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
- mlflow/system_metrics/metrics/network_monitor.py +34 -0
- mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
- mlflow/system_metrics/system_metrics_monitor.py +198 -0
- mlflow/tracing/__init__.py +16 -0
- mlflow/tracing/assessment.py +356 -0
- mlflow/tracing/client.py +531 -0
- mlflow/tracing/config.py +125 -0
- mlflow/tracing/constant.py +105 -0
- mlflow/tracing/destination.py +81 -0
- mlflow/tracing/display/__init__.py +40 -0
- mlflow/tracing/display/display_handler.py +196 -0
- mlflow/tracing/export/async_export_queue.py +186 -0
- mlflow/tracing/export/inference_table.py +138 -0
- mlflow/tracing/export/mlflow_v3.py +137 -0
- mlflow/tracing/export/utils.py +70 -0
- mlflow/tracing/fluent.py +1417 -0
- mlflow/tracing/processor/base_mlflow.py +199 -0
- mlflow/tracing/processor/inference_table.py +175 -0
- mlflow/tracing/processor/mlflow_v3.py +47 -0
- mlflow/tracing/processor/otel.py +73 -0
- mlflow/tracing/provider.py +487 -0
- mlflow/tracing/trace_manager.py +200 -0
- mlflow/tracing/utils/__init__.py +616 -0
- mlflow/tracing/utils/artifact_utils.py +28 -0
- mlflow/tracing/utils/copy.py +55 -0
- mlflow/tracing/utils/environment.py +55 -0
- mlflow/tracing/utils/exception.py +21 -0
- mlflow/tracing/utils/once.py +35 -0
- mlflow/tracing/utils/otlp.py +63 -0
- mlflow/tracing/utils/processor.py +54 -0
- mlflow/tracing/utils/search.py +292 -0
- mlflow/tracing/utils/timeout.py +250 -0
- mlflow/tracing/utils/token.py +19 -0
- mlflow/tracing/utils/truncation.py +124 -0
- mlflow/tracing/utils/warning.py +76 -0
- mlflow/tracking/__init__.py +39 -0
- mlflow/tracking/_model_registry/__init__.py +1 -0
- mlflow/tracking/_model_registry/client.py +764 -0
- mlflow/tracking/_model_registry/fluent.py +853 -0
- mlflow/tracking/_model_registry/registry.py +67 -0
- mlflow/tracking/_model_registry/utils.py +251 -0
- mlflow/tracking/_tracking_service/__init__.py +0 -0
- mlflow/tracking/_tracking_service/client.py +883 -0
- mlflow/tracking/_tracking_service/registry.py +56 -0
- mlflow/tracking/_tracking_service/utils.py +275 -0
- mlflow/tracking/artifact_utils.py +179 -0
- mlflow/tracking/client.py +5900 -0
- mlflow/tracking/context/__init__.py +0 -0
- mlflow/tracking/context/abstract_context.py +35 -0
- mlflow/tracking/context/databricks_cluster_context.py +15 -0
- mlflow/tracking/context/databricks_command_context.py +15 -0
- mlflow/tracking/context/databricks_job_context.py +49 -0
- mlflow/tracking/context/databricks_notebook_context.py +41 -0
- mlflow/tracking/context/databricks_repo_context.py +43 -0
- mlflow/tracking/context/default_context.py +51 -0
- mlflow/tracking/context/git_context.py +32 -0
- mlflow/tracking/context/registry.py +98 -0
- mlflow/tracking/context/system_environment_context.py +15 -0
- mlflow/tracking/default_experiment/__init__.py +1 -0
- mlflow/tracking/default_experiment/abstract_context.py +43 -0
- mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
- mlflow/tracking/default_experiment/registry.py +75 -0
- mlflow/tracking/fluent.py +3595 -0
- mlflow/tracking/metric_value_conversion_utils.py +93 -0
- mlflow/tracking/multimedia.py +206 -0
- mlflow/tracking/registry.py +86 -0
- mlflow/tracking/request_auth/__init__.py +0 -0
- mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
- mlflow/tracking/request_auth/registry.py +60 -0
- mlflow/tracking/request_header/__init__.py +0 -0
- mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
- mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
- mlflow/tracking/request_header/default_request_header_provider.py +17 -0
- mlflow/tracking/request_header/registry.py +79 -0
- mlflow/transformers/__init__.py +2982 -0
- mlflow/transformers/flavor_config.py +258 -0
- mlflow/transformers/hub_utils.py +83 -0
- mlflow/transformers/llm_inference_utils.py +468 -0
- mlflow/transformers/model_io.py +301 -0
- mlflow/transformers/peft.py +51 -0
- mlflow/transformers/signature.py +183 -0
- mlflow/transformers/torch_utils.py +55 -0
- mlflow/types/__init__.py +21 -0
- mlflow/types/agent.py +270 -0
- mlflow/types/chat.py +240 -0
- mlflow/types/llm.py +935 -0
- mlflow/types/responses.py +139 -0
- mlflow/types/responses_helpers.py +416 -0
- mlflow/types/schema.py +1505 -0
- mlflow/types/type_hints.py +647 -0
- mlflow/types/utils.py +753 -0
- mlflow/utils/__init__.py +283 -0
- mlflow/utils/_capture_modules.py +256 -0
- mlflow/utils/_capture_transformers_modules.py +75 -0
- mlflow/utils/_spark_utils.py +201 -0
- mlflow/utils/_unity_catalog_oss_utils.py +97 -0
- mlflow/utils/_unity_catalog_utils.py +479 -0
- mlflow/utils/annotations.py +218 -0
- mlflow/utils/arguments_utils.py +16 -0
- mlflow/utils/async_logging/__init__.py +1 -0
- mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
- mlflow/utils/async_logging/async_logging_queue.py +366 -0
- mlflow/utils/async_logging/run_artifact.py +38 -0
- mlflow/utils/async_logging/run_batch.py +58 -0
- mlflow/utils/async_logging/run_operations.py +49 -0
- mlflow/utils/autologging_utils/__init__.py +737 -0
- mlflow/utils/autologging_utils/client.py +432 -0
- mlflow/utils/autologging_utils/config.py +33 -0
- mlflow/utils/autologging_utils/events.py +294 -0
- mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
- mlflow/utils/autologging_utils/metrics_queue.py +71 -0
- mlflow/utils/autologging_utils/safety.py +1104 -0
- mlflow/utils/autologging_utils/versioning.py +95 -0
- mlflow/utils/checkpoint_utils.py +206 -0
- mlflow/utils/class_utils.py +6 -0
- mlflow/utils/cli_args.py +257 -0
- mlflow/utils/conda.py +354 -0
- mlflow/utils/credentials.py +231 -0
- mlflow/utils/data_utils.py +17 -0
- mlflow/utils/databricks_utils.py +1436 -0
- mlflow/utils/docstring_utils.py +477 -0
- mlflow/utils/doctor.py +133 -0
- mlflow/utils/download_cloud_file_chunk.py +43 -0
- mlflow/utils/env_manager.py +16 -0
- mlflow/utils/env_pack.py +131 -0
- mlflow/utils/environment.py +1009 -0
- mlflow/utils/exception_utils.py +14 -0
- mlflow/utils/file_utils.py +978 -0
- mlflow/utils/git_utils.py +77 -0
- mlflow/utils/gorilla.py +797 -0
- mlflow/utils/import_hooks/__init__.py +363 -0
- mlflow/utils/lazy_load.py +51 -0
- mlflow/utils/logging_utils.py +168 -0
- mlflow/utils/mime_type_utils.py +58 -0
- mlflow/utils/mlflow_tags.py +103 -0
- mlflow/utils/model_utils.py +486 -0
- mlflow/utils/name_utils.py +346 -0
- mlflow/utils/nfs_on_spark.py +62 -0
- mlflow/utils/openai_utils.py +164 -0
- mlflow/utils/os.py +12 -0
- mlflow/utils/oss_registry_utils.py +29 -0
- mlflow/utils/plugins.py +17 -0
- mlflow/utils/process.py +182 -0
- mlflow/utils/promptlab_utils.py +146 -0
- mlflow/utils/proto_json_utils.py +743 -0
- mlflow/utils/pydantic_utils.py +54 -0
- mlflow/utils/request_utils.py +279 -0
- mlflow/utils/requirements_utils.py +704 -0
- mlflow/utils/rest_utils.py +673 -0
- mlflow/utils/search_logged_model_utils.py +127 -0
- mlflow/utils/search_utils.py +2111 -0
- mlflow/utils/secure_loading.py +221 -0
- mlflow/utils/security_validation.py +384 -0
- mlflow/utils/server_cli_utils.py +61 -0
- mlflow/utils/spark_utils.py +15 -0
- mlflow/utils/string_utils.py +138 -0
- mlflow/utils/thread_utils.py +63 -0
- mlflow/utils/time.py +54 -0
- mlflow/utils/timeout.py +42 -0
- mlflow/utils/uri.py +572 -0
- mlflow/utils/validation.py +662 -0
- mlflow/utils/virtualenv.py +458 -0
- mlflow/utils/warnings_utils.py +25 -0
- mlflow/utils/yaml_utils.py +179 -0
- mlflow/version.py +24 -0
@@ -0,0 +1,134 @@
|
|
1
|
+
import importlib.metadata
|
2
|
+
import logging
|
3
|
+
import posixpath
|
4
|
+
from concurrent.futures import Future
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import TYPE_CHECKING, Optional
|
7
|
+
|
8
|
+
from packaging.version import Version
|
9
|
+
|
10
|
+
from mlflow.entities import FileInfo
|
11
|
+
from mlflow.environment_variables import MLFLOW_MULTIPART_UPLOAD_CHUNK_SIZE
|
12
|
+
from mlflow.exceptions import MlflowException
|
13
|
+
from mlflow.store.artifact.artifact_repo import ArtifactRepository
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from databricks.sdk.service.files import FilesAPI
|
17
|
+
|
18
|
+
|
19
|
+
def _sdk_supports_large_file_uploads() -> bool:
    """Report whether the installed ``databricks-sdk`` is version 0.45.0 or newer.

    Related databricks-sdk-py commit:
    https://github.com/databricks/databricks-sdk-py/commit/7ca3fb7e8643126b74c9f5779dc01fb20c1741fb

    Returns:
        True if the installed distribution's version is >= 0.45.0, else False.
    """
    installed_version = Version(importlib.metadata.version("databricks-sdk"))
    minimum_version = Version("0.45.0")
    return installed_version >= minimum_version
|
22
|
+
|
23
|
+
|
24
|
+
_logger = logging.getLogger(__name__)
|
25
|
+
|
26
|
+
|
27
|
+
# TODO: The following artifact repositories should use this class. Migrate them.
|
28
|
+
# - databricks_sdk_models_artifact_repo.py
|
29
|
+
class DatabricksSdkArtifactRepository(ArtifactRepository):
    """Artifact repository that stores and retrieves files via ``WorkspaceClient().files``."""

    def __init__(self, artifact_uri: str, tracking_uri: Optional[str] = None) -> None:
        """Create the repository and its Databricks ``WorkspaceClient``.

        Args:
            artifact_uri: Root path under which artifacts are stored.
            tracking_uri: Optional tracking URI forwarded to the base class.
        """
        from databricks.sdk import WorkspaceClient
        from databricks.sdk.config import Config

        super().__init__(artifact_uri, tracking_uri)
        supports_large_file_uploads = _sdk_supports_large_file_uploads()
        wc = WorkspaceClient(
            config=(
                Config(enable_experimental_files_api_client=True)
                if supports_large_file_uploads
                else None
            )
        )
        if supports_large_file_uploads:
            # `Config` has a `multipart_upload_chunk_size` parameter but the constructor
            # doesn't set it. This is a bug in databricks-sdk.
            # >>> from databricks.sdk.config import Config
            # >>> config = Config(multipart_upload_chunk_size=123)
            # >>> assert config.multipart_upload_chunk_size != 123
            try:
                wc.files._config.multipart_upload_chunk_size = (
                    MLFLOW_MULTIPART_UPLOAD_CHUNK_SIZE.get()
                )
            except AttributeError:
                # Best effort: `_config` is a private attribute and may be absent.
                _logger.debug("Failed to set multipart_upload_chunk_size in Config", exc_info=True)
        self.wc = wc

    @property
    def files_api(self) -> "FilesAPI":
        """The ``files`` attribute of the workspace client."""
        return self.wc.files

    def _is_dir(self, path: str) -> bool:
        """Return True if ``get_directory_metadata(path)`` succeeds, False on ``NotFound``."""
        from databricks.sdk.errors.platform import NotFound

        try:
            self.files_api.get_directory_metadata(path)
        except NotFound:
            return False
        return True

    def full_path(self, artifact_path: Optional[str]) -> str:
        """Return ``artifact_uri`` joined with ``artifact_path``, or ``artifact_uri`` if empty."""
        return f"{self.artifact_uri}/{artifact_path}" if artifact_path else self.artifact_uri

    def log_artifact(self, local_file: str, artifact_path: Optional[str] = None) -> None:
        """Upload a single local file, overwriting any existing remote file.

        Args:
            local_file: Path of the file to upload.
            artifact_path: Optional remote directory (relative to ``artifact_uri``) in which
                the file is placed under its own base name.

        Raises:
            MlflowException: If the file is larger than 5 GiB and the installed
                databricks-sdk is older than the version required for large uploads.
        """
        local_path = Path(local_file)
        # The helper must be *called*: a bare function object is always truthy, which
        # would make this size guard unreachable.
        if local_path.stat().st_size > 5 * (1024**3) and not _sdk_supports_large_file_uploads():
            raise MlflowException.invalid_parameter_value(
                "Databricks SDK version < 0.45.0 does not support uploading files larger than 5GB. "
                "Please upgrade the databricks-sdk package to version >= 0.45.0."
            )

        with open(local_file, "rb") as f:
            name = local_path.name
            self.files_api.upload(
                self.full_path(posixpath.join(artifact_path, name) if artifact_path else name),
                f,
                overwrite=True,
            )

    def log_artifacts(self, local_dir: str, artifact_path: Optional[str] = None) -> None:
        """Upload every regular file under ``local_dir``, preserving the directory layout.

        Uploads are submitted to the thread pool returned by ``_create_thread_pool``; the
        first failure is re-raised once the pool has been drained.

        Args:
            local_dir: Local directory to walk recursively.
            artifact_path: Optional remote directory prefix relative to ``artifact_uri``.
        """
        local_dir = Path(local_dir).resolve()
        futures: list[Future[None]] = []
        with self._create_thread_pool() as executor:
            for f in local_dir.rglob("*"):
                if not f.is_file():
                    continue

                paths: list[str] = []
                if artifact_path:
                    paths.append(artifact_path)
                if f.parent != local_dir:
                    # Remote paths always use forward slashes, regardless of the local OS.
                    paths.append(f.parent.relative_to(local_dir).as_posix())

                fut = executor.submit(
                    self.log_artifact,
                    local_file=f,
                    artifact_path=posixpath.join(*paths) if paths else None,
                )
                futures.append(fut)

        # Surface any exception raised inside a worker.
        for fut in futures:
            fut.result()

    def list_artifacts(self, path: Optional[str] = None) -> list[FileInfo]:
        """List the direct children of ``path``, sorted by path.

        Args:
            path: Optional directory relative to ``artifact_uri``.

        Returns:
            ``FileInfo`` entries with paths relative to ``artifact_uri``; an empty list
            when the target is not a directory.
        """
        dest_path = self.full_path(path)
        if not self._is_dir(dest_path):
            return []

        file_infos: list[FileInfo] = []
        for directory_entry in self.files_api.list_directory_contents(dest_path):
            relative_path = posixpath.relpath(directory_entry.path, self.artifact_uri)
            file_infos.append(
                FileInfo(
                    path=relative_path,
                    is_dir=directory_entry.is_directory,
                    file_size=directory_entry.file_size,
                )
            )

        return sorted(file_infos, key=lambda f: f.path)

    def _download_file(self, remote_file_path: str, local_path: str) -> None:
        """Download ``remote_file_path`` into ``local_path``, reading 10 MiB at a time."""
        download_resp = self.files_api.download(self.full_path(remote_file_path))
        with open(local_path, "wb") as f:
            while chunk := download_resp.contents.read(10 * 1024 * 1024):
                f.write(chunk)
|
@@ -0,0 +1,97 @@
|
|
1
|
+
import posixpath
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from mlflow.entities import FileInfo
|
5
|
+
from mlflow.environment_variables import (
|
6
|
+
MLFLOW_MULTIPART_DOWNLOAD_CHUNK_SIZE,
|
7
|
+
)
|
8
|
+
from mlflow.store.artifact.cloud_artifact_repo import CloudArtifactRepository
|
9
|
+
|
10
|
+
|
11
|
+
def _get_databricks_workspace_client():
    """Build a Databricks SDK ``WorkspaceClient`` constructed with no arguments.

    The SDK is imported inside the function rather than at module import time.
    """
    from databricks.sdk import WorkspaceClient

    workspace_client = WorkspaceClient()
    return workspace_client
|
15
|
+
|
16
|
+
|
17
|
+
class DatabricksSDKModelsArtifactRepository(CloudArtifactRepository):
    """
    Model artifact repository backed by the Databricks SDK, independent of the cloud that
    physically holds the model files.
    """

    def __init__(self, model_name, model_version):
        """Record the model coordinates and derive the ``/Models/...`` base path.

        Dots in ``model_name`` become path separators in the base path.
        """
        self.model_name = model_name
        self.model_version = model_version
        name_as_path = model_name.replace(".", "/")
        self.model_base_path = f"/Models/{name_as_path}/{model_version}"
        self.client = _get_databricks_workspace_client()
        super().__init__(self.model_base_path)

    def list_artifacts(self, path: Optional[str] = None) -> list[FileInfo]:
        """Return the children of ``path`` (relative to the model base path), sorted by path.

        An empty list is returned when the target is not a directory.
        """
        target = posixpath.join(self.model_base_path, path) if path else self.model_base_path

        # A file (or a missing path) has no children to report.
        if not self._is_dir(target):
            return []

        listing = [
            FileInfo(
                path=posixpath.relpath(entry.path, self.model_base_path),
                is_dir=entry.is_directory,
                file_size=entry.file_size,
            )
            for entry in self.client.files.list_directory_contents(target)
        ]
        return sorted(listing, key=lambda info: info.path)

    def _is_dir(self, artifact_path):
        """Return True when directory metadata can be fetched, False on ``NotFound``."""
        from databricks.sdk.errors.platform import NotFound

        try:
            self.client.files.get_directory_metadata(artifact_path)
        except NotFound:
            return False
        else:
            return True

    def _upload_to_cloud(self, cloud_credential_info, src_file_path, artifact_file_path=None):
        """Upload ``src_file_path`` to the base path joined with ``artifact_file_path``.

        ``cloud_credential_info`` is accepted but not used.
        """
        if artifact_file_path:
            remote_path = posixpath.join(self.model_base_path, artifact_file_path)
        else:
            remote_path = self.model_base_path

        with open(src_file_path, "rb") as src:
            self.client.files.upload(remote_path, src, overwrite=True)

    def log_artifact(self, local_file, artifact_path=None):
        """Upload ``local_file`` by delegating to ``_upload_to_cloud``."""
        # NOTE(review): `artifact_path` is forwarded as the complete destination path; the
        # local file name is not appended here. Confirm callers rely on that.
        self._upload_to_cloud(
            cloud_credential_info=None,
            src_file_path=local_file,
            artifact_file_path=artifact_path,
        )

    def _download_from_cloud(self, remote_file_path, local_path):
        """Stream a remote file into ``local_path`` in chunks of the configured size."""
        if remote_file_path:
            remote_path = posixpath.join(self.model_base_path, remote_file_path)
        else:
            remote_path = self.model_base_path

        stream = self.client.files.download(remote_path).contents
        read_size = MLFLOW_MULTIPART_DOWNLOAD_CHUNK_SIZE.get()

        with open(local_path, "wb") as out:
            while True:
                block = stream.read(read_size)
                if not block:
                    break
                out.write(block)

    def _get_write_credential_infos(self, remote_file_paths):
        """Return one ``None`` placeholder per path."""
        # Transfers through the Databricks SDK need no additional credentials.
        return [None] * len(remote_file_paths)

    def _get_read_credential_infos(self, remote_file_paths):
        """Return one ``None`` placeholder per path."""
        # Transfers through the Databricks SDK need no additional credentials.
        return [None] * len(remote_file_paths)
|
@@ -0,0 +1,240 @@
|
|
1
|
+
import json
|
2
|
+
import os
|
3
|
+
import posixpath
|
4
|
+
from typing import Optional
|
5
|
+
|
6
|
+
import mlflow.utils.databricks_utils
|
7
|
+
from mlflow.entities import FileInfo
|
8
|
+
from mlflow.environment_variables import MLFLOW_ENABLE_DBFS_FUSE_ARTIFACT_REPO
|
9
|
+
from mlflow.exceptions import MlflowException
|
10
|
+
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
|
11
|
+
from mlflow.store.artifact.artifact_repo import ArtifactRepository
|
12
|
+
from mlflow.store.artifact.databricks_artifact_repo import DatabricksArtifactRepository
|
13
|
+
from mlflow.store.artifact.databricks_logged_model_artifact_repo import (
|
14
|
+
DatabricksLoggedModelArtifactRepository,
|
15
|
+
)
|
16
|
+
from mlflow.store.artifact.local_artifact_repo import LocalArtifactRepository
|
17
|
+
from mlflow.store.tracking.rest_store import RestStore
|
18
|
+
from mlflow.tracking._tracking_service import utils
|
19
|
+
from mlflow.utils.databricks_utils import get_databricks_host_creds
|
20
|
+
from mlflow.utils.file_utils import relative_path_to_artifact_path
|
21
|
+
from mlflow.utils.rest_utils import (
|
22
|
+
RESOURCE_NON_EXISTENT,
|
23
|
+
http_request,
|
24
|
+
http_request_safe,
|
25
|
+
)
|
26
|
+
from mlflow.utils.string_utils import strip_prefix
|
27
|
+
from mlflow.utils.uri import (
|
28
|
+
get_databricks_profile_uri_from_artifact_uri,
|
29
|
+
is_databricks_acled_artifacts_uri,
|
30
|
+
is_databricks_model_registry_artifacts_uri,
|
31
|
+
is_valid_dbfs_uri,
|
32
|
+
remove_databricks_profile_info_from_artifact_uri,
|
33
|
+
strip_scheme,
|
34
|
+
)
|
35
|
+
|
36
|
+
# The following constants are defined as @developer_stable
|
37
|
+
LIST_API_ENDPOINT = "/api/2.0/dbfs/list"
|
38
|
+
GET_STATUS_ENDPOINT = "/api/2.0/dbfs/get-status"
|
39
|
+
DOWNLOAD_CHUNK_SIZE = 1024
|
40
|
+
|
41
|
+
|
42
|
+
class DbfsRestArtifactRepository(ArtifactRepository):
    """
    Artifact repository that talks to DBFS through its REST API.

    Handles URIs of the form ``dbfs:/<path>`` and is only usable in combination with the
    RestStore.
    """

    def __init__(self, artifact_uri: str, tracking_uri: Optional[str] = None) -> None:
        """Validate the URI, strip profile info from it and pick a credential provider."""
        if not is_valid_dbfs_uri(artifact_uri):
            raise MlflowException(
                message=(
                    "DBFS URI must be of the form dbfs:/<path> or "
                    "dbfs://profile@databricks/<path>"
                ),
                error_code=INVALID_PARAMETER_VALUE,
            )

        # Artifact operations use a plain dbfs:/ path, so the Databricks profile info is
        # removed before the base class stores ``artifact_uri``.
        profile_free_uri = remove_databricks_profile_info_from_artifact_uri(artifact_uri)
        super().__init__(profile_free_uri, tracking_uri)

        profile_uri = get_databricks_profile_uri_from_artifact_uri(artifact_uri)
        if not profile_uri:
            self.get_host_creds = _get_host_creds_from_default_store()
        else:
            # Credentials are resolved once and served from the closure afterwards.
            resolved_creds = get_databricks_host_creds(profile_uri)
            self.get_host_creds = lambda: resolved_creds

    def _databricks_api_request(self, endpoint, method, **kwargs):
        """Issue a request through ``http_request_safe`` using the current host creds."""
        return http_request_safe(
            host_creds=self.get_host_creds(), endpoint=endpoint, method=method, **kwargs
        )

    def _dbfs_list_api(self, json):
        """GET the DBFS list endpoint with ``json`` passed as query parameters."""
        return http_request(
            host_creds=self.get_host_creds(),
            endpoint=LIST_API_ENDPOINT,
            method="GET",
            params=json,
        )

    def _dbfs_download(self, output_path, endpoint):
        """Stream the response body of ``endpoint`` into ``output_path``."""
        with open(output_path, "wb") as sink:
            resp = self._databricks_api_request(endpoint=endpoint, method="GET", stream=True)
            try:
                for piece in resp.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    sink.write(piece)
            finally:
                resp.close()

    def _is_directory(self, artifact_path):
        """Return whether ``artifact_path`` (or the root, if falsy) is a DBFS directory."""
        return self._dbfs_is_dir(self._get_dbfs_path(artifact_path or ""))

    def _dbfs_is_dir(self, dbfs_path):
        """Look up ``is_dir`` from the get-status endpoint.

        Raises:
            MlflowException: If the response carries no ``is_dir`` key.
        """
        status = self._databricks_api_request(
            endpoint=GET_STATUS_ENDPOINT, method="GET", params={"path": dbfs_path}
        )
        parsed = json.loads(status.text)
        try:
            return parsed["is_dir"]
        except KeyError:
            raise MlflowException(f"DBFS path {dbfs_path} does not exist")

    def _get_dbfs_path(self, artifact_path):
        """Build the absolute DBFS path for ``artifact_path`` below the artifact root."""
        root = strip_scheme(self.artifact_uri).lstrip("/")
        relative = artifact_path.lstrip("/")
        return f"/{root}/{relative}"

    def _get_dbfs_endpoint(self, artifact_path):
        """Prefix the DBFS path of ``artifact_path`` with ``/dbfs``."""
        return "/dbfs" + self._get_dbfs_path(artifact_path)

    def log_artifact(self, local_file, artifact_path=None):
        """POST ``local_file`` to DBFS under ``artifact_path``, keeping its base name."""
        file_name = os.path.basename(local_file)
        remote_name = posixpath.join(artifact_path, file_name) if artifact_path else file_name
        http_endpoint = self._get_dbfs_endpoint(remote_name)

        if os.stat(local_file).st_size != 0:
            with open(local_file, "rb") as payload:
                self._databricks_api_request(
                    endpoint=http_endpoint, method="POST", data=payload, allow_redirects=False
                )
            return

        # Posting an empty file object through `requests.request` upsets the API frontend,
        # possibly because of https://github.com/requests/requests/issues/4215, so an empty
        # string body is sent instead.
        self._databricks_api_request(
            endpoint=http_endpoint, method="POST", data="", allow_redirects=False
        )

    def log_artifacts(self, local_dir, artifact_path=None):
        """Upload every file found by walking ``local_dir``, mirroring its sub-directories."""
        base_artifact_path = artifact_path or ""
        for current_dir, _, file_names in os.walk(local_dir):
            if current_dir == local_dir:
                target_dir = base_artifact_path
            else:
                relative = relative_path_to_artifact_path(os.path.relpath(current_dir, local_dir))
                target_dir = posixpath.join(base_artifact_path, relative)
            for file_name in file_names:
                self.log_artifact(os.path.join(current_dir, file_name), target_dir)

    def list_artifacts(self, path: Optional[str] = None) -> list[FileInfo]:
        """List the entries below ``path``, sorted by path.

        Returns an empty list when the path does not exist or refers to a file.

        Raises:
            MlflowException: If the list response is not valid JSON.
        """
        dbfs_path = self._get_dbfs_path(path or "")
        response = self._dbfs_list_api({"path": dbfs_path})
        try:
            payload = json.loads(response.text)
        except ValueError:
            raise MlflowException(
                f"API request to list files under DBFS path {dbfs_path} failed with "
                f"status code {response.status_code}. Response body: {response.text}"
            )

        artifact_prefix = strip_prefix(self.artifact_uri, "dbfs:")
        if payload.get("error_code", None) == RESOURCE_NON_EXISTENT:
            return []

        results = []
        # Empty directories come back from /api/2.0/dbfs/list without a 'files' key.
        for entry in payload.get("files", []):
            relative = strip_prefix(entry["path"], artifact_prefix + "/")
            # When `path` is a file, the list API answers with one element named like
            # `path`; list_artifacts is expected to return nothing in that situation.
            if relative == path:
                return []
            entry_is_dir = entry["is_dir"]
            size = None if entry_is_dir else entry["file_size"]
            results.append(FileInfo(relative, entry_is_dir, size))
        return sorted(results, key=lambda info: info.path)

    def _download_file(self, remote_file_path, local_path):
        """Download ``remote_file_path`` from DBFS into ``local_path``."""
        endpoint = self._get_dbfs_endpoint(remote_file_path)
        self._dbfs_download(output_path=local_path, endpoint=endpoint)

    def delete_artifacts(self, artifact_path=None):
        """Deletion is not supported; always raises ``MlflowException``."""
        raise MlflowException("Not implemented yet")
|
180
|
+
|
181
|
+
|
182
|
+
def _get_host_creds_from_default_store():
    """Return the ``get_host_creds`` callable of the default tracking store.

    Raises:
        MlflowException: If the default store is not a ``RestStore``.
    """
    default_store = utils._get_store()
    if isinstance(default_store, RestStore):
        return default_store.get_host_creds
    raise MlflowException(
        "Failed to get credentials for DBFS; they are read from the "
        "Databricks CLI credentials or MLFLOW_TRACKING* environment "
        "variables."
    )
|
191
|
+
|
192
|
+
|
193
|
+
def dbfs_artifact_repo_factory(artifact_uri: str, tracking_uri: Optional[str] = None):
    """
    Pick the ArtifactRepository implementation suited to a ``dbfs:/<path>`` URI.

    Selection order:

    1. Access-controlled artifact URIs (e.g.
       ``dbfs:/databricks/mlflow-tracking/<Exp-ID>/<Run-ID>/<path>``) yield a
       DatabricksLoggedModelArtifactRepository for logged-model URIs, otherwise a
       DatabricksArtifactRepository.
    2. If the DBFS FUSE mount is available and enabled, the URI is not a model registry
       URI, and no non-default profile is given, a LocalArtifactRepository over
       ``file:///dbfs/...`` is returned.
    3. Otherwise a DbfsRestArtifactRepository is returned.

    Args:
        artifact_uri: DBFS root artifact URI.
        tracking_uri: The tracking URI.

    Returns:
        Subclass of ArtifactRepository capable of storing artifacts on DBFS.
    """
    if not is_valid_dbfs_uri(artifact_uri):
        raise MlflowException(
            "DBFS URI must be of the form dbfs:/<path> or "
            "dbfs://profile@databricks/<path>, but received " + artifact_uri
        )

    cleaned_artifact_uri = artifact_uri.rstrip("/")
    db_profile_uri = get_databricks_profile_uri_from_artifact_uri(cleaned_artifact_uri)

    if is_databricks_acled_artifacts_uri(artifact_uri):
        repo_cls = (
            DatabricksLoggedModelArtifactRepository
            if DatabricksLoggedModelArtifactRepository.is_logged_model_uri(artifact_uri)
            else DatabricksArtifactRepository
        )
        return repo_cls(cleaned_artifact_uri, tracking_uri)

    use_fuse_mount = (
        mlflow.utils.databricks_utils.is_dbfs_fuse_available()
        and MLFLOW_ENABLE_DBFS_FUSE_ARTIFACT_REPO.get()
        and not is_databricks_model_registry_artifacts_uri(artifact_uri)
        and (db_profile_uri is None or db_profile_uri == "databricks")
    )
    if not use_fuse_mount:
        return DbfsRestArtifactRepository(cleaned_artifact_uri, tracking_uri)

    # With the FUSE mount present, artifacts are written straight to /dbfs/... through
    # local filesystem APIs.
    # A named profile could also point at the current workspace, but instead of a complex
    # check we assume `databricks` is what users write for the current workspace. Reaching
    # the current workspace's DBFS via `DbfsRestArtifactRepository` still works; it may
    # just be slower.
    final_artifact_uri = remove_databricks_profile_info_from_artifact_uri(cleaned_artifact_uri)
    dbfs_relative = strip_prefix(final_artifact_uri, "dbfs:/")
    return LocalArtifactRepository(f"file:///dbfs/{dbfs_relative}", tracking_uri)
|
@@ -0,0 +1,132 @@
|
|
1
|
+
import ftplib
|
2
|
+
import os
|
3
|
+
import posixpath
|
4
|
+
import urllib.parse
|
5
|
+
from contextlib import contextmanager
|
6
|
+
from ftplib import FTP
|
7
|
+
from typing import Optional
|
8
|
+
from urllib.parse import unquote
|
9
|
+
|
10
|
+
from mlflow.entities.file_info import FileInfo
|
11
|
+
from mlflow.exceptions import MlflowException
|
12
|
+
from mlflow.store.artifact.artifact_repo import ArtifactRepository
|
13
|
+
from mlflow.utils.file_utils import relative_path_to_artifact_path
|
14
|
+
|
15
|
+
|
16
|
+
class FTPArtifactRepository(ArtifactRepository):
    """Stores artifacts as files in a remote directory, via ftp."""

    def __init__(self, artifact_uri: str, tracking_uri: Optional[str] = None) -> None:
        """Parse connection settings and the remote root path out of ``artifact_uri``.

        Missing parts fall back to host ``localhost``, port ``21``, an empty password and
        the path ``/``. A password, when present, is percent-decoded.
        """
        super().__init__(artifact_uri, tracking_uri)
        parsed = urllib.parse.urlparse(artifact_uri)
        self.config = {
            "host": "localhost" if parsed.hostname is None else parsed.hostname,
            "port": 21 if parsed.port is None else parsed.port,
            "username": parsed.username,
            "password": "" if parsed.password is None else unquote(parsed.password),
        }
        self.path = parsed.path or "/"

    @contextmanager
    def get_ftp_client(self):
        """Yield a connected, logged-in ``FTP`` client and always close it afterwards."""
        ftp = FTP()
        try:
            ftp.connect(self.config["host"], self.config["port"])
            ftp.login(self.config["username"], self.config["password"])
            yield ftp
        finally:
            # Runs even if connect/login or the caller's `with` body raises, so the
            # connection is never leaked.
            ftp.close()

    @staticmethod
    def _is_dir(ftp, full_file_path):
        """Return True if the server lets us ``cwd`` into ``full_file_path``.

        Note that a successful check leaves the connection's working directory changed.
        """
        try:
            ftp.cwd(full_file_path)
            return True
        except ftplib.error_perm:
            return False

    @staticmethod
    def _mkdir(ftp, artifact_dir):
        """Create ``artifact_dir`` if missing, creating parent directories on failure."""
        try:
            if not FTPArtifactRepository._is_dir(ftp, artifact_dir):
                ftp.mkd(artifact_dir)
        except ftplib.error_perm:
            # `mkd` was refused: create the parent first, then retry this directory.
            head, _ = posixpath.split(artifact_dir)
            FTPArtifactRepository._mkdir(ftp, head)
            FTPArtifactRepository._mkdir(ftp, artifact_dir)

    @staticmethod
    def _size(ftp, full_file_path):
        """Return ``ftp.size(full_file_path)``, queried with the transfer type set to I."""
        ftp.voidcmd("TYPE I")
        size = ftp.size(full_file_path)
        ftp.voidcmd("TYPE A")
        return size

    def log_artifact(self, local_file, artifact_path=None):
        """Upload ``local_file`` into ``artifact_path`` (relative to the root), creating it."""
        with self.get_ftp_client() as ftp:
            artifact_dir = posixpath.join(self.path, artifact_path) if artifact_path else self.path
            self._mkdir(ftp, artifact_dir)
            with open(local_file, "rb") as f:
                ftp.cwd(artifact_dir)
                ftp.storbinary("STOR " + os.path.basename(local_file), f)

    def log_artifacts(self, local_dir, artifact_path=None):
        """Upload the contents of ``local_dir`` recursively.

        Directories that contain no files are created explicitly on the server.
        """
        dest_path = posixpath.join(self.path, artifact_path) if artifact_path else self.path

        local_dir = os.path.abspath(local_dir)
        for root, _, filenames in os.walk(local_dir):
            upload_path = dest_path
            if root != local_dir:
                rel_path = os.path.relpath(root, local_dir)
                rel_upload_path = relative_path_to_artifact_path(rel_path)
                upload_path = posixpath.join(dest_path, rel_upload_path)
            if not filenames:
                with self.get_ftp_client() as ftp:
                    self._mkdir(ftp, upload_path)
            for f in filenames:
                if os.path.isfile(os.path.join(root, f)):
                    self.log_artifact(os.path.join(root, f), upload_path)

    def _is_directory(self, artifact_path):
        """Return whether ``artifact_path`` (or the root, if falsy) is a remote directory."""
        artifact_dir = self.path
        list_dir = posixpath.join(artifact_dir, artifact_path) if artifact_path else artifact_dir
        with self.get_ftp_client() as ftp:
            return self._is_dir(ftp, list_dir)

    def list_artifacts(self, path=None):
        """List the entries directly under ``path``.

        Returns:
            A list of ``FileInfo`` with paths relative to the repository root; empty when
            ``path`` is not a directory.
        """
        with self.get_ftp_client() as ftp:
            artifact_dir = self.path
            list_dir = posixpath.join(artifact_dir, path) if path else artifact_dir
            if not self._is_dir(ftp, list_dir):
                return []
            artifact_files = ftp.nlst(list_dir)
            # Make sure artifact_files is a list of file names because ftp.nlst
            # may return absolute paths.
            artifact_files = [os.path.basename(f) for f in artifact_files]
            artifact_files = [name for name in artifact_files if name not in (".", "..")]
            infos = []
            for file_name in artifact_files:
                file_path = file_name if path is None else posixpath.join(path, file_name)
                full_file_path = posixpath.join(list_dir, file_name)
                if self._is_dir(ftp, full_file_path):
                    infos.append(FileInfo(file_path, True, None))
                else:
                    size = self._size(ftp, full_file_path)
                    infos.append(FileInfo(file_path, False, size))
            return infos

    def _download_file(self, remote_file_path, local_path):
        """Retrieve ``remote_file_path`` (relative to the root) into ``local_path``."""
        remote_full_path = (
            posixpath.join(self.path, remote_file_path) if remote_file_path else self.path
        )
        with self.get_ftp_client() as ftp:
            with open(local_path, "wb") as f:
                ftp.retrbinary("RETR " + remote_full_path, f.write)

    def delete_artifacts(self, artifact_path=None):
        """Deletion is not supported; always raises ``MlflowException``."""
        raise MlflowException("Not implemented yet")
|