genesis-flow 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genesis_flow-1.0.0.dist-info/METADATA +822 -0
- genesis_flow-1.0.0.dist-info/RECORD +645 -0
- genesis_flow-1.0.0.dist-info/WHEEL +5 -0
- genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
- genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
- genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
- mlflow/__init__.py +367 -0
- mlflow/__main__.py +3 -0
- mlflow/ag2/__init__.py +56 -0
- mlflow/ag2/ag2_logger.py +294 -0
- mlflow/anthropic/__init__.py +40 -0
- mlflow/anthropic/autolog.py +129 -0
- mlflow/anthropic/chat.py +144 -0
- mlflow/artifacts/__init__.py +268 -0
- mlflow/autogen/__init__.py +144 -0
- mlflow/autogen/chat.py +142 -0
- mlflow/azure/__init__.py +26 -0
- mlflow/azure/auth_handler.py +257 -0
- mlflow/azure/client.py +319 -0
- mlflow/azure/config.py +120 -0
- mlflow/azure/connection_factory.py +340 -0
- mlflow/azure/exceptions.py +27 -0
- mlflow/azure/stores.py +327 -0
- mlflow/azure/utils.py +183 -0
- mlflow/bedrock/__init__.py +45 -0
- mlflow/bedrock/_autolog.py +202 -0
- mlflow/bedrock/chat.py +122 -0
- mlflow/bedrock/stream.py +160 -0
- mlflow/bedrock/utils.py +43 -0
- mlflow/cli.py +707 -0
- mlflow/client.py +12 -0
- mlflow/config/__init__.py +56 -0
- mlflow/crewai/__init__.py +79 -0
- mlflow/crewai/autolog.py +253 -0
- mlflow/crewai/chat.py +29 -0
- mlflow/data/__init__.py +75 -0
- mlflow/data/artifact_dataset_sources.py +170 -0
- mlflow/data/code_dataset_source.py +40 -0
- mlflow/data/dataset.py +123 -0
- mlflow/data/dataset_registry.py +168 -0
- mlflow/data/dataset_source.py +110 -0
- mlflow/data/dataset_source_registry.py +219 -0
- mlflow/data/delta_dataset_source.py +167 -0
- mlflow/data/digest_utils.py +108 -0
- mlflow/data/evaluation_dataset.py +562 -0
- mlflow/data/filesystem_dataset_source.py +81 -0
- mlflow/data/http_dataset_source.py +145 -0
- mlflow/data/huggingface_dataset.py +258 -0
- mlflow/data/huggingface_dataset_source.py +118 -0
- mlflow/data/meta_dataset.py +104 -0
- mlflow/data/numpy_dataset.py +223 -0
- mlflow/data/pandas_dataset.py +231 -0
- mlflow/data/polars_dataset.py +352 -0
- mlflow/data/pyfunc_dataset_mixin.py +31 -0
- mlflow/data/schema.py +76 -0
- mlflow/data/sources.py +1 -0
- mlflow/data/spark_dataset.py +406 -0
- mlflow/data/spark_dataset_source.py +74 -0
- mlflow/data/spark_delta_utils.py +118 -0
- mlflow/data/tensorflow_dataset.py +350 -0
- mlflow/data/uc_volume_dataset_source.py +81 -0
- mlflow/db.py +27 -0
- mlflow/dspy/__init__.py +17 -0
- mlflow/dspy/autolog.py +197 -0
- mlflow/dspy/callback.py +398 -0
- mlflow/dspy/constant.py +1 -0
- mlflow/dspy/load.py +93 -0
- mlflow/dspy/save.py +393 -0
- mlflow/dspy/util.py +109 -0
- mlflow/dspy/wrapper.py +226 -0
- mlflow/entities/__init__.py +104 -0
- mlflow/entities/_mlflow_object.py +52 -0
- mlflow/entities/assessment.py +545 -0
- mlflow/entities/assessment_error.py +80 -0
- mlflow/entities/assessment_source.py +141 -0
- mlflow/entities/dataset.py +92 -0
- mlflow/entities/dataset_input.py +51 -0
- mlflow/entities/dataset_summary.py +62 -0
- mlflow/entities/document.py +48 -0
- mlflow/entities/experiment.py +109 -0
- mlflow/entities/experiment_tag.py +35 -0
- mlflow/entities/file_info.py +45 -0
- mlflow/entities/input_tag.py +35 -0
- mlflow/entities/lifecycle_stage.py +35 -0
- mlflow/entities/logged_model.py +228 -0
- mlflow/entities/logged_model_input.py +26 -0
- mlflow/entities/logged_model_output.py +32 -0
- mlflow/entities/logged_model_parameter.py +46 -0
- mlflow/entities/logged_model_status.py +74 -0
- mlflow/entities/logged_model_tag.py +33 -0
- mlflow/entities/metric.py +200 -0
- mlflow/entities/model_registry/__init__.py +29 -0
- mlflow/entities/model_registry/_model_registry_entity.py +13 -0
- mlflow/entities/model_registry/model_version.py +243 -0
- mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
- mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
- mlflow/entities/model_registry/model_version_search.py +25 -0
- mlflow/entities/model_registry/model_version_stages.py +25 -0
- mlflow/entities/model_registry/model_version_status.py +35 -0
- mlflow/entities/model_registry/model_version_tag.py +35 -0
- mlflow/entities/model_registry/prompt.py +73 -0
- mlflow/entities/model_registry/prompt_version.py +244 -0
- mlflow/entities/model_registry/registered_model.py +175 -0
- mlflow/entities/model_registry/registered_model_alias.py +35 -0
- mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
- mlflow/entities/model_registry/registered_model_search.py +25 -0
- mlflow/entities/model_registry/registered_model_tag.py +35 -0
- mlflow/entities/multipart_upload.py +74 -0
- mlflow/entities/param.py +49 -0
- mlflow/entities/run.py +97 -0
- mlflow/entities/run_data.py +84 -0
- mlflow/entities/run_info.py +188 -0
- mlflow/entities/run_inputs.py +59 -0
- mlflow/entities/run_outputs.py +43 -0
- mlflow/entities/run_status.py +41 -0
- mlflow/entities/run_tag.py +36 -0
- mlflow/entities/source_type.py +31 -0
- mlflow/entities/span.py +774 -0
- mlflow/entities/span_event.py +96 -0
- mlflow/entities/span_status.py +102 -0
- mlflow/entities/trace.py +317 -0
- mlflow/entities/trace_data.py +71 -0
- mlflow/entities/trace_info.py +220 -0
- mlflow/entities/trace_info_v2.py +162 -0
- mlflow/entities/trace_location.py +173 -0
- mlflow/entities/trace_state.py +39 -0
- mlflow/entities/trace_status.py +68 -0
- mlflow/entities/view_type.py +51 -0
- mlflow/environment_variables.py +866 -0
- mlflow/evaluation/__init__.py +16 -0
- mlflow/evaluation/assessment.py +369 -0
- mlflow/evaluation/evaluation.py +411 -0
- mlflow/evaluation/evaluation_tag.py +61 -0
- mlflow/evaluation/fluent.py +48 -0
- mlflow/evaluation/utils.py +201 -0
- mlflow/exceptions.py +213 -0
- mlflow/experiments.py +140 -0
- mlflow/gemini/__init__.py +81 -0
- mlflow/gemini/autolog.py +186 -0
- mlflow/gemini/chat.py +261 -0
- mlflow/genai/__init__.py +71 -0
- mlflow/genai/datasets/__init__.py +67 -0
- mlflow/genai/datasets/evaluation_dataset.py +131 -0
- mlflow/genai/evaluation/__init__.py +3 -0
- mlflow/genai/evaluation/base.py +411 -0
- mlflow/genai/evaluation/constant.py +23 -0
- mlflow/genai/evaluation/utils.py +244 -0
- mlflow/genai/judges/__init__.py +21 -0
- mlflow/genai/judges/databricks.py +404 -0
- mlflow/genai/label_schemas/__init__.py +153 -0
- mlflow/genai/label_schemas/label_schemas.py +209 -0
- mlflow/genai/labeling/__init__.py +159 -0
- mlflow/genai/labeling/labeling.py +250 -0
- mlflow/genai/optimize/__init__.py +13 -0
- mlflow/genai/optimize/base.py +198 -0
- mlflow/genai/optimize/optimizers/__init__.py +4 -0
- mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
- mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
- mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
- mlflow/genai/optimize/types.py +75 -0
- mlflow/genai/optimize/util.py +30 -0
- mlflow/genai/prompts/__init__.py +206 -0
- mlflow/genai/scheduled_scorers.py +431 -0
- mlflow/genai/scorers/__init__.py +26 -0
- mlflow/genai/scorers/base.py +492 -0
- mlflow/genai/scorers/builtin_scorers.py +765 -0
- mlflow/genai/scorers/scorer_utils.py +138 -0
- mlflow/genai/scorers/validation.py +165 -0
- mlflow/genai/utils/data_validation.py +146 -0
- mlflow/genai/utils/enum_utils.py +23 -0
- mlflow/genai/utils/trace_utils.py +211 -0
- mlflow/groq/__init__.py +42 -0
- mlflow/groq/_groq_autolog.py +74 -0
- mlflow/johnsnowlabs/__init__.py +888 -0
- mlflow/langchain/__init__.py +24 -0
- mlflow/langchain/api_request_parallel_processor.py +330 -0
- mlflow/langchain/autolog.py +147 -0
- mlflow/langchain/chat_agent_langgraph.py +340 -0
- mlflow/langchain/constant.py +1 -0
- mlflow/langchain/constants.py +1 -0
- mlflow/langchain/databricks_dependencies.py +444 -0
- mlflow/langchain/langchain_tracer.py +597 -0
- mlflow/langchain/model.py +919 -0
- mlflow/langchain/output_parsers.py +142 -0
- mlflow/langchain/retriever_chain.py +153 -0
- mlflow/langchain/runnables.py +527 -0
- mlflow/langchain/utils/chat.py +402 -0
- mlflow/langchain/utils/logging.py +671 -0
- mlflow/langchain/utils/serialization.py +36 -0
- mlflow/legacy_databricks_cli/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/provider.py +482 -0
- mlflow/litellm/__init__.py +175 -0
- mlflow/llama_index/__init__.py +22 -0
- mlflow/llama_index/autolog.py +55 -0
- mlflow/llama_index/chat.py +43 -0
- mlflow/llama_index/constant.py +1 -0
- mlflow/llama_index/model.py +577 -0
- mlflow/llama_index/pyfunc_wrapper.py +332 -0
- mlflow/llama_index/serialize_objects.py +188 -0
- mlflow/llama_index/tracer.py +561 -0
- mlflow/metrics/__init__.py +479 -0
- mlflow/metrics/base.py +39 -0
- mlflow/metrics/genai/__init__.py +25 -0
- mlflow/metrics/genai/base.py +101 -0
- mlflow/metrics/genai/genai_metric.py +771 -0
- mlflow/metrics/genai/metric_definitions.py +450 -0
- mlflow/metrics/genai/model_utils.py +371 -0
- mlflow/metrics/genai/prompt_template.py +68 -0
- mlflow/metrics/genai/prompts/__init__.py +0 -0
- mlflow/metrics/genai/prompts/v1.py +422 -0
- mlflow/metrics/genai/utils.py +6 -0
- mlflow/metrics/metric_definitions.py +619 -0
- mlflow/mismatch.py +34 -0
- mlflow/mistral/__init__.py +34 -0
- mlflow/mistral/autolog.py +71 -0
- mlflow/mistral/chat.py +135 -0
- mlflow/ml_package_versions.py +452 -0
- mlflow/models/__init__.py +97 -0
- mlflow/models/auth_policy.py +83 -0
- mlflow/models/cli.py +354 -0
- mlflow/models/container/__init__.py +294 -0
- mlflow/models/container/scoring_server/__init__.py +0 -0
- mlflow/models/container/scoring_server/nginx.conf +39 -0
- mlflow/models/dependencies_schemas.py +287 -0
- mlflow/models/display_utils.py +158 -0
- mlflow/models/docker_utils.py +211 -0
- mlflow/models/evaluation/__init__.py +23 -0
- mlflow/models/evaluation/_shap_patch.py +64 -0
- mlflow/models/evaluation/artifacts.py +194 -0
- mlflow/models/evaluation/base.py +1811 -0
- mlflow/models/evaluation/calibration_curve.py +109 -0
- mlflow/models/evaluation/default_evaluator.py +996 -0
- mlflow/models/evaluation/deprecated.py +23 -0
- mlflow/models/evaluation/evaluator_registry.py +80 -0
- mlflow/models/evaluation/evaluators/classifier.py +704 -0
- mlflow/models/evaluation/evaluators/default.py +233 -0
- mlflow/models/evaluation/evaluators/regressor.py +96 -0
- mlflow/models/evaluation/evaluators/shap.py +296 -0
- mlflow/models/evaluation/lift_curve.py +178 -0
- mlflow/models/evaluation/utils/metric.py +123 -0
- mlflow/models/evaluation/utils/trace.py +179 -0
- mlflow/models/evaluation/validation.py +434 -0
- mlflow/models/flavor_backend.py +93 -0
- mlflow/models/flavor_backend_registry.py +53 -0
- mlflow/models/model.py +1639 -0
- mlflow/models/model_config.py +150 -0
- mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
- mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
- mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
- mlflow/models/python_api.py +369 -0
- mlflow/models/rag_signatures.py +128 -0
- mlflow/models/resources.py +321 -0
- mlflow/models/signature.py +662 -0
- mlflow/models/utils.py +2054 -0
- mlflow/models/wheeled_model.py +280 -0
- mlflow/openai/__init__.py +57 -0
- mlflow/openai/_agent_tracer.py +364 -0
- mlflow/openai/api_request_parallel_processor.py +131 -0
- mlflow/openai/autolog.py +509 -0
- mlflow/openai/constant.py +1 -0
- mlflow/openai/model.py +824 -0
- mlflow/openai/utils/chat_schema.py +367 -0
- mlflow/optuna/__init__.py +3 -0
- mlflow/optuna/storage.py +646 -0
- mlflow/plugins/__init__.py +72 -0
- mlflow/plugins/base.py +358 -0
- mlflow/plugins/builtin/__init__.py +24 -0
- mlflow/plugins/builtin/pytorch_plugin.py +150 -0
- mlflow/plugins/builtin/sklearn_plugin.py +158 -0
- mlflow/plugins/builtin/transformers_plugin.py +187 -0
- mlflow/plugins/cli.py +321 -0
- mlflow/plugins/discovery.py +340 -0
- mlflow/plugins/manager.py +465 -0
- mlflow/plugins/registry.py +316 -0
- mlflow/plugins/templates/framework_plugin_template.py +329 -0
- mlflow/prompt/constants.py +20 -0
- mlflow/prompt/promptlab_model.py +197 -0
- mlflow/prompt/registry_utils.py +248 -0
- mlflow/promptflow/__init__.py +495 -0
- mlflow/protos/__init__.py +0 -0
- mlflow/protos/assessments_pb2.py +174 -0
- mlflow/protos/databricks_artifacts_pb2.py +489 -0
- mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
- mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
- mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
- mlflow/protos/databricks_pb2.py +267 -0
- mlflow/protos/databricks_trace_server_pb2.py +374 -0
- mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
- mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
- mlflow/protos/facet_feature_statistics_pb2.py +296 -0
- mlflow/protos/internal_pb2.py +77 -0
- mlflow/protos/mlflow_artifacts_pb2.py +336 -0
- mlflow/protos/model_registry_pb2.py +1073 -0
- mlflow/protos/scalapb/__init__.py +0 -0
- mlflow/protos/scalapb/scalapb_pb2.py +104 -0
- mlflow/protos/service_pb2.py +2600 -0
- mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
- mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
- mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
- mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
- mlflow/py.typed +0 -0
- mlflow/pydantic_ai/__init__.py +57 -0
- mlflow/pydantic_ai/autolog.py +173 -0
- mlflow/pyfunc/__init__.py +3844 -0
- mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
- mlflow/pyfunc/backend.py +523 -0
- mlflow/pyfunc/context.py +78 -0
- mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
- mlflow/pyfunc/loaders/__init__.py +7 -0
- mlflow/pyfunc/loaders/chat_agent.py +117 -0
- mlflow/pyfunc/loaders/chat_model.py +125 -0
- mlflow/pyfunc/loaders/code_model.py +31 -0
- mlflow/pyfunc/loaders/responses_agent.py +112 -0
- mlflow/pyfunc/mlserver.py +46 -0
- mlflow/pyfunc/model.py +1473 -0
- mlflow/pyfunc/scoring_server/__init__.py +604 -0
- mlflow/pyfunc/scoring_server/app.py +7 -0
- mlflow/pyfunc/scoring_server/client.py +146 -0
- mlflow/pyfunc/spark_model_cache.py +48 -0
- mlflow/pyfunc/stdin_server.py +44 -0
- mlflow/pyfunc/utils/__init__.py +3 -0
- mlflow/pyfunc/utils/data_validation.py +224 -0
- mlflow/pyfunc/utils/environment.py +22 -0
- mlflow/pyfunc/utils/input_converter.py +47 -0
- mlflow/pyfunc/utils/serving_data_parser.py +11 -0
- mlflow/pytorch/__init__.py +1171 -0
- mlflow/pytorch/_lightning_autolog.py +580 -0
- mlflow/pytorch/_pytorch_autolog.py +50 -0
- mlflow/pytorch/pickle_module.py +35 -0
- mlflow/rfunc/__init__.py +42 -0
- mlflow/rfunc/backend.py +134 -0
- mlflow/runs.py +89 -0
- mlflow/server/__init__.py +302 -0
- mlflow/server/auth/__init__.py +1224 -0
- mlflow/server/auth/__main__.py +4 -0
- mlflow/server/auth/basic_auth.ini +6 -0
- mlflow/server/auth/cli.py +11 -0
- mlflow/server/auth/client.py +537 -0
- mlflow/server/auth/config.py +34 -0
- mlflow/server/auth/db/__init__.py +0 -0
- mlflow/server/auth/db/cli.py +18 -0
- mlflow/server/auth/db/migrations/__init__.py +0 -0
- mlflow/server/auth/db/migrations/alembic.ini +110 -0
- mlflow/server/auth/db/migrations/env.py +76 -0
- mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
- mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
- mlflow/server/auth/db/models.py +67 -0
- mlflow/server/auth/db/utils.py +37 -0
- mlflow/server/auth/entities.py +165 -0
- mlflow/server/auth/logo.py +14 -0
- mlflow/server/auth/permissions.py +65 -0
- mlflow/server/auth/routes.py +18 -0
- mlflow/server/auth/sqlalchemy_store.py +263 -0
- mlflow/server/graphql/__init__.py +0 -0
- mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
- mlflow/server/graphql/graphql_custom_scalars.py +24 -0
- mlflow/server/graphql/graphql_errors.py +15 -0
- mlflow/server/graphql/graphql_no_batching.py +89 -0
- mlflow/server/graphql/graphql_schema_extensions.py +74 -0
- mlflow/server/handlers.py +3217 -0
- mlflow/server/prometheus_exporter.py +17 -0
- mlflow/server/validation.py +30 -0
- mlflow/shap/__init__.py +691 -0
- mlflow/sklearn/__init__.py +1994 -0
- mlflow/sklearn/utils.py +1041 -0
- mlflow/smolagents/__init__.py +66 -0
- mlflow/smolagents/autolog.py +139 -0
- mlflow/smolagents/chat.py +29 -0
- mlflow/store/__init__.py +10 -0
- mlflow/store/_unity_catalog/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/constants.py +2 -0
- mlflow/store/_unity_catalog/registry/__init__.py +6 -0
- mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
- mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
- mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
- mlflow/store/_unity_catalog/registry/utils.py +121 -0
- mlflow/store/artifact/__init__.py +0 -0
- mlflow/store/artifact/artifact_repo.py +472 -0
- mlflow/store/artifact/artifact_repository_registry.py +154 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
- mlflow/store/artifact/cli.py +141 -0
- mlflow/store/artifact/cloud_artifact_repo.py +332 -0
- mlflow/store/artifact/databricks_artifact_repo.py +729 -0
- mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
- mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
- mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
- mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
- mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
- mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
- mlflow/store/artifact/ftp_artifact_repo.py +132 -0
- mlflow/store/artifact/gcs_artifact_repo.py +296 -0
- mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
- mlflow/store/artifact/http_artifact_repo.py +218 -0
- mlflow/store/artifact/local_artifact_repo.py +142 -0
- mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
- mlflow/store/artifact/models_artifact_repo.py +259 -0
- mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
- mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
- mlflow/store/artifact/r2_artifact_repo.py +70 -0
- mlflow/store/artifact/runs_artifact_repo.py +265 -0
- mlflow/store/artifact/s3_artifact_repo.py +330 -0
- mlflow/store/artifact/sftp_artifact_repo.py +141 -0
- mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
- mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
- mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
- mlflow/store/artifact/utils/__init__.py +0 -0
- mlflow/store/artifact/utils/models.py +148 -0
- mlflow/store/db/__init__.py +0 -0
- mlflow/store/db/base_sql_model.py +3 -0
- mlflow/store/db/db_types.py +10 -0
- mlflow/store/db/utils.py +314 -0
- mlflow/store/db_migrations/__init__.py +0 -0
- mlflow/store/db_migrations/alembic.ini +74 -0
- mlflow/store/db_migrations/env.py +84 -0
- mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
- mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
- mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
- mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
- mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
- mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
- mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
- mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
- mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
- mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
- mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
- mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
- mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
- mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
- mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
- mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
- mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
- mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
- mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
- mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
- mlflow/store/db_migrations/versions/__init__.py +0 -0
- mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
- mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
- mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
- mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
- mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
- mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
- mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
- mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
- mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
- mlflow/store/entities/__init__.py +3 -0
- mlflow/store/entities/paged_list.py +18 -0
- mlflow/store/model_registry/__init__.py +10 -0
- mlflow/store/model_registry/abstract_store.py +1081 -0
- mlflow/store/model_registry/base_rest_store.py +44 -0
- mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
- mlflow/store/model_registry/dbmodels/__init__.py +0 -0
- mlflow/store/model_registry/dbmodels/models.py +206 -0
- mlflow/store/model_registry/file_store.py +1091 -0
- mlflow/store/model_registry/rest_store.py +481 -0
- mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
- mlflow/store/tracking/__init__.py +23 -0
- mlflow/store/tracking/abstract_store.py +816 -0
- mlflow/store/tracking/dbmodels/__init__.py +0 -0
- mlflow/store/tracking/dbmodels/initial_models.py +243 -0
- mlflow/store/tracking/dbmodels/models.py +1073 -0
- mlflow/store/tracking/file_store.py +2438 -0
- mlflow/store/tracking/postgres_managed_identity.py +146 -0
- mlflow/store/tracking/rest_store.py +1131 -0
- mlflow/store/tracking/sqlalchemy_store.py +2785 -0
- mlflow/system_metrics/__init__.py +61 -0
- mlflow/system_metrics/metrics/__init__.py +0 -0
- mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
- mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
- mlflow/system_metrics/metrics/disk_monitor.py +21 -0
- mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
- mlflow/system_metrics/metrics/network_monitor.py +34 -0
- mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
- mlflow/system_metrics/system_metrics_monitor.py +198 -0
- mlflow/tracing/__init__.py +16 -0
- mlflow/tracing/assessment.py +356 -0
- mlflow/tracing/client.py +531 -0
- mlflow/tracing/config.py +125 -0
- mlflow/tracing/constant.py +105 -0
- mlflow/tracing/destination.py +81 -0
- mlflow/tracing/display/__init__.py +40 -0
- mlflow/tracing/display/display_handler.py +196 -0
- mlflow/tracing/export/async_export_queue.py +186 -0
- mlflow/tracing/export/inference_table.py +138 -0
- mlflow/tracing/export/mlflow_v3.py +137 -0
- mlflow/tracing/export/utils.py +70 -0
- mlflow/tracing/fluent.py +1417 -0
- mlflow/tracing/processor/base_mlflow.py +199 -0
- mlflow/tracing/processor/inference_table.py +175 -0
- mlflow/tracing/processor/mlflow_v3.py +47 -0
- mlflow/tracing/processor/otel.py +73 -0
- mlflow/tracing/provider.py +487 -0
- mlflow/tracing/trace_manager.py +200 -0
- mlflow/tracing/utils/__init__.py +616 -0
- mlflow/tracing/utils/artifact_utils.py +28 -0
- mlflow/tracing/utils/copy.py +55 -0
- mlflow/tracing/utils/environment.py +55 -0
- mlflow/tracing/utils/exception.py +21 -0
- mlflow/tracing/utils/once.py +35 -0
- mlflow/tracing/utils/otlp.py +63 -0
- mlflow/tracing/utils/processor.py +54 -0
- mlflow/tracing/utils/search.py +292 -0
- mlflow/tracing/utils/timeout.py +250 -0
- mlflow/tracing/utils/token.py +19 -0
- mlflow/tracing/utils/truncation.py +124 -0
- mlflow/tracing/utils/warning.py +76 -0
- mlflow/tracking/__init__.py +39 -0
- mlflow/tracking/_model_registry/__init__.py +1 -0
- mlflow/tracking/_model_registry/client.py +764 -0
- mlflow/tracking/_model_registry/fluent.py +853 -0
- mlflow/tracking/_model_registry/registry.py +67 -0
- mlflow/tracking/_model_registry/utils.py +251 -0
- mlflow/tracking/_tracking_service/__init__.py +0 -0
- mlflow/tracking/_tracking_service/client.py +883 -0
- mlflow/tracking/_tracking_service/registry.py +56 -0
- mlflow/tracking/_tracking_service/utils.py +275 -0
- mlflow/tracking/artifact_utils.py +179 -0
- mlflow/tracking/client.py +5900 -0
- mlflow/tracking/context/__init__.py +0 -0
- mlflow/tracking/context/abstract_context.py +35 -0
- mlflow/tracking/context/databricks_cluster_context.py +15 -0
- mlflow/tracking/context/databricks_command_context.py +15 -0
- mlflow/tracking/context/databricks_job_context.py +49 -0
- mlflow/tracking/context/databricks_notebook_context.py +41 -0
- mlflow/tracking/context/databricks_repo_context.py +43 -0
- mlflow/tracking/context/default_context.py +51 -0
- mlflow/tracking/context/git_context.py +32 -0
- mlflow/tracking/context/registry.py +98 -0
- mlflow/tracking/context/system_environment_context.py +15 -0
- mlflow/tracking/default_experiment/__init__.py +1 -0
- mlflow/tracking/default_experiment/abstract_context.py +43 -0
- mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
- mlflow/tracking/default_experiment/registry.py +75 -0
- mlflow/tracking/fluent.py +3595 -0
- mlflow/tracking/metric_value_conversion_utils.py +93 -0
- mlflow/tracking/multimedia.py +206 -0
- mlflow/tracking/registry.py +86 -0
- mlflow/tracking/request_auth/__init__.py +0 -0
- mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
- mlflow/tracking/request_auth/registry.py +60 -0
- mlflow/tracking/request_header/__init__.py +0 -0
- mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
- mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
- mlflow/tracking/request_header/default_request_header_provider.py +17 -0
- mlflow/tracking/request_header/registry.py +79 -0
- mlflow/transformers/__init__.py +2982 -0
- mlflow/transformers/flavor_config.py +258 -0
- mlflow/transformers/hub_utils.py +83 -0
- mlflow/transformers/llm_inference_utils.py +468 -0
- mlflow/transformers/model_io.py +301 -0
- mlflow/transformers/peft.py +51 -0
- mlflow/transformers/signature.py +183 -0
- mlflow/transformers/torch_utils.py +55 -0
- mlflow/types/__init__.py +21 -0
- mlflow/types/agent.py +270 -0
- mlflow/types/chat.py +240 -0
- mlflow/types/llm.py +935 -0
- mlflow/types/responses.py +139 -0
- mlflow/types/responses_helpers.py +416 -0
- mlflow/types/schema.py +1505 -0
- mlflow/types/type_hints.py +647 -0
- mlflow/types/utils.py +753 -0
- mlflow/utils/__init__.py +283 -0
- mlflow/utils/_capture_modules.py +256 -0
- mlflow/utils/_capture_transformers_modules.py +75 -0
- mlflow/utils/_spark_utils.py +201 -0
- mlflow/utils/_unity_catalog_oss_utils.py +97 -0
- mlflow/utils/_unity_catalog_utils.py +479 -0
- mlflow/utils/annotations.py +218 -0
- mlflow/utils/arguments_utils.py +16 -0
- mlflow/utils/async_logging/__init__.py +1 -0
- mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
- mlflow/utils/async_logging/async_logging_queue.py +366 -0
- mlflow/utils/async_logging/run_artifact.py +38 -0
- mlflow/utils/async_logging/run_batch.py +58 -0
- mlflow/utils/async_logging/run_operations.py +49 -0
- mlflow/utils/autologging_utils/__init__.py +737 -0
- mlflow/utils/autologging_utils/client.py +432 -0
- mlflow/utils/autologging_utils/config.py +33 -0
- mlflow/utils/autologging_utils/events.py +294 -0
- mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
- mlflow/utils/autologging_utils/metrics_queue.py +71 -0
- mlflow/utils/autologging_utils/safety.py +1104 -0
- mlflow/utils/autologging_utils/versioning.py +95 -0
- mlflow/utils/checkpoint_utils.py +206 -0
- mlflow/utils/class_utils.py +6 -0
- mlflow/utils/cli_args.py +257 -0
- mlflow/utils/conda.py +354 -0
- mlflow/utils/credentials.py +231 -0
- mlflow/utils/data_utils.py +17 -0
- mlflow/utils/databricks_utils.py +1436 -0
- mlflow/utils/docstring_utils.py +477 -0
- mlflow/utils/doctor.py +133 -0
- mlflow/utils/download_cloud_file_chunk.py +43 -0
- mlflow/utils/env_manager.py +16 -0
- mlflow/utils/env_pack.py +131 -0
- mlflow/utils/environment.py +1009 -0
- mlflow/utils/exception_utils.py +14 -0
- mlflow/utils/file_utils.py +978 -0
- mlflow/utils/git_utils.py +77 -0
- mlflow/utils/gorilla.py +797 -0
- mlflow/utils/import_hooks/__init__.py +363 -0
- mlflow/utils/lazy_load.py +51 -0
- mlflow/utils/logging_utils.py +168 -0
- mlflow/utils/mime_type_utils.py +58 -0
- mlflow/utils/mlflow_tags.py +103 -0
- mlflow/utils/model_utils.py +486 -0
- mlflow/utils/name_utils.py +346 -0
- mlflow/utils/nfs_on_spark.py +62 -0
- mlflow/utils/openai_utils.py +164 -0
- mlflow/utils/os.py +12 -0
- mlflow/utils/oss_registry_utils.py +29 -0
- mlflow/utils/plugins.py +17 -0
- mlflow/utils/process.py +182 -0
- mlflow/utils/promptlab_utils.py +146 -0
- mlflow/utils/proto_json_utils.py +743 -0
- mlflow/utils/pydantic_utils.py +54 -0
- mlflow/utils/request_utils.py +279 -0
- mlflow/utils/requirements_utils.py +704 -0
- mlflow/utils/rest_utils.py +673 -0
- mlflow/utils/search_logged_model_utils.py +127 -0
- mlflow/utils/search_utils.py +2111 -0
- mlflow/utils/secure_loading.py +221 -0
- mlflow/utils/security_validation.py +384 -0
- mlflow/utils/server_cli_utils.py +61 -0
- mlflow/utils/spark_utils.py +15 -0
- mlflow/utils/string_utils.py +138 -0
- mlflow/utils/thread_utils.py +63 -0
- mlflow/utils/time.py +54 -0
- mlflow/utils/timeout.py +42 -0
- mlflow/utils/uri.py +572 -0
- mlflow/utils/validation.py +662 -0
- mlflow/utils/virtualenv.py +458 -0
- mlflow/utils/warnings_utils.py +25 -0
- mlflow/utils/yaml_utils.py +179 -0
- mlflow/version.py +24 -0
@@ -0,0 +1,296 @@
|
|
1
|
+
import datetime
|
2
|
+
import importlib.metadata
|
3
|
+
import os
|
4
|
+
import posixpath
|
5
|
+
import urllib.parse
|
6
|
+
from collections import namedtuple
|
7
|
+
from typing import Optional
|
8
|
+
|
9
|
+
from packaging.version import Version
|
10
|
+
|
11
|
+
from mlflow.entities import FileInfo
|
12
|
+
from mlflow.entities.multipart_upload import (
|
13
|
+
CreateMultipartUploadResponse,
|
14
|
+
MultipartUploadCredential,
|
15
|
+
)
|
16
|
+
from mlflow.environment_variables import (
|
17
|
+
MLFLOW_ARTIFACT_UPLOAD_DOWNLOAD_TIMEOUT,
|
18
|
+
MLFLOW_GCS_DOWNLOAD_CHUNK_SIZE,
|
19
|
+
MLFLOW_GCS_UPLOAD_CHUNK_SIZE,
|
20
|
+
)
|
21
|
+
from mlflow.exceptions import _UnsupportedMultipartUploadException
|
22
|
+
from mlflow.store.artifact.artifact_repo import (
|
23
|
+
ArtifactRepository,
|
24
|
+
MultipartUploadMixin,
|
25
|
+
_retry_with_new_creds,
|
26
|
+
)
|
27
|
+
from mlflow.utils.file_utils import relative_path_to_artifact_path
|
28
|
+
|
29
|
+
# Values needed to drive an ``XMLMPUContainer`` for one blob: the HTTP transport,
# the object URL, the request headers and the content type. Instances are built
# by ``GCSArtifactRepository._gcs_mpu_arguments``.
GCSMPUArguments = namedtuple("GCSMPUArguments", ["transport", "url", "headers", "content_type"])
|
30
|
+
|
31
|
+
|
32
|
+
class GCSArtifactRepository(ArtifactRepository, MultipartUploadMixin):
    """
    Stores artifacts on Google Cloud Storage.

    Args:
        artifact_uri: URI of GCS bucket
        tracking_uri: Optional. Forwarded unchanged to the base-class initializer.
        client: Optional. The client to use for GCS operations; a default
            client object will be created if unspecified, using default
            credentials as described in https://google-cloud.readthedocs.io/en/latest/core/auth.html
        credential_refresh_def: Optional. Zero-argument callable used by
            ``_refresh_credentials``; it must return a mapping that contains an
            ``"oauth_token"`` entry.
    """

    def __init__(
        self,
        artifact_uri: str,
        tracking_uri: Optional[str] = None,
        client=None,
        credential_refresh_def=None,
    ) -> None:
        super().__init__(artifact_uri, tracking_uri)
        # Imported lazily so the module can be imported without the GCS SDK installed.
        from google.auth.exceptions import DefaultCredentialsError
        from google.cloud import storage as gcs_storage
        from google.cloud.storage.constants import _DEFAULT_TIMEOUT

        self._GCS_DOWNLOAD_CHUNK_SIZE = MLFLOW_GCS_DOWNLOAD_CHUNK_SIZE.get()
        self._GCS_UPLOAD_CHUNK_SIZE = MLFLOW_GCS_UPLOAD_CHUNK_SIZE.get()
        self._GCS_DEFAULT_TIMEOUT = (
            MLFLOW_ARTIFACT_UPLOAD_DOWNLOAD_TIMEOUT.get() or _DEFAULT_TIMEOUT
        )
        # Method to use for refresh
        self.credential_refresh_def = credential_refresh_def
        # If the user-supplied timeout environment variable value is -1,
        # use `None` for `self._GCS_DEFAULT_TIMEOUT`
        # to use indefinite timeout
        self._GCS_DEFAULT_TIMEOUT = (
            None if self._GCS_DEFAULT_TIMEOUT == -1 else self._GCS_DEFAULT_TIMEOUT
        )
        if client is not None:
            self.client = client
        else:
            try:
                self.client = gcs_storage.Client()
            except DefaultCredentialsError:
                # No default credentials available: fall back to an anonymous client.
                self.client = gcs_storage.Client.create_anonymous_client()

    @staticmethod
    def parse_gcs_uri(uri):
        """Parse a GCS URI, returning (bucket, path).

        Raises:
            Exception: if the URI scheme is not ``gs``.
        """
        parsed = urllib.parse.urlparse(uri)
        if parsed.scheme != "gs":
            raise Exception(f"Not a GCS URI: {uri}")
        path = parsed.path
        if path.startswith("/"):
            path = path[1:]
        return parsed.netloc, path

    @staticmethod
    def _relative_to_root(blob_name, artifact_path):
        """Return ``blob_name`` with the leading ``<artifact_path>/`` removed.

        When the artifact root is the bucket itself (empty ``artifact_path``) there
        is no prefix to strip and ``blob_name`` is returned unchanged. A trailing
        ``/`` on ``artifact_path`` is ignored so it is not counted twice.
        """
        root = artifact_path.rstrip("/")
        return blob_name[len(root) + 1 :] if root else blob_name

    def _resolve_upload_path(self, local_file, artifact_path=None):
        """Return ``(bucket_name, object_name)`` under which ``local_file`` is stored.

        The object name is the artifact root, optionally extended by
        ``artifact_path``, followed by the base name of ``local_file``.
        """
        (bucket, dest_path) = self.parse_gcs_uri(self.artifact_uri)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)
        return bucket, posixpath.join(dest_path, os.path.basename(local_file))

    def _get_bucket(self, bucket):
        """Return the bucket object named ``bucket`` from the current client."""
        return self.client.bucket(bucket)

    def _refresh_credentials(self):
        """Refresh the client credentials and return the bucket of ``artifact_uri``.

        When no ``credential_refresh_def`` was supplied, the existing client is
        kept. Otherwise ``self.client`` is replaced by a new client built from the
        ``"oauth_token"`` returned by ``credential_refresh_def``.
        """
        from google.cloud.storage import Client
        from google.oauth2.credentials import Credentials

        (bucket, _) = self.parse_gcs_uri(self.artifact_uri)
        if not self.credential_refresh_def:
            return self._get_bucket(bucket)
        new_token = self.credential_refresh_def()
        credentials = Credentials(new_token["oauth_token"])
        self.client = Client(project="mlflow", credentials=credentials)
        return self._get_bucket(bucket)

    def log_artifact(self, local_file, artifact_path=None):
        """Upload ``local_file`` below the artifact root (or ``artifact_path`` within it)."""
        bucket, dest_path = self._resolve_upload_path(local_file, artifact_path)

        gcs_bucket = self._get_bucket(bucket)
        blob = gcs_bucket.blob(dest_path, chunk_size=self._GCS_UPLOAD_CHUNK_SIZE)
        blob.upload_from_filename(local_file, timeout=self._GCS_DEFAULT_TIMEOUT)

    def log_artifacts(self, local_dir, artifact_path=None):
        """Recursively upload every file under ``local_dir``, preserving its layout."""
        (bucket, dest_path) = self.parse_gcs_uri(self.artifact_uri)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)

        local_dir = os.path.abspath(local_dir)

        for root, _, filenames in os.walk(local_dir):
            upload_path = dest_path
            if root != local_dir:
                rel_path = os.path.relpath(root, local_dir)
                rel_path = relative_path_to_artifact_path(rel_path)
                upload_path = posixpath.join(dest_path, rel_path)
            for f in filenames:
                # Fetched per file on purpose: a credential refresh replaces
                # `self.client`, and later files must use the new client.
                gcs_bucket = self._get_bucket(bucket)
                path = posixpath.join(upload_path, f)
                # For large models, we need to speculatively retry a credential refresh
                # and throw if it still fails. We cannot use the built-in refresh because UC
                # does not return a refresh token with the oauth token
                file_name = os.path.join(root, f)

                def try_func(gcs_bucket):
                    gcs_bucket.blob(
                        path, chunk_size=self._GCS_UPLOAD_CHUNK_SIZE
                    ).upload_from_filename(file_name, timeout=self._GCS_DEFAULT_TIMEOUT)

                _retry_with_new_creds(
                    try_func=try_func, creds_func=self._refresh_credentials, orig_creds=gcs_bucket
                )

    def list_artifacts(self, path=None):
        """List the direct children of ``path`` (or of the artifact root).

        Returns:
            ``FileInfo`` entries for sub-directories and files, sorted by path.
            Paths are relative to the artifact root.
        """
        (bucket, artifact_path) = self.parse_gcs_uri(self.artifact_uri)
        dest_path = artifact_path
        if path:
            dest_path = posixpath.join(dest_path, path)
        # An empty dest_path means the artifact root is the bucket itself; appending
        # "/" there would only match objects whose names literally start with "/".
        if not dest_path or dest_path.endswith("/"):
            prefix = dest_path
        else:
            prefix = dest_path + "/"

        bkt = self._get_bucket(bucket)

        infos = self._list_folders(bkt, prefix, artifact_path)

        results = bkt.list_blobs(prefix=prefix or None, delimiter="/")
        for result in results:
            # skip blobs matching current directory path as list_blobs api
            # returns subdirectories as well
            if result.name == prefix:
                continue
            blob_path = self._relative_to_root(result.name, artifact_path)
            infos.append(FileInfo(blob_path, False, result.size))

        return sorted(infos, key=lambda f: f.path)

    def _list_folders(self, bkt, prefix, artifact_path):
        """Return directory ``FileInfo`` entries for the sub-prefixes directly under ``prefix``."""
        results = bkt.list_blobs(prefix=prefix or None, delimiter="/")
        dir_paths = set()
        # `prefixes` is exposed per page, so every page has to be visited.
        for page in results.pages:
            dir_paths.update(page.prefixes)

        # Each prefix ends with "/", which is dropped by the trailing [:-1].
        return [
            FileInfo(self._relative_to_root(dir_path, artifact_path)[:-1], True, None)
            for dir_path in dir_paths
        ]

    def _download_file(self, remote_file_path, local_path):
        """Download ``remote_file_path`` (relative to the artifact root) to ``local_path``."""
        (bucket, remote_root_path) = self.parse_gcs_uri(self.artifact_uri)
        remote_full_path = posixpath.join(remote_root_path, remote_file_path)
        gcs_bucket = self._get_bucket(bucket)
        gcs_bucket.blob(
            remote_full_path, chunk_size=self._GCS_DOWNLOAD_CHUNK_SIZE
        ).download_to_filename(local_path, timeout=self._GCS_DEFAULT_TIMEOUT)

    def delete_artifacts(self, artifact_path=None):
        """Delete the object at ``artifact_path`` and every object below it.

        With no ``artifact_path`` everything under the artifact root is deleted.
        """
        (bucket_name, dest_path) = self.parse_gcs_uri(self.artifact_uri)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)

        gcs_bucket = self._get_bucket(bucket_name)
        # `list_blobs` matches on a raw string prefix, so "model" would also match
        # "model_v2/...". Restrict deletion to the object itself and its children.
        if not dest_path or dest_path.endswith("/"):
            dir_prefix = dest_path
        else:
            dir_prefix = dest_path + "/"
        blobs = gcs_bucket.list_blobs(prefix=f"{dest_path}")
        for blob in blobs:
            if blob.name == dest_path or blob.name.startswith(dir_prefix):
                blob.delete()

    @staticmethod
    def _validate_support_mpu():
        """Raise if the installed GCS libraries are too old for multipart upload.

        Raises:
            _UnsupportedMultipartUploadException: if ``google-cloud-storage`` is
                older than 2.12.0 or ``google-resumable-media`` is older than 2.6.0.
        """
        if Version(importlib.metadata.version("google-cloud-storage")) < Version(
            "2.12.0"
        ) or Version(importlib.metadata.version("google-resumable-media")) < Version("2.6.0"):
            raise _UnsupportedMultipartUploadException()

    @staticmethod
    def _gcs_mpu_arguments(filename: str, blob) -> GCSMPUArguments:
        """See :py:func:`google.cloud.storage.transfer_manager.upload_chunks_concurrently`"""
        from google.cloud.storage.transfer_manager import _headers_from_metadata

        bucket = blob.bucket
        client = blob.client
        transport = blob._get_transport(client)

        hostname = client._connection.get_api_base_url_for_mtls()
        url = f"{hostname}/{bucket.name}/{blob.name}"

        base_headers, object_metadata, content_type = blob._get_upload_arguments(
            client, None, filename=filename, command="tm.upload_sharded"
        )
        headers = {**base_headers, **_headers_from_metadata(object_metadata)}

        if blob.user_project is not None:
            headers["x-goog-user-project"] = blob.user_project

        if blob.kms_key_name is not None and "cryptoKeyVersions" not in blob.kms_key_name:
            headers["x-goog-encryption-kms-key-name"] = blob.kms_key_name

        return GCSMPUArguments(
            transport=transport, url=url, headers=headers, content_type=content_type
        )

    def create_multipart_upload(self, local_file, num_parts=1, artifact_path=None):
        """Initiate a multipart upload and return one signed PUT URL per part.

        Args:
            local_file: Local file whose base name becomes the object name.
            num_parts: Number of parts; part numbers run from 1 to ``num_parts``.
            artifact_path: Optional sub-path below the artifact root.

        Returns:
            ``CreateMultipartUploadResponse`` holding the upload id and the
            per-part credentials. Each signed URL expires after 60 minutes.

        Raises:
            _UnsupportedMultipartUploadException: see ``_validate_support_mpu``.
        """
        self._validate_support_mpu()
        from google.resumable_media.requests import XMLMPUContainer

        bucket, dest_path = self._resolve_upload_path(local_file, artifact_path)

        gcs_bucket = self._get_bucket(bucket)
        blob = gcs_bucket.blob(dest_path)
        args = self._gcs_mpu_arguments(local_file, blob)
        container = XMLMPUContainer(args.url, local_file, headers=args.headers)
        container.initiate(transport=args.transport, content_type=args.content_type)
        upload_id = container.upload_id

        credentials = []
        for i in range(1, num_parts + 1):  # part number must be in [1, 10000]
            signed_url = blob.generate_signed_url(
                method="PUT",
                version="v4",
                expiration=datetime.timedelta(minutes=60),
                query_parameters={
                    "partNumber": i,
                    "uploadId": upload_id,
                },
            )
            credentials.append(
                MultipartUploadCredential(
                    url=signed_url,
                    part_number=i,
                    headers={},
                )
            )
        return CreateMultipartUploadResponse(
            credentials=credentials,
            upload_id=upload_id,
        )

    def complete_multipart_upload(self, local_file, upload_id, parts=None, artifact_path=None):
        """Register the uploaded ``parts`` and finalize the multipart upload ``upload_id``.

        Args:
            local_file: Local file whose base name is the object name.
            upload_id: Id returned by ``create_multipart_upload``.
            parts: Iterable of objects exposing ``part_number`` and ``etag``.
                ``None`` is treated as "no parts" instead of raising ``TypeError``.
            artifact_path: Optional sub-path below the artifact root.

        Raises:
            _UnsupportedMultipartUploadException: see ``_validate_support_mpu``.
        """
        self._validate_support_mpu()
        from google.resumable_media.requests import XMLMPUContainer

        bucket, dest_path = self._resolve_upload_path(local_file, artifact_path)

        gcs_bucket = self._get_bucket(bucket)
        blob = gcs_bucket.blob(dest_path)
        args = self._gcs_mpu_arguments(local_file, blob)
        container = XMLMPUContainer(args.url, local_file, headers=args.headers)
        # Attach to the already-initiated upload instead of starting a new one.
        container._upload_id = upload_id
        for part in parts or []:
            container.register_part(part.part_number, part.etag)

        container.finalize(transport=args.transport)

    def abort_multipart_upload(self, local_file, upload_id, artifact_path=None):
        """Cancel the multipart upload ``upload_id`` for ``local_file``.

        Raises:
            _UnsupportedMultipartUploadException: see ``_validate_support_mpu``.
        """
        self._validate_support_mpu()
        from google.resumable_media.requests import XMLMPUContainer

        bucket, dest_path = self._resolve_upload_path(local_file, artifact_path)

        gcs_bucket = self._get_bucket(bucket)
        blob = gcs_bucket.blob(dest_path)
        args = self._gcs_mpu_arguments(local_file, blob)
        container = XMLMPUContainer(args.url, local_file, headers=args.headers)
        # Attach to the already-initiated upload so it can be cancelled.
        container._upload_id = upload_id
        container.cancel(transport=args.transport)
|
@@ -0,0 +1,209 @@
|
|
1
|
+
import os
|
2
|
+
import posixpath
|
3
|
+
import urllib.parse
|
4
|
+
from contextlib import contextmanager
|
5
|
+
from typing import Optional
|
6
|
+
|
7
|
+
try:
|
8
|
+
from pyarrow.fs import FileSelector, FileType, HadoopFileSystem
|
9
|
+
except ImportError:
|
10
|
+
pass
|
11
|
+
|
12
|
+
from mlflow.entities import FileInfo
|
13
|
+
from mlflow.environment_variables import (
|
14
|
+
MLFLOW_KERBEROS_TICKET_CACHE,
|
15
|
+
MLFLOW_KERBEROS_USER,
|
16
|
+
MLFLOW_PYARROW_EXTRA_CONF,
|
17
|
+
)
|
18
|
+
from mlflow.store.artifact.artifact_repo import ArtifactRepository
|
19
|
+
from mlflow.utils.file_utils import relative_path_to_artifact_path
|
20
|
+
|
21
|
+
|
22
|
+
class HdfsArtifactRepository(ArtifactRepository):
    """
    Stores artifacts on HDFS.

    This repository is used with URIs of the form ``hdfs:/<path>``. The repository can only be used
    together with the RestStore.
    """

    # Buffer size used when streaming file contents to or from HDFS.
    _COPY_CHUNK_SIZE = 1024 * 1024

    def __init__(self, artifact_uri: str, tracking_uri: Optional[str] = None) -> None:
        super().__init__(artifact_uri, tracking_uri)
        self.scheme, self.host, self.port, self.path = _resolve_connection_params(artifact_uri)

    @classmethod
    def _copy_stream(cls, source, destination):
        """Copy ``source`` to ``destination`` in chunks.

        Streaming avoids holding a whole (potentially multi-gigabyte) artifact in memory.
        """
        import shutil

        shutil.copyfileobj(source, destination, cls._COPY_CHUNK_SIZE)

    def log_artifact(self, local_file, artifact_path=None):
        """
        Log artifact in hdfs.

        Args:
            local_file: Source file path.
            artifact_path: When specified will attempt to write under artifact_uri/artifact_path.
        """
        hdfs_base_path = _resolve_base_path(self.path, artifact_path)

        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            _, file_name = os.path.split(local_file)
            destination_path = posixpath.join(hdfs_base_path, file_name)
            with open(local_file, "rb") as source:
                with hdfs.open_output_stream(destination_path) as destination:
                    self._copy_stream(source, destination)

    def log_artifacts(self, local_dir, artifact_path=None):
        """
        Log artifacts in hdfs.
        Missing remote sub-directories will be created if needed.

        Args:
            local_dir: Source dir path.
            artifact_path: When specified will attempt to write under artifact_uri/artifact_path.
        """
        hdfs_base_path = _resolve_base_path(self.path, artifact_path)

        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            if hdfs.get_file_info(hdfs_base_path).type != FileType.Directory:
                hdfs.create_dir(hdfs_base_path, recursive=True)

            for subdir_path, _, files in os.walk(local_dir):
                relative_path = _relative_path_local(local_dir, subdir_path)

                # `relative_path` is None for `local_dir` itself.
                hdfs_subdir_path = (
                    posixpath.join(hdfs_base_path, relative_path)
                    if relative_path
                    else hdfs_base_path
                )

                if hdfs.get_file_info(hdfs_subdir_path).type != FileType.Directory:
                    hdfs.create_dir(hdfs_subdir_path, recursive=True)

                for each_file in files:
                    source_path = os.path.join(subdir_path, each_file)
                    destination_path = posixpath.join(hdfs_subdir_path, each_file)
                    with open(source_path, "rb") as source:
                        with hdfs.open_output_stream(destination_path) as destination:
                            self._copy_stream(source, destination)

    def list_artifacts(self, path=None):
        """
        Lists files and directories under artifacts directory for the current run_id.
        (self.path contains the base path - hdfs:/some/path/run_id/artifacts)

        Args:
            path: Relative source path. Possible subdirectory existing under
                hdfs:/some/path/run_id/artifacts

        Returns:
            List of FileInfos under given path, sorted by their relative path
        """
        hdfs_base_path = _resolve_base_path(self.path, path)

        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            paths = []
            base_info = hdfs.get_file_info(hdfs_base_path)
            if base_info.type == FileType.Directory:
                selector = FileSelector(hdfs_base_path)
            elif base_info.type == FileType.File:
                selector = [hdfs_base_path]
            else:
                return []

            for file_detail in hdfs.get_file_info(selector):
                file_name = file_detail.path

                # file_name is hdfs_base_path and not a child of that path
                if file_name == hdfs_base_path:
                    continue

                # Strip off anything that comes before the artifact root e.g. hdfs://name
                offset = file_name.index(self.path)
                rel_path = _relative_path_remote(self.path, file_name[offset:])
                is_dir = file_detail.type == FileType.Directory
                size = file_detail.size
                paths.append(FileInfo(rel_path, is_dir=is_dir, file_size=size))
            # Sort on each entry's own path; a constant key would leave the order untouched.
            return sorted(paths, key=lambda f: f.path)

    def _is_directory(self, artifact_path):
        """Return True when ``artifact_path`` (relative to the artifact root) is a directory."""
        hdfs_base_path = _resolve_base_path(self.path, artifact_path)
        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            return hdfs.get_file_info(hdfs_base_path).type == FileType.Directory

    def _download_file(self, remote_file_path, local_path):
        """Download ``remote_file_path`` (relative to the artifact root) to ``local_path``."""
        hdfs_base_path = _resolve_base_path(self.path, remote_file_path)
        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            with hdfs.open_input_stream(hdfs_base_path) as source:
                with open(local_path, "wb") as destination:
                    self._copy_stream(source, destination)

    def delete_artifacts(self, artifact_path=None):
        """Delete the file at ``artifact_path``, or the contents of the directory there.

        With no ``artifact_path`` the artifact root itself is targeted. Paths that
        are neither a file nor a directory are left alone.
        """
        path = posixpath.join(self.path, artifact_path) if artifact_path else self.path
        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            file_info = hdfs.get_file_info(path)
            if file_info.type == FileType.File:
                hdfs.delete_file(path)
            elif file_info.type == FileType.Directory:
                hdfs.delete_dir_contents(path)
|
144
|
+
|
145
|
+
|
146
|
+
@contextmanager
def hdfs_system(scheme, host, port):
    """
    Context manager that builds a ``HadoopFileSystem`` and yields it.

    Kerberos user, ticket cache and extra configuration are read from the
    corresponding MLflow environment variables.

    Args:
        scheme: URI scheme, joined with ``host`` as ``<scheme>://<host>``.
        host: hostname; when falsy, ``"default"`` is used so the connection
            relies on the core-site.xml config.
        port: port; when falsy, ``0`` is used so the connection relies on the
            core-site.xml config.
    """
    auth_kwargs = {
        "kerb_ticket": MLFLOW_KERBEROS_TICKET_CACHE.get(),
        "user": MLFLOW_KERBEROS_USER.get(),
        "extra_conf": _parse_extra_conf(MLFLOW_PYARROW_EXTRA_CONF.get()),
    }

    if host:
        namenode = scheme + "://" + host
    else:
        namenode = "default"

    filesystem = HadoopFileSystem(host=namenode, port=port or 0, **auth_kwargs)
    yield filesystem
|
170
|
+
|
171
|
+
|
172
|
+
def _resolve_connection_params(artifact_uri):
|
173
|
+
parsed = urllib.parse.urlparse(artifact_uri)
|
174
|
+
|
175
|
+
return parsed.scheme, parsed.hostname, parsed.port, parsed.path
|
176
|
+
|
177
|
+
|
178
|
+
def _resolve_base_path(path, artifact_path):
|
179
|
+
if path == artifact_path:
|
180
|
+
return path
|
181
|
+
if artifact_path:
|
182
|
+
return posixpath.join(path, artifact_path)
|
183
|
+
return path
|
184
|
+
|
185
|
+
|
186
|
+
def _relative_path(base_dir, subdir_path, path_module):
|
187
|
+
relative_path = path_module.relpath(subdir_path, base_dir)
|
188
|
+
return relative_path if relative_path != "." else None
|
189
|
+
|
190
|
+
|
191
|
+
def _relative_path_local(base_dir, subdir_path):
    """Return ``subdir_path`` relative to ``base_dir`` as an artifact path.

    The relative path is computed with ``os.path`` and then converted by
    ``relative_path_to_artifact_path``. Returns None when both are the same directory.
    """
    local_rel = _relative_path(base_dir, subdir_path, os.path)
    if local_rel is None:
        return None
    return relative_path_to_artifact_path(local_rel)
|
194
|
+
|
195
|
+
|
196
|
+
def _relative_path_remote(base_dir, subdir_path):
    """Return ``subdir_path`` relative to ``base_dir`` using POSIX path rules.

    Returns None when both are the same location.
    """
    remote_rel = _relative_path(base_dir, subdir_path, path_module=posixpath)
    return remote_rel
|
198
|
+
|
199
|
+
|
200
|
+
def _parse_extra_conf(extra_conf):
|
201
|
+
if extra_conf:
|
202
|
+
|
203
|
+
def as_pair(config):
|
204
|
+
key, val = config.split("=")
|
205
|
+
return key, val
|
206
|
+
|
207
|
+
list_of_key_val = [as_pair(conf) for conf in extra_conf.split(",")]
|
208
|
+
return dict(list_of_key_val)
|
209
|
+
return None
|