genesis-flow 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genesis_flow-1.0.0.dist-info/METADATA +822 -0
- genesis_flow-1.0.0.dist-info/RECORD +645 -0
- genesis_flow-1.0.0.dist-info/WHEEL +5 -0
- genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
- genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
- genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
- mlflow/__init__.py +367 -0
- mlflow/__main__.py +3 -0
- mlflow/ag2/__init__.py +56 -0
- mlflow/ag2/ag2_logger.py +294 -0
- mlflow/anthropic/__init__.py +40 -0
- mlflow/anthropic/autolog.py +129 -0
- mlflow/anthropic/chat.py +144 -0
- mlflow/artifacts/__init__.py +268 -0
- mlflow/autogen/__init__.py +144 -0
- mlflow/autogen/chat.py +142 -0
- mlflow/azure/__init__.py +26 -0
- mlflow/azure/auth_handler.py +257 -0
- mlflow/azure/client.py +319 -0
- mlflow/azure/config.py +120 -0
- mlflow/azure/connection_factory.py +340 -0
- mlflow/azure/exceptions.py +27 -0
- mlflow/azure/stores.py +327 -0
- mlflow/azure/utils.py +183 -0
- mlflow/bedrock/__init__.py +45 -0
- mlflow/bedrock/_autolog.py +202 -0
- mlflow/bedrock/chat.py +122 -0
- mlflow/bedrock/stream.py +160 -0
- mlflow/bedrock/utils.py +43 -0
- mlflow/cli.py +707 -0
- mlflow/client.py +12 -0
- mlflow/config/__init__.py +56 -0
- mlflow/crewai/__init__.py +79 -0
- mlflow/crewai/autolog.py +253 -0
- mlflow/crewai/chat.py +29 -0
- mlflow/data/__init__.py +75 -0
- mlflow/data/artifact_dataset_sources.py +170 -0
- mlflow/data/code_dataset_source.py +40 -0
- mlflow/data/dataset.py +123 -0
- mlflow/data/dataset_registry.py +168 -0
- mlflow/data/dataset_source.py +110 -0
- mlflow/data/dataset_source_registry.py +219 -0
- mlflow/data/delta_dataset_source.py +167 -0
- mlflow/data/digest_utils.py +108 -0
- mlflow/data/evaluation_dataset.py +562 -0
- mlflow/data/filesystem_dataset_source.py +81 -0
- mlflow/data/http_dataset_source.py +145 -0
- mlflow/data/huggingface_dataset.py +258 -0
- mlflow/data/huggingface_dataset_source.py +118 -0
- mlflow/data/meta_dataset.py +104 -0
- mlflow/data/numpy_dataset.py +223 -0
- mlflow/data/pandas_dataset.py +231 -0
- mlflow/data/polars_dataset.py +352 -0
- mlflow/data/pyfunc_dataset_mixin.py +31 -0
- mlflow/data/schema.py +76 -0
- mlflow/data/sources.py +1 -0
- mlflow/data/spark_dataset.py +406 -0
- mlflow/data/spark_dataset_source.py +74 -0
- mlflow/data/spark_delta_utils.py +118 -0
- mlflow/data/tensorflow_dataset.py +350 -0
- mlflow/data/uc_volume_dataset_source.py +81 -0
- mlflow/db.py +27 -0
- mlflow/dspy/__init__.py +17 -0
- mlflow/dspy/autolog.py +197 -0
- mlflow/dspy/callback.py +398 -0
- mlflow/dspy/constant.py +1 -0
- mlflow/dspy/load.py +93 -0
- mlflow/dspy/save.py +393 -0
- mlflow/dspy/util.py +109 -0
- mlflow/dspy/wrapper.py +226 -0
- mlflow/entities/__init__.py +104 -0
- mlflow/entities/_mlflow_object.py +52 -0
- mlflow/entities/assessment.py +545 -0
- mlflow/entities/assessment_error.py +80 -0
- mlflow/entities/assessment_source.py +141 -0
- mlflow/entities/dataset.py +92 -0
- mlflow/entities/dataset_input.py +51 -0
- mlflow/entities/dataset_summary.py +62 -0
- mlflow/entities/document.py +48 -0
- mlflow/entities/experiment.py +109 -0
- mlflow/entities/experiment_tag.py +35 -0
- mlflow/entities/file_info.py +45 -0
- mlflow/entities/input_tag.py +35 -0
- mlflow/entities/lifecycle_stage.py +35 -0
- mlflow/entities/logged_model.py +228 -0
- mlflow/entities/logged_model_input.py +26 -0
- mlflow/entities/logged_model_output.py +32 -0
- mlflow/entities/logged_model_parameter.py +46 -0
- mlflow/entities/logged_model_status.py +74 -0
- mlflow/entities/logged_model_tag.py +33 -0
- mlflow/entities/metric.py +200 -0
- mlflow/entities/model_registry/__init__.py +29 -0
- mlflow/entities/model_registry/_model_registry_entity.py +13 -0
- mlflow/entities/model_registry/model_version.py +243 -0
- mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
- mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
- mlflow/entities/model_registry/model_version_search.py +25 -0
- mlflow/entities/model_registry/model_version_stages.py +25 -0
- mlflow/entities/model_registry/model_version_status.py +35 -0
- mlflow/entities/model_registry/model_version_tag.py +35 -0
- mlflow/entities/model_registry/prompt.py +73 -0
- mlflow/entities/model_registry/prompt_version.py +244 -0
- mlflow/entities/model_registry/registered_model.py +175 -0
- mlflow/entities/model_registry/registered_model_alias.py +35 -0
- mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
- mlflow/entities/model_registry/registered_model_search.py +25 -0
- mlflow/entities/model_registry/registered_model_tag.py +35 -0
- mlflow/entities/multipart_upload.py +74 -0
- mlflow/entities/param.py +49 -0
- mlflow/entities/run.py +97 -0
- mlflow/entities/run_data.py +84 -0
- mlflow/entities/run_info.py +188 -0
- mlflow/entities/run_inputs.py +59 -0
- mlflow/entities/run_outputs.py +43 -0
- mlflow/entities/run_status.py +41 -0
- mlflow/entities/run_tag.py +36 -0
- mlflow/entities/source_type.py +31 -0
- mlflow/entities/span.py +774 -0
- mlflow/entities/span_event.py +96 -0
- mlflow/entities/span_status.py +102 -0
- mlflow/entities/trace.py +317 -0
- mlflow/entities/trace_data.py +71 -0
- mlflow/entities/trace_info.py +220 -0
- mlflow/entities/trace_info_v2.py +162 -0
- mlflow/entities/trace_location.py +173 -0
- mlflow/entities/trace_state.py +39 -0
- mlflow/entities/trace_status.py +68 -0
- mlflow/entities/view_type.py +51 -0
- mlflow/environment_variables.py +866 -0
- mlflow/evaluation/__init__.py +16 -0
- mlflow/evaluation/assessment.py +369 -0
- mlflow/evaluation/evaluation.py +411 -0
- mlflow/evaluation/evaluation_tag.py +61 -0
- mlflow/evaluation/fluent.py +48 -0
- mlflow/evaluation/utils.py +201 -0
- mlflow/exceptions.py +213 -0
- mlflow/experiments.py +140 -0
- mlflow/gemini/__init__.py +81 -0
- mlflow/gemini/autolog.py +186 -0
- mlflow/gemini/chat.py +261 -0
- mlflow/genai/__init__.py +71 -0
- mlflow/genai/datasets/__init__.py +67 -0
- mlflow/genai/datasets/evaluation_dataset.py +131 -0
- mlflow/genai/evaluation/__init__.py +3 -0
- mlflow/genai/evaluation/base.py +411 -0
- mlflow/genai/evaluation/constant.py +23 -0
- mlflow/genai/evaluation/utils.py +244 -0
- mlflow/genai/judges/__init__.py +21 -0
- mlflow/genai/judges/databricks.py +404 -0
- mlflow/genai/label_schemas/__init__.py +153 -0
- mlflow/genai/label_schemas/label_schemas.py +209 -0
- mlflow/genai/labeling/__init__.py +159 -0
- mlflow/genai/labeling/labeling.py +250 -0
- mlflow/genai/optimize/__init__.py +13 -0
- mlflow/genai/optimize/base.py +198 -0
- mlflow/genai/optimize/optimizers/__init__.py +4 -0
- mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
- mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
- mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
- mlflow/genai/optimize/types.py +75 -0
- mlflow/genai/optimize/util.py +30 -0
- mlflow/genai/prompts/__init__.py +206 -0
- mlflow/genai/scheduled_scorers.py +431 -0
- mlflow/genai/scorers/__init__.py +26 -0
- mlflow/genai/scorers/base.py +492 -0
- mlflow/genai/scorers/builtin_scorers.py +765 -0
- mlflow/genai/scorers/scorer_utils.py +138 -0
- mlflow/genai/scorers/validation.py +165 -0
- mlflow/genai/utils/data_validation.py +146 -0
- mlflow/genai/utils/enum_utils.py +23 -0
- mlflow/genai/utils/trace_utils.py +211 -0
- mlflow/groq/__init__.py +42 -0
- mlflow/groq/_groq_autolog.py +74 -0
- mlflow/johnsnowlabs/__init__.py +888 -0
- mlflow/langchain/__init__.py +24 -0
- mlflow/langchain/api_request_parallel_processor.py +330 -0
- mlflow/langchain/autolog.py +147 -0
- mlflow/langchain/chat_agent_langgraph.py +340 -0
- mlflow/langchain/constant.py +1 -0
- mlflow/langchain/constants.py +1 -0
- mlflow/langchain/databricks_dependencies.py +444 -0
- mlflow/langchain/langchain_tracer.py +597 -0
- mlflow/langchain/model.py +919 -0
- mlflow/langchain/output_parsers.py +142 -0
- mlflow/langchain/retriever_chain.py +153 -0
- mlflow/langchain/runnables.py +527 -0
- mlflow/langchain/utils/chat.py +402 -0
- mlflow/langchain/utils/logging.py +671 -0
- mlflow/langchain/utils/serialization.py +36 -0
- mlflow/legacy_databricks_cli/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/provider.py +482 -0
- mlflow/litellm/__init__.py +175 -0
- mlflow/llama_index/__init__.py +22 -0
- mlflow/llama_index/autolog.py +55 -0
- mlflow/llama_index/chat.py +43 -0
- mlflow/llama_index/constant.py +1 -0
- mlflow/llama_index/model.py +577 -0
- mlflow/llama_index/pyfunc_wrapper.py +332 -0
- mlflow/llama_index/serialize_objects.py +188 -0
- mlflow/llama_index/tracer.py +561 -0
- mlflow/metrics/__init__.py +479 -0
- mlflow/metrics/base.py +39 -0
- mlflow/metrics/genai/__init__.py +25 -0
- mlflow/metrics/genai/base.py +101 -0
- mlflow/metrics/genai/genai_metric.py +771 -0
- mlflow/metrics/genai/metric_definitions.py +450 -0
- mlflow/metrics/genai/model_utils.py +371 -0
- mlflow/metrics/genai/prompt_template.py +68 -0
- mlflow/metrics/genai/prompts/__init__.py +0 -0
- mlflow/metrics/genai/prompts/v1.py +422 -0
- mlflow/metrics/genai/utils.py +6 -0
- mlflow/metrics/metric_definitions.py +619 -0
- mlflow/mismatch.py +34 -0
- mlflow/mistral/__init__.py +34 -0
- mlflow/mistral/autolog.py +71 -0
- mlflow/mistral/chat.py +135 -0
- mlflow/ml_package_versions.py +452 -0
- mlflow/models/__init__.py +97 -0
- mlflow/models/auth_policy.py +83 -0
- mlflow/models/cli.py +354 -0
- mlflow/models/container/__init__.py +294 -0
- mlflow/models/container/scoring_server/__init__.py +0 -0
- mlflow/models/container/scoring_server/nginx.conf +39 -0
- mlflow/models/dependencies_schemas.py +287 -0
- mlflow/models/display_utils.py +158 -0
- mlflow/models/docker_utils.py +211 -0
- mlflow/models/evaluation/__init__.py +23 -0
- mlflow/models/evaluation/_shap_patch.py +64 -0
- mlflow/models/evaluation/artifacts.py +194 -0
- mlflow/models/evaluation/base.py +1811 -0
- mlflow/models/evaluation/calibration_curve.py +109 -0
- mlflow/models/evaluation/default_evaluator.py +996 -0
- mlflow/models/evaluation/deprecated.py +23 -0
- mlflow/models/evaluation/evaluator_registry.py +80 -0
- mlflow/models/evaluation/evaluators/classifier.py +704 -0
- mlflow/models/evaluation/evaluators/default.py +233 -0
- mlflow/models/evaluation/evaluators/regressor.py +96 -0
- mlflow/models/evaluation/evaluators/shap.py +296 -0
- mlflow/models/evaluation/lift_curve.py +178 -0
- mlflow/models/evaluation/utils/metric.py +123 -0
- mlflow/models/evaluation/utils/trace.py +179 -0
- mlflow/models/evaluation/validation.py +434 -0
- mlflow/models/flavor_backend.py +93 -0
- mlflow/models/flavor_backend_registry.py +53 -0
- mlflow/models/model.py +1639 -0
- mlflow/models/model_config.py +150 -0
- mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
- mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
- mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
- mlflow/models/python_api.py +369 -0
- mlflow/models/rag_signatures.py +128 -0
- mlflow/models/resources.py +321 -0
- mlflow/models/signature.py +662 -0
- mlflow/models/utils.py +2054 -0
- mlflow/models/wheeled_model.py +280 -0
- mlflow/openai/__init__.py +57 -0
- mlflow/openai/_agent_tracer.py +364 -0
- mlflow/openai/api_request_parallel_processor.py +131 -0
- mlflow/openai/autolog.py +509 -0
- mlflow/openai/constant.py +1 -0
- mlflow/openai/model.py +824 -0
- mlflow/openai/utils/chat_schema.py +367 -0
- mlflow/optuna/__init__.py +3 -0
- mlflow/optuna/storage.py +646 -0
- mlflow/plugins/__init__.py +72 -0
- mlflow/plugins/base.py +358 -0
- mlflow/plugins/builtin/__init__.py +24 -0
- mlflow/plugins/builtin/pytorch_plugin.py +150 -0
- mlflow/plugins/builtin/sklearn_plugin.py +158 -0
- mlflow/plugins/builtin/transformers_plugin.py +187 -0
- mlflow/plugins/cli.py +321 -0
- mlflow/plugins/discovery.py +340 -0
- mlflow/plugins/manager.py +465 -0
- mlflow/plugins/registry.py +316 -0
- mlflow/plugins/templates/framework_plugin_template.py +329 -0
- mlflow/prompt/constants.py +20 -0
- mlflow/prompt/promptlab_model.py +197 -0
- mlflow/prompt/registry_utils.py +248 -0
- mlflow/promptflow/__init__.py +495 -0
- mlflow/protos/__init__.py +0 -0
- mlflow/protos/assessments_pb2.py +174 -0
- mlflow/protos/databricks_artifacts_pb2.py +489 -0
- mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
- mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
- mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
- mlflow/protos/databricks_pb2.py +267 -0
- mlflow/protos/databricks_trace_server_pb2.py +374 -0
- mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
- mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
- mlflow/protos/facet_feature_statistics_pb2.py +296 -0
- mlflow/protos/internal_pb2.py +77 -0
- mlflow/protos/mlflow_artifacts_pb2.py +336 -0
- mlflow/protos/model_registry_pb2.py +1073 -0
- mlflow/protos/scalapb/__init__.py +0 -0
- mlflow/protos/scalapb/scalapb_pb2.py +104 -0
- mlflow/protos/service_pb2.py +2600 -0
- mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
- mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
- mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
- mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
- mlflow/py.typed +0 -0
- mlflow/pydantic_ai/__init__.py +57 -0
- mlflow/pydantic_ai/autolog.py +173 -0
- mlflow/pyfunc/__init__.py +3844 -0
- mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
- mlflow/pyfunc/backend.py +523 -0
- mlflow/pyfunc/context.py +78 -0
- mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
- mlflow/pyfunc/loaders/__init__.py +7 -0
- mlflow/pyfunc/loaders/chat_agent.py +117 -0
- mlflow/pyfunc/loaders/chat_model.py +125 -0
- mlflow/pyfunc/loaders/code_model.py +31 -0
- mlflow/pyfunc/loaders/responses_agent.py +112 -0
- mlflow/pyfunc/mlserver.py +46 -0
- mlflow/pyfunc/model.py +1473 -0
- mlflow/pyfunc/scoring_server/__init__.py +604 -0
- mlflow/pyfunc/scoring_server/app.py +7 -0
- mlflow/pyfunc/scoring_server/client.py +146 -0
- mlflow/pyfunc/spark_model_cache.py +48 -0
- mlflow/pyfunc/stdin_server.py +44 -0
- mlflow/pyfunc/utils/__init__.py +3 -0
- mlflow/pyfunc/utils/data_validation.py +224 -0
- mlflow/pyfunc/utils/environment.py +22 -0
- mlflow/pyfunc/utils/input_converter.py +47 -0
- mlflow/pyfunc/utils/serving_data_parser.py +11 -0
- mlflow/pytorch/__init__.py +1171 -0
- mlflow/pytorch/_lightning_autolog.py +580 -0
- mlflow/pytorch/_pytorch_autolog.py +50 -0
- mlflow/pytorch/pickle_module.py +35 -0
- mlflow/rfunc/__init__.py +42 -0
- mlflow/rfunc/backend.py +134 -0
- mlflow/runs.py +89 -0
- mlflow/server/__init__.py +302 -0
- mlflow/server/auth/__init__.py +1224 -0
- mlflow/server/auth/__main__.py +4 -0
- mlflow/server/auth/basic_auth.ini +6 -0
- mlflow/server/auth/cli.py +11 -0
- mlflow/server/auth/client.py +537 -0
- mlflow/server/auth/config.py +34 -0
- mlflow/server/auth/db/__init__.py +0 -0
- mlflow/server/auth/db/cli.py +18 -0
- mlflow/server/auth/db/migrations/__init__.py +0 -0
- mlflow/server/auth/db/migrations/alembic.ini +110 -0
- mlflow/server/auth/db/migrations/env.py +76 -0
- mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
- mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
- mlflow/server/auth/db/models.py +67 -0
- mlflow/server/auth/db/utils.py +37 -0
- mlflow/server/auth/entities.py +165 -0
- mlflow/server/auth/logo.py +14 -0
- mlflow/server/auth/permissions.py +65 -0
- mlflow/server/auth/routes.py +18 -0
- mlflow/server/auth/sqlalchemy_store.py +263 -0
- mlflow/server/graphql/__init__.py +0 -0
- mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
- mlflow/server/graphql/graphql_custom_scalars.py +24 -0
- mlflow/server/graphql/graphql_errors.py +15 -0
- mlflow/server/graphql/graphql_no_batching.py +89 -0
- mlflow/server/graphql/graphql_schema_extensions.py +74 -0
- mlflow/server/handlers.py +3217 -0
- mlflow/server/prometheus_exporter.py +17 -0
- mlflow/server/validation.py +30 -0
- mlflow/shap/__init__.py +691 -0
- mlflow/sklearn/__init__.py +1994 -0
- mlflow/sklearn/utils.py +1041 -0
- mlflow/smolagents/__init__.py +66 -0
- mlflow/smolagents/autolog.py +139 -0
- mlflow/smolagents/chat.py +29 -0
- mlflow/store/__init__.py +10 -0
- mlflow/store/_unity_catalog/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/constants.py +2 -0
- mlflow/store/_unity_catalog/registry/__init__.py +6 -0
- mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
- mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
- mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
- mlflow/store/_unity_catalog/registry/utils.py +121 -0
- mlflow/store/artifact/__init__.py +0 -0
- mlflow/store/artifact/artifact_repo.py +472 -0
- mlflow/store/artifact/artifact_repository_registry.py +154 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
- mlflow/store/artifact/cli.py +141 -0
- mlflow/store/artifact/cloud_artifact_repo.py +332 -0
- mlflow/store/artifact/databricks_artifact_repo.py +729 -0
- mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
- mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
- mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
- mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
- mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
- mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
- mlflow/store/artifact/ftp_artifact_repo.py +132 -0
- mlflow/store/artifact/gcs_artifact_repo.py +296 -0
- mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
- mlflow/store/artifact/http_artifact_repo.py +218 -0
- mlflow/store/artifact/local_artifact_repo.py +142 -0
- mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
- mlflow/store/artifact/models_artifact_repo.py +259 -0
- mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
- mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
- mlflow/store/artifact/r2_artifact_repo.py +70 -0
- mlflow/store/artifact/runs_artifact_repo.py +265 -0
- mlflow/store/artifact/s3_artifact_repo.py +330 -0
- mlflow/store/artifact/sftp_artifact_repo.py +141 -0
- mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
- mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
- mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
- mlflow/store/artifact/utils/__init__.py +0 -0
- mlflow/store/artifact/utils/models.py +148 -0
- mlflow/store/db/__init__.py +0 -0
- mlflow/store/db/base_sql_model.py +3 -0
- mlflow/store/db/db_types.py +10 -0
- mlflow/store/db/utils.py +314 -0
- mlflow/store/db_migrations/__init__.py +0 -0
- mlflow/store/db_migrations/alembic.ini +74 -0
- mlflow/store/db_migrations/env.py +84 -0
- mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
- mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
- mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
- mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
- mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
- mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
- mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
- mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
- mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
- mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
- mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
- mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
- mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
- mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
- mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
- mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
- mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
- mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
- mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
- mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
- mlflow/store/db_migrations/versions/__init__.py +0 -0
- mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
- mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
- mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
- mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
- mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
- mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
- mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
- mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
- mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
- mlflow/store/entities/__init__.py +3 -0
- mlflow/store/entities/paged_list.py +18 -0
- mlflow/store/model_registry/__init__.py +10 -0
- mlflow/store/model_registry/abstract_store.py +1081 -0
- mlflow/store/model_registry/base_rest_store.py +44 -0
- mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
- mlflow/store/model_registry/dbmodels/__init__.py +0 -0
- mlflow/store/model_registry/dbmodels/models.py +206 -0
- mlflow/store/model_registry/file_store.py +1091 -0
- mlflow/store/model_registry/rest_store.py +481 -0
- mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
- mlflow/store/tracking/__init__.py +23 -0
- mlflow/store/tracking/abstract_store.py +816 -0
- mlflow/store/tracking/dbmodels/__init__.py +0 -0
- mlflow/store/tracking/dbmodels/initial_models.py +243 -0
- mlflow/store/tracking/dbmodels/models.py +1073 -0
- mlflow/store/tracking/file_store.py +2438 -0
- mlflow/store/tracking/postgres_managed_identity.py +146 -0
- mlflow/store/tracking/rest_store.py +1131 -0
- mlflow/store/tracking/sqlalchemy_store.py +2785 -0
- mlflow/system_metrics/__init__.py +61 -0
- mlflow/system_metrics/metrics/__init__.py +0 -0
- mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
- mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
- mlflow/system_metrics/metrics/disk_monitor.py +21 -0
- mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
- mlflow/system_metrics/metrics/network_monitor.py +34 -0
- mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
- mlflow/system_metrics/system_metrics_monitor.py +198 -0
- mlflow/tracing/__init__.py +16 -0
- mlflow/tracing/assessment.py +356 -0
- mlflow/tracing/client.py +531 -0
- mlflow/tracing/config.py +125 -0
- mlflow/tracing/constant.py +105 -0
- mlflow/tracing/destination.py +81 -0
- mlflow/tracing/display/__init__.py +40 -0
- mlflow/tracing/display/display_handler.py +196 -0
- mlflow/tracing/export/async_export_queue.py +186 -0
- mlflow/tracing/export/inference_table.py +138 -0
- mlflow/tracing/export/mlflow_v3.py +137 -0
- mlflow/tracing/export/utils.py +70 -0
- mlflow/tracing/fluent.py +1417 -0
- mlflow/tracing/processor/base_mlflow.py +199 -0
- mlflow/tracing/processor/inference_table.py +175 -0
- mlflow/tracing/processor/mlflow_v3.py +47 -0
- mlflow/tracing/processor/otel.py +73 -0
- mlflow/tracing/provider.py +487 -0
- mlflow/tracing/trace_manager.py +200 -0
- mlflow/tracing/utils/__init__.py +616 -0
- mlflow/tracing/utils/artifact_utils.py +28 -0
- mlflow/tracing/utils/copy.py +55 -0
- mlflow/tracing/utils/environment.py +55 -0
- mlflow/tracing/utils/exception.py +21 -0
- mlflow/tracing/utils/once.py +35 -0
- mlflow/tracing/utils/otlp.py +63 -0
- mlflow/tracing/utils/processor.py +54 -0
- mlflow/tracing/utils/search.py +292 -0
- mlflow/tracing/utils/timeout.py +250 -0
- mlflow/tracing/utils/token.py +19 -0
- mlflow/tracing/utils/truncation.py +124 -0
- mlflow/tracing/utils/warning.py +76 -0
- mlflow/tracking/__init__.py +39 -0
- mlflow/tracking/_model_registry/__init__.py +1 -0
- mlflow/tracking/_model_registry/client.py +764 -0
- mlflow/tracking/_model_registry/fluent.py +853 -0
- mlflow/tracking/_model_registry/registry.py +67 -0
- mlflow/tracking/_model_registry/utils.py +251 -0
- mlflow/tracking/_tracking_service/__init__.py +0 -0
- mlflow/tracking/_tracking_service/client.py +883 -0
- mlflow/tracking/_tracking_service/registry.py +56 -0
- mlflow/tracking/_tracking_service/utils.py +275 -0
- mlflow/tracking/artifact_utils.py +179 -0
- mlflow/tracking/client.py +5900 -0
- mlflow/tracking/context/__init__.py +0 -0
- mlflow/tracking/context/abstract_context.py +35 -0
- mlflow/tracking/context/databricks_cluster_context.py +15 -0
- mlflow/tracking/context/databricks_command_context.py +15 -0
- mlflow/tracking/context/databricks_job_context.py +49 -0
- mlflow/tracking/context/databricks_notebook_context.py +41 -0
- mlflow/tracking/context/databricks_repo_context.py +43 -0
- mlflow/tracking/context/default_context.py +51 -0
- mlflow/tracking/context/git_context.py +32 -0
- mlflow/tracking/context/registry.py +98 -0
- mlflow/tracking/context/system_environment_context.py +15 -0
- mlflow/tracking/default_experiment/__init__.py +1 -0
- mlflow/tracking/default_experiment/abstract_context.py +43 -0
- mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
- mlflow/tracking/default_experiment/registry.py +75 -0
- mlflow/tracking/fluent.py +3595 -0
- mlflow/tracking/metric_value_conversion_utils.py +93 -0
- mlflow/tracking/multimedia.py +206 -0
- mlflow/tracking/registry.py +86 -0
- mlflow/tracking/request_auth/__init__.py +0 -0
- mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
- mlflow/tracking/request_auth/registry.py +60 -0
- mlflow/tracking/request_header/__init__.py +0 -0
- mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
- mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
- mlflow/tracking/request_header/default_request_header_provider.py +17 -0
- mlflow/tracking/request_header/registry.py +79 -0
- mlflow/transformers/__init__.py +2982 -0
- mlflow/transformers/flavor_config.py +258 -0
- mlflow/transformers/hub_utils.py +83 -0
- mlflow/transformers/llm_inference_utils.py +468 -0
- mlflow/transformers/model_io.py +301 -0
- mlflow/transformers/peft.py +51 -0
- mlflow/transformers/signature.py +183 -0
- mlflow/transformers/torch_utils.py +55 -0
- mlflow/types/__init__.py +21 -0
- mlflow/types/agent.py +270 -0
- mlflow/types/chat.py +240 -0
- mlflow/types/llm.py +935 -0
- mlflow/types/responses.py +139 -0
- mlflow/types/responses_helpers.py +416 -0
- mlflow/types/schema.py +1505 -0
- mlflow/types/type_hints.py +647 -0
- mlflow/types/utils.py +753 -0
- mlflow/utils/__init__.py +283 -0
- mlflow/utils/_capture_modules.py +256 -0
- mlflow/utils/_capture_transformers_modules.py +75 -0
- mlflow/utils/_spark_utils.py +201 -0
- mlflow/utils/_unity_catalog_oss_utils.py +97 -0
- mlflow/utils/_unity_catalog_utils.py +479 -0
- mlflow/utils/annotations.py +218 -0
- mlflow/utils/arguments_utils.py +16 -0
- mlflow/utils/async_logging/__init__.py +1 -0
- mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
- mlflow/utils/async_logging/async_logging_queue.py +366 -0
- mlflow/utils/async_logging/run_artifact.py +38 -0
- mlflow/utils/async_logging/run_batch.py +58 -0
- mlflow/utils/async_logging/run_operations.py +49 -0
- mlflow/utils/autologging_utils/__init__.py +737 -0
- mlflow/utils/autologging_utils/client.py +432 -0
- mlflow/utils/autologging_utils/config.py +33 -0
- mlflow/utils/autologging_utils/events.py +294 -0
- mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
- mlflow/utils/autologging_utils/metrics_queue.py +71 -0
- mlflow/utils/autologging_utils/safety.py +1104 -0
- mlflow/utils/autologging_utils/versioning.py +95 -0
- mlflow/utils/checkpoint_utils.py +206 -0
- mlflow/utils/class_utils.py +6 -0
- mlflow/utils/cli_args.py +257 -0
- mlflow/utils/conda.py +354 -0
- mlflow/utils/credentials.py +231 -0
- mlflow/utils/data_utils.py +17 -0
- mlflow/utils/databricks_utils.py +1436 -0
- mlflow/utils/docstring_utils.py +477 -0
- mlflow/utils/doctor.py +133 -0
- mlflow/utils/download_cloud_file_chunk.py +43 -0
- mlflow/utils/env_manager.py +16 -0
- mlflow/utils/env_pack.py +131 -0
- mlflow/utils/environment.py +1009 -0
- mlflow/utils/exception_utils.py +14 -0
- mlflow/utils/file_utils.py +978 -0
- mlflow/utils/git_utils.py +77 -0
- mlflow/utils/gorilla.py +797 -0
- mlflow/utils/import_hooks/__init__.py +363 -0
- mlflow/utils/lazy_load.py +51 -0
- mlflow/utils/logging_utils.py +168 -0
- mlflow/utils/mime_type_utils.py +58 -0
- mlflow/utils/mlflow_tags.py +103 -0
- mlflow/utils/model_utils.py +486 -0
- mlflow/utils/name_utils.py +346 -0
- mlflow/utils/nfs_on_spark.py +62 -0
- mlflow/utils/openai_utils.py +164 -0
- mlflow/utils/os.py +12 -0
- mlflow/utils/oss_registry_utils.py +29 -0
- mlflow/utils/plugins.py +17 -0
- mlflow/utils/process.py +182 -0
- mlflow/utils/promptlab_utils.py +146 -0
- mlflow/utils/proto_json_utils.py +743 -0
- mlflow/utils/pydantic_utils.py +54 -0
- mlflow/utils/request_utils.py +279 -0
- mlflow/utils/requirements_utils.py +704 -0
- mlflow/utils/rest_utils.py +673 -0
- mlflow/utils/search_logged_model_utils.py +127 -0
- mlflow/utils/search_utils.py +2111 -0
- mlflow/utils/secure_loading.py +221 -0
- mlflow/utils/security_validation.py +384 -0
- mlflow/utils/server_cli_utils.py +61 -0
- mlflow/utils/spark_utils.py +15 -0
- mlflow/utils/string_utils.py +138 -0
- mlflow/utils/thread_utils.py +63 -0
- mlflow/utils/time.py +54 -0
- mlflow/utils/timeout.py +42 -0
- mlflow/utils/uri.py +572 -0
- mlflow/utils/validation.py +662 -0
- mlflow/utils/virtualenv.py +458 -0
- mlflow/utils/warnings_utils.py +25 -0
- mlflow/utils/yaml_utils.py +179 -0
- mlflow/version.py +24 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
from typing_extensions import Self
|
4
|
+
|
5
|
+
from mlflow.data.dataset_source import DatasetSource
|
6
|
+
|
7
|
+
|
8
|
+
class CodeDatasetSource(DatasetSource):
    """Dataset source identified by the source type ``"code"``.

    The source carries a single ``tags`` mapping, which is stored untouched
    and round-tripped through :meth:`to_dict` / :meth:`from_dict`. Loading
    and resolving from a raw source are not supported by this class.
    """

    def __init__(self, tags: dict[Any, Any]):
        """Store ``tags`` on the instance without copying or validating it.

        Args:
            tags: Mapping kept as ``self._tags`` and emitted by ``to_dict``.
        """
        self._tags = tags

    @staticmethod
    def _get_source_type() -> str:
        """Return the source type string for this class, ``"code"``."""
        return "code"

    def load(self, **kwargs):
        """
        Loading is not supported for a code dataset source.

        Raises:
            NotImplementedError: Always; any keyword arguments are ignored.
        """
        raise NotImplementedError

    @staticmethod
    def _can_resolve(raw_source: Any):
        """Return ``False`` for every ``raw_source``; this class never resolves one."""
        return False

    @classmethod
    def _resolve(cls, raw_source: str) -> Self:
        """Resolution is not supported.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def to_dict(self) -> dict[Any, Any]:
        """Return a new dict holding the stored tags under the ``"tags"`` key."""
        serialized = {"tags": self._tags}
        return serialized

    @classmethod
    def from_dict(cls, source_dict: dict[Any, Any]) -> Self:
        """Build an instance from a dict in the shape produced by ``to_dict``.

        Args:
            source_dict: Mapping read for its ``"tags"`` entry. When the key
                is absent, ``None`` is passed through as the tags value.

        Returns:
            A new instance of ``cls``.
        """
        tags = source_dict.get("tags")
        return cls(tags=tags)
|
mlflow/data/dataset.py
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
import json
|
2
|
+
from abc import abstractmethod
|
3
|
+
from typing import Any, Optional
|
4
|
+
|
5
|
+
from mlflow.data.dataset_source import DatasetSource
|
6
|
+
from mlflow.entities import Dataset as DatasetEntity
|
7
|
+
|
8
|
+
|
9
|
+
class Dataset:
    """
    Base representation of a dataset used with MLflow Tracking. A dataset carries a name,
    a digest (hash), a schema and a profile, plus information about its source (e.g. the S3
    bucket or managed Delta table from which the dataset was derived). Most datasets expose
    features and targets for training and evaluation as well.
    """

    def __init__(
        self, source: DatasetSource, name: Optional[str] = None, digest: Optional[str] = None
    ):
        """
        Base constructor for a dataset. All subclasses must call this constructor.

        Args:
            source: The source the dataset was derived from.
            name: Optional dataset name. When omitted, the ``name`` property reports
                ``"dataset"``.
            digest: Optional dataset digest. When omitted (or empty), the digest is
                obtained from ``_compute_digest()``.
        """
        self._name = name
        self._source = source
        # Note: Subclasses should call super() once they've initialized all of
        # the class attributes necessary for digest computation
        if digest:
            self._digest = digest
        else:
            self._digest = self._compute_digest()

    @abstractmethod
    def _compute_digest(self) -> str:
        """Computes a digest for the dataset. Called if the user doesn't supply
        a digest when constructing the dataset.

        Returns:
            A string digest for the dataset. We recommend a maximum digest length
            of 10 characters with an ideal length of 8 characters.

        """

    def to_dict(self) -> dict[str, str]:
        """Create config dictionary for the dataset.

        Subclasses should override this method to provide additional fields in the config dict,
        e.g., schema, profile, etc.

        Returns a string dictionary containing the following fields: name, digest, source, source
        type.
        """
        config = {}
        config["name"] = self.name
        config["digest"] = self.digest
        # The source is embedded as its JSON string, not as a nested dictionary.
        config["source"] = self.source.to_json()
        config["source_type"] = self.source._get_source_type()
        return config

    def to_json(self) -> str:
        """
        Obtains a JSON string representation of the :py:class:`Dataset
        <mlflow.data.dataset.Dataset>`.

        Returns:
            A JSON string representation of the :py:class:`Dataset <mlflow.data.dataset.Dataset>`.
        """
        config = self.to_dict()
        return json.dumps(config)

    @property
    def name(self) -> str:
        """
        The name of the dataset, e.g. ``"iris_data"``, ``"myschema.mycatalog.mytable@v1"``, etc.
        Falls back to ``"dataset"`` when no name was supplied.
        """
        return "dataset" if self._name is None else self._name

    @property
    def digest(self) -> str:
        """
        A unique hash or fingerprint of the dataset, e.g. ``"498c7496"``.
        """
        return self._digest

    @property
    def source(self) -> DatasetSource:
        """
        Information about the dataset's source, represented as an instance of
        :py:class:`DatasetSource <mlflow.data.dataset_source.DatasetSource>`. For example, this
        may be the S3 location or the name of the managed Delta Table from which the dataset
        was derived.
        """
        return self._source

    @property
    @abstractmethod
    def profile(self) -> Optional[Any]:
        """
        Optional summary statistics for the dataset, such as the number of rows in a table, the
        mean / median / std of each table column, etc.
        """

    @property
    @abstractmethod
    def schema(self) -> Optional[Any]:
        """
        Optional dataset schema, such as an instance of :py:class:`mlflow.types.Schema` representing
        the features and targets of the dataset.
        """

    def _to_mlflow_entity(self) -> DatasetEntity:
        """
        Returns:
            A `mlflow.entities.Dataset` instance representing the dataset.
        """
        config = self.to_dict()
        # "schema" and "profile" are only present when a subclass adds them in to_dict().
        return DatasetEntity(
            name=config["name"],
            digest=config["digest"],
            source_type=config["source_type"],
            source=config["source"],
            schema=config.get("schema"),
            profile=config.get("profile"),
        )
|
@@ -0,0 +1,168 @@
|
|
1
|
+
import inspect
|
2
|
+
import warnings
|
3
|
+
from contextlib import suppress
|
4
|
+
from typing import Callable, Optional
|
5
|
+
|
6
|
+
import mlflow.data
|
7
|
+
from mlflow.data.dataset import Dataset
|
8
|
+
from mlflow.exceptions import MlflowException
|
9
|
+
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
|
10
|
+
from mlflow.utils.plugins import get_entry_points
|
11
|
+
|
12
|
+
|
13
|
+
class DatasetRegistry:
    """Registry mapping dataset constructor names to constructor functions."""

    def __init__(self):
        # Maps a constructor name (e.g. "from_pandas") to its constructor function.
        self.constructors = {}

    def register_constructor(
        self,
        constructor_fn: Callable[[Optional[str], Optional[str]], Dataset],
        constructor_name: Optional[str] = None,
    ) -> str:
        """Registers a dataset constructor.

        Args:
            constructor_fn: A function that accepts at least the following
                inputs and returns an instance of a subclass of
                :py:class:`mlflow.data.dataset.Dataset`:

                - name: Optional. A string dataset name
                - digest: Optional. A string dataset digest.

            constructor_name: The name of the constructor, e.g.
                "from_spark". The name must begin with the
                string "from_" or "load_". If unspecified, the `__name__`
                attribute of the `constructor_fn` is used instead and must
                begin with the string "from_" or "load_".

        Returns:
            The name of the registered constructor, e.g. "from_pandas" or "load_delta".

        Raises:
            MlflowException: If the constructor name or the constructor function's
                signature does not satisfy the requirements above.
        """
        if constructor_name is None:
            constructor_name = constructor_fn.__name__
        DatasetRegistry._validate_constructor(constructor_fn, constructor_name)
        # A later registration under the same name replaces the earlier one.
        self.constructors[constructor_name] = constructor_fn
        return constructor_name

    def register_entrypoints(self):
        """
        Registers dataset constructors defined as Python entrypoints in the
        "mlflow.dataset_constructor" group. For reference, see
        https://mlflow.org/docs/latest/plugins.html#defining-a-plugin.

        A failure to load or register one entrypoint is reported as a warning and does
        not prevent the remaining entrypoints from being processed.
        """
        for entrypoint in get_entry_points("mlflow.dataset_constructor"):
            try:
                self.register_constructor(
                    constructor_fn=entrypoint.load(), constructor_name=entrypoint.name
                )
            except Exception as exc:
                warnings.warn(
                    f"Failure attempting to register dataset constructor"
                    f' "{entrypoint.name}": {exc}.',
                    stacklevel=2,
                )

    @staticmethod
    def _validate_constructor(
        constructor_fn: Callable[[Optional[str], Optional[str]], Dataset],
        constructor_name: str,
    ):
        """Validates the name and signature of a dataset constructor.

        Args:
            constructor_fn: The constructor function to validate.
            constructor_name: The name under which the constructor is being registered.

        Raises:
            MlflowException: If the name does not start with "load_" or "from_", if the
                function lacks a keyword-capable ``name`` or ``digest`` parameter, or if
                its return annotation is not a subclass of
                :py:class:`mlflow.data.dataset.Dataset`.
        """
        if not constructor_name.startswith(("load_", "from_")):
            raise MlflowException(
                f"Invalid dataset constructor name: {constructor_name}."
                f" Constructor name must start with 'load_' or 'from_'.",
                INVALID_PARAMETER_VALUE,
            )

        signature = inspect.signature(constructor_fn)
        parameters = signature.parameters
        # "name" and "digest" must be passable by keyword.
        keyword_capable_kinds = (
            inspect.Parameter.KEYWORD_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
        for expected_kwarg in ("name", "digest"):
            parameter = parameters.get(expected_kwarg)
            if parameter is None or parameter.kind not in keyword_capable_kinds:
                raise MlflowException(
                    f"Invalid dataset constructor function: {constructor_fn.__name__}. Function"
                    f" must define an optional parameter named '{expected_kwarg}'.",
                    INVALID_PARAMETER_VALUE,
                )

        # issubclass() raises TypeError when the annotation is not a class (for example a
        # string annotation or a typing construct such as Optional[...]). Treat that as
        # "not a Dataset subclass" so callers get the MlflowException documented above
        # instead of a raw TypeError.
        try:
            returns_dataset = issubclass(signature.return_annotation, Dataset)
        except TypeError:
            returns_dataset = False
        if not returns_dataset:
            raise MlflowException(
                f"Invalid dataset constructor function: {constructor_fn.__name__}. Function must"
                f" have a return type annotation that is a subclass of"
                f" :py:class:`mlflow.data.dataset.Dataset`.",
                INVALID_PARAMETER_VALUE,
            )
|
96
|
+
|
97
|
+
|
98
|
+
def register_constructor(
    constructor_fn: Callable[[Optional[str], Optional[str]], Dataset],
    constructor_name: Optional[str] = None,
) -> str:
    """Registers a dataset constructor.

    The constructor is added to the module-level dataset registry and is also exposed
    as an attribute of the ``mlflow.data`` module under the registered name.

    Args:
        constructor_fn: A function that accepts at least the following
            inputs and returns an instance of a subclass of
            :py:class:`mlflow.data.dataset.Dataset`:

            - name: Optional. A string dataset name
            - digest: Optional. A string dataset digest.

        constructor_name: The name of the constructor, e.g.
            "from_spark". The name must begin with the
            string "from_" or "load_". If unspecified, the `__name__`
            attribute of the `constructor_fn` is used instead and must
            begin with the string "from_" or "load_".

    Returns:
        The name of the registered constructor, e.g. "from_pandas" or "load_delta".

    """
    registered_constructor_name = _dataset_registry.register_constructor(
        constructor_fn=constructor_fn, constructor_name=constructor_name
    )
    setattr(mlflow.data, registered_constructor_name, constructor_fn)
    # Re-registering an existing name replaces the attribute above; avoid listing the
    # same name in ``__all__`` more than once.
    if registered_constructor_name not in mlflow.data.__all__:
        mlflow.data.__all__.append(registered_constructor_name)
    return registered_constructor_name
|
128
|
+
|
129
|
+
|
130
|
+
def get_registered_constructors() -> dict[str, Callable[[Optional[str], Optional[str]], Dataset]]:
    """Obtains the registered dataset constructors.

    Returns:
        A dictionary mapping constructor names to constructor functions. This is the
        module-level registry's own dictionary, not a copy.

    """
    constructors = _dataset_registry.constructors
    return constructors
|
138
|
+
|
139
|
+
|
140
|
+
# Module-level registry instance used by the functions in this module.
_dataset_registry = DatasetRegistry()
# Entry point constructors are registered first; a built-in constructor registered below
# under the same name replaces the entry point one.
_dataset_registry.register_entrypoints()

# use contextlib suppress to ignore import errors
# Each built-in constructor is registered only when its module imports without raising
# ImportError (presumably when the corresponding optional dependency is installed).
with suppress(ImportError):
    from mlflow.data.pandas_dataset import from_pandas

    _dataset_registry.register_constructor(from_pandas)
with suppress(ImportError):
    from mlflow.data.numpy_dataset import from_numpy

    _dataset_registry.register_constructor(from_numpy)
with suppress(ImportError):
    from mlflow.data.huggingface_dataset import from_huggingface

    _dataset_registry.register_constructor(from_huggingface)
with suppress(ImportError):
    from mlflow.data.tensorflow_dataset import from_tensorflow

    _dataset_registry.register_constructor(from_tensorflow)
with suppress(ImportError):
    from mlflow.data.spark_dataset import from_spark, load_delta

    _dataset_registry.register_constructor(load_delta)
    _dataset_registry.register_constructor(from_spark)
with suppress(ImportError):
    from mlflow.data.polars_dataset import from_polars

    _dataset_registry.register_constructor(from_polars)
|
@@ -0,0 +1,110 @@
|
|
1
|
+
import json
|
2
|
+
from abc import abstractmethod
|
3
|
+
from typing import Any
|
4
|
+
|
5
|
+
|
6
|
+
class DatasetSource:
    """
    Describes where a dataset used in MLflow Tracking came from, e.g. a cloud storage
    location or a delta table name / version.

    Subclasses supply the source type, loading, resolution and dictionary conversion;
    JSON conversion is implemented here on top of ``to_dict`` / ``from_dict``.
    """

    @staticmethod
    @abstractmethod
    def _get_source_type() -> str:
        """Obtains a string representing the source type of the dataset.

        Returns:
            A string representing the source type of the dataset, e.g. "s3", "delta_table", ...

        """

    @abstractmethod
    def load(self) -> Any:
        """
        Loads files / objects referred to by the DatasetSource. For example, depending on the type
        of :py:class:`DatasetSource <mlflow.data.dataset_source.DatasetSource>`, this may download
        source CSV files from S3 to the local filesystem, load a source Delta Table as a Spark
        DataFrame, etc.

        Returns:
            The downloaded source, e.g. a local filesystem path, a Spark DataFrame, etc.

        """

    @staticmethod
    @abstractmethod
    def _can_resolve(raw_source: Any) -> bool:
        """Determines whether this type of DatasetSource can be resolved from a specified raw source
        object. For example, an S3DatasetSource can be resolved from an S3 URI like
        "s3://mybucket/path/to/iris/data" but not from an Azure Blob Storage URI like
        "wasbs:/account@host.blob.core.windows.net".

        Args:
            raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data".

        Returns:
            True if this DatasetSource can resolve the raw source, False otherwise.

        """

    @classmethod
    @abstractmethod
    def _resolve(cls, raw_source: Any) -> "DatasetSource":
        """Constructs an instance of the DatasetSource from a raw source object, such as a
        string URI like "s3://mybucket/path/to/iris/data" or a delta table identifier
        like "my.delta.table@2".

        Args:
            raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data".

        Returns:
            A DatasetSource instance derived from the raw_source.

        """

    @abstractmethod
    def to_dict(self) -> dict[str, Any]:
        """Obtains a JSON-compatible dictionary representation of the DatasetSource.

        Returns:
            A JSON-compatible dictionary representation of the DatasetSource.

        """

    def to_json(self) -> str:
        """
        Serializes the dictionary produced by ``to_dict`` to a JSON string.

        Returns:
            A JSON string representation of the
            :py:class:`DatasetSource <mlflow.data.dataset_source.DatasetSource>`.
        """
        source_dict = self.to_dict()
        return json.dumps(source_dict)

    @classmethod
    @abstractmethod
    def from_dict(cls, source_dict: dict[Any, Any]) -> "DatasetSource":
        """Constructs an instance of the DatasetSource from a dictionary representation.

        Args:
            source_dict: A dictionary representation of the DatasetSource.

        Returns:
            A DatasetSource instance.

        """

    @classmethod
    def from_json(cls, source_json: str) -> "DatasetSource":
        """Parses a JSON string and passes the decoded value to ``from_dict``.

        Args:
            source_json: A JSON string representation of the DatasetSource.

        Returns:
            A DatasetSource instance.

        """
        source_dict = json.loads(source_json)
        return cls.from_dict(source_dict)
|
@@ -0,0 +1,219 @@
|
|
1
|
+
import warnings
|
2
|
+
from typing import Any, Optional
|
3
|
+
|
4
|
+
from mlflow.data.artifact_dataset_sources import register_artifact_dataset_sources
|
5
|
+
from mlflow.data.dataset_source import DatasetSource
|
6
|
+
from mlflow.data.http_dataset_source import HTTPDatasetSource
|
7
|
+
from mlflow.exceptions import MlflowException
|
8
|
+
from mlflow.protos.databricks_pb2 import RESOURCE_DOES_NOT_EXIST
|
9
|
+
from mlflow.utils.plugins import get_entry_points
|
10
|
+
|
11
|
+
|
12
|
+
class DatasetSourceRegistry:
    """Ordered registry of DatasetSource classes.

    Sources registered later take precedence: both ``resolve`` and
    ``get_source_from_json`` walk the registered sources in reverse order.
    """

    def __init__(self):
        # Registered DatasetSource classes, in registration order.
        self.sources = []

    def register(self, source: type[DatasetSource]):
        """Registers a DatasetSource for use with MLflow Tracking.

        Args:
            source: The DatasetSource class to register.
        """
        self.sources.append(source)

    def register_entrypoints(self):
        """
        Registers dataset sources defined as Python entrypoints in the
        "mlflow.dataset_source" group. For reference, see
        https://mlflow.org/docs/latest/plugins.html#defining-a-plugin.

        An ``AttributeError`` or ``ImportError`` raised while loading an entrypoint is
        reported as a warning; other exceptions propagate.
        """
        for entrypoint in get_entry_points("mlflow.dataset_source"):
            try:
                self.register(entrypoint.load())
            except (AttributeError, ImportError) as exc:
                warnings.warn(
                    f'Failure attempting to register dataset source "{entrypoint}": {exc}',
                    stacklevel=2,
                )

    def resolve(
        self, raw_source: Any, candidate_sources: Optional[list[type[DatasetSource]]] = None
    ) -> DatasetSource:
        """Resolves a raw source object, such as a string URI, to a DatasetSource for use with
        MLflow Tracking.

        Args:
            raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data" or a
                HuggingFace :py:class:`datasets.Dataset` object.
            candidate_sources: A list of DatasetSource classes to consider as potential sources
                when resolving the raw source. Subclasses of the specified candidate sources are
                also considered. If unspecified, all registered sources are considered.

        Raises:
            MlflowException: If no DatasetSource class can resolve the raw source.

        Returns:
            The resolved DatasetSource.
        """
        matching_sources = []
        for source in self.sources:
            if candidate_sources and not any(
                issubclass(source, candidate_src) for candidate_src in candidate_sources
            ):
                continue
            try:
                if source._can_resolve(raw_source):
                    matching_sources.append(source)
            except Exception as e:
                # A source that fails while probing is skipped rather than aborting resolution.
                warnings.warn(
                    f"Failed to determine whether {source.__name__} can resolve source"
                    f" information for '{raw_source}'. Exception: {e}",
                    stacklevel=2,
                )
                continue

        if len(matching_sources) > 1:
            source_class_names_str = ", ".join(source.__name__ for source in matching_sources)
            warnings.warn(
                f"The specified dataset source can be interpreted in multiple ways:"
                f" {source_class_names_str}. MLflow will assume that this is a"
                f" {matching_sources[-1].__name__} source.",
                stacklevel=2,
            )

        # Try the most recently registered match first; fall back to earlier matches if
        # resolution itself fails.
        for matching_source in reversed(matching_sources):
            try:
                return matching_source._resolve(raw_source)
            except Exception as e:
                warnings.warn(
                    f"Encountered an unexpected error while using {matching_source.__name__} to"
                    f" resolve source information for '{raw_source}'. Exception: {e}",
                    stacklevel=2,
                )
                continue

        raise MlflowException(
            f"Could not find a source information resolver for the specified"
            f" dataset source: {raw_source}.",
            RESOURCE_DOES_NOT_EXIST,
        )

    def get_source_from_json(self, source_json: str, source_type: str) -> DatasetSource:
        """Parses and returns a DatasetSource object from its JSON representation.

        Args:
            source_json: The JSON representation of the DatasetSource.
            source_type: The string type of the DatasetSource, which indicates how to parse the
                source JSON.

        Raises:
            MlflowException: If no registered source reports the given source type.

        Returns:
            The DatasetSource parsed by the most recently registered source class whose
            source type equals ``source_type``.
        """
        for source in reversed(self.sources):
            if source._get_source_type() == source_type:
                return source.from_json(source_json)

        raise MlflowException(
            f"Could not parse dataset source from JSON due to unrecognized"
            f" source type: {source_type}.",
            RESOURCE_DOES_NOT_EXIST,
        )
|
118
|
+
|
119
|
+
|
120
|
+
def register_dataset_source(source: DatasetSource):
    """Adds a DatasetSource to the module-level dataset source registry so that it can be
    used with MLflow Tracking.

    Args:
        source: The DatasetSource to register.
    """
    registry = _dataset_source_registry
    registry.register(source)
|
127
|
+
|
128
|
+
|
129
|
+
def resolve_dataset_source(
    raw_source: Any, candidate_sources: Optional[list[DatasetSource]] = None
) -> DatasetSource:
    """Resolves a raw source object, such as a string URI, to a DatasetSource for use with
    MLflow Tracking, using the module-level dataset source registry.

    Args:
        raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data" or a
            HuggingFace :py:class:`datasets.Dataset` object.
        candidate_sources: A list of DatasetSource classes to consider as potential sources
            when resolving the raw source. Subclasses of the specified candidate
            sources are also considered. If unspecified, all registered sources
            are considered.

    Raises:
        MlflowException: If no DatasetSource class can resolve the raw source.

    Returns:
        The resolved DatasetSource.
    """
    resolved_source = _dataset_source_registry.resolve(
        raw_source=raw_source,
        candidate_sources=candidate_sources,
    )
    return resolved_source
|
152
|
+
|
153
|
+
|
154
|
+
def get_dataset_source_from_json(source_json: str, source_type: str) -> DatasetSource:
    """Parses and returns a DatasetSource object from its JSON representation, using the
    module-level dataset source registry.

    Args:
        source_json: The JSON representation of the DatasetSource.
        source_type: The string type of the DatasetSource, which indicates how to parse the
            source JSON.
    """
    parsed_source = _dataset_source_registry.get_source_from_json(
        source_json=source_json,
        source_type=source_type,
    )
    return parsed_source
|
165
|
+
|
166
|
+
|
167
|
+
def get_registered_sources() -> list[DatasetSource]:
    """Obtains the registered dataset sources.

    Returns:
        A list of registered dataset sources. This is the module-level registry's own
        list, not a copy.

    """
    sources = _dataset_source_registry.sources
    return sources
|
175
|
+
|
176
|
+
|
177
|
+
# NB: The ordering here is important. The last dataset source to be registered takes precedence
# when resolving dataset information for a raw source (e.g. a string like "s3://mybucket/my/path").
# Dataset sources derived from artifact repositories are the most generic / provide the most
# general information about dataset source locations, so they are registered first. More specific
# source information is provided by specialized dataset platform sources like
# HuggingFaceDatasetSource, so these sources are registered next. Finally, externally-defined
# dataset sources are registered last because externally-defined behavior should take precedence
# over any internally-defined generic behavior
#
# NOTE(review): the code below calls register_entrypoints() *before* the optional built-in
# sources (HuggingFace, Spark, Delta, Code, UC Volume) are registered, so those built-in
# sources currently take precedence over externally-defined ones, which differs from the
# description above -- confirm which ordering is intended.
_dataset_source_registry = DatasetSourceRegistry()
register_artifact_dataset_sources()
_dataset_source_registry.register(HTTPDatasetSource)
_dataset_source_registry.register_entrypoints()

# Each of the following sources is registered only when its module imports without raising
# ImportError; an ImportError is ignored silently.
try:
    from mlflow.data.huggingface_dataset_source import HuggingFaceDatasetSource

    _dataset_source_registry.register(HuggingFaceDatasetSource)
except ImportError:
    pass
try:
    from mlflow.data.spark_dataset_source import SparkDatasetSource

    _dataset_source_registry.register(SparkDatasetSource)
except ImportError:
    pass
try:
    from mlflow.data.delta_dataset_source import DeltaDatasetSource

    _dataset_source_registry.register(DeltaDatasetSource)
except ImportError:
    pass
try:
    from mlflow.data.code_dataset_source import CodeDatasetSource

    _dataset_source_registry.register(CodeDatasetSource)
except ImportError:
    pass
try:
    from mlflow.data.uc_volume_dataset_source import UCVolumeDatasetSource

    _dataset_source_registry.register(UCVolumeDatasetSource)
except ImportError:
    pass
|