genesis-flow 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genesis_flow-1.0.0.dist-info/METADATA +822 -0
- genesis_flow-1.0.0.dist-info/RECORD +645 -0
- genesis_flow-1.0.0.dist-info/WHEEL +5 -0
- genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
- genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
- genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
- mlflow/__init__.py +367 -0
- mlflow/__main__.py +3 -0
- mlflow/ag2/__init__.py +56 -0
- mlflow/ag2/ag2_logger.py +294 -0
- mlflow/anthropic/__init__.py +40 -0
- mlflow/anthropic/autolog.py +129 -0
- mlflow/anthropic/chat.py +144 -0
- mlflow/artifacts/__init__.py +268 -0
- mlflow/autogen/__init__.py +144 -0
- mlflow/autogen/chat.py +142 -0
- mlflow/azure/__init__.py +26 -0
- mlflow/azure/auth_handler.py +257 -0
- mlflow/azure/client.py +319 -0
- mlflow/azure/config.py +120 -0
- mlflow/azure/connection_factory.py +340 -0
- mlflow/azure/exceptions.py +27 -0
- mlflow/azure/stores.py +327 -0
- mlflow/azure/utils.py +183 -0
- mlflow/bedrock/__init__.py +45 -0
- mlflow/bedrock/_autolog.py +202 -0
- mlflow/bedrock/chat.py +122 -0
- mlflow/bedrock/stream.py +160 -0
- mlflow/bedrock/utils.py +43 -0
- mlflow/cli.py +707 -0
- mlflow/client.py +12 -0
- mlflow/config/__init__.py +56 -0
- mlflow/crewai/__init__.py +79 -0
- mlflow/crewai/autolog.py +253 -0
- mlflow/crewai/chat.py +29 -0
- mlflow/data/__init__.py +75 -0
- mlflow/data/artifact_dataset_sources.py +170 -0
- mlflow/data/code_dataset_source.py +40 -0
- mlflow/data/dataset.py +123 -0
- mlflow/data/dataset_registry.py +168 -0
- mlflow/data/dataset_source.py +110 -0
- mlflow/data/dataset_source_registry.py +219 -0
- mlflow/data/delta_dataset_source.py +167 -0
- mlflow/data/digest_utils.py +108 -0
- mlflow/data/evaluation_dataset.py +562 -0
- mlflow/data/filesystem_dataset_source.py +81 -0
- mlflow/data/http_dataset_source.py +145 -0
- mlflow/data/huggingface_dataset.py +258 -0
- mlflow/data/huggingface_dataset_source.py +118 -0
- mlflow/data/meta_dataset.py +104 -0
- mlflow/data/numpy_dataset.py +223 -0
- mlflow/data/pandas_dataset.py +231 -0
- mlflow/data/polars_dataset.py +352 -0
- mlflow/data/pyfunc_dataset_mixin.py +31 -0
- mlflow/data/schema.py +76 -0
- mlflow/data/sources.py +1 -0
- mlflow/data/spark_dataset.py +406 -0
- mlflow/data/spark_dataset_source.py +74 -0
- mlflow/data/spark_delta_utils.py +118 -0
- mlflow/data/tensorflow_dataset.py +350 -0
- mlflow/data/uc_volume_dataset_source.py +81 -0
- mlflow/db.py +27 -0
- mlflow/dspy/__init__.py +17 -0
- mlflow/dspy/autolog.py +197 -0
- mlflow/dspy/callback.py +398 -0
- mlflow/dspy/constant.py +1 -0
- mlflow/dspy/load.py +93 -0
- mlflow/dspy/save.py +393 -0
- mlflow/dspy/util.py +109 -0
- mlflow/dspy/wrapper.py +226 -0
- mlflow/entities/__init__.py +104 -0
- mlflow/entities/_mlflow_object.py +52 -0
- mlflow/entities/assessment.py +545 -0
- mlflow/entities/assessment_error.py +80 -0
- mlflow/entities/assessment_source.py +141 -0
- mlflow/entities/dataset.py +92 -0
- mlflow/entities/dataset_input.py +51 -0
- mlflow/entities/dataset_summary.py +62 -0
- mlflow/entities/document.py +48 -0
- mlflow/entities/experiment.py +109 -0
- mlflow/entities/experiment_tag.py +35 -0
- mlflow/entities/file_info.py +45 -0
- mlflow/entities/input_tag.py +35 -0
- mlflow/entities/lifecycle_stage.py +35 -0
- mlflow/entities/logged_model.py +228 -0
- mlflow/entities/logged_model_input.py +26 -0
- mlflow/entities/logged_model_output.py +32 -0
- mlflow/entities/logged_model_parameter.py +46 -0
- mlflow/entities/logged_model_status.py +74 -0
- mlflow/entities/logged_model_tag.py +33 -0
- mlflow/entities/metric.py +200 -0
- mlflow/entities/model_registry/__init__.py +29 -0
- mlflow/entities/model_registry/_model_registry_entity.py +13 -0
- mlflow/entities/model_registry/model_version.py +243 -0
- mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
- mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
- mlflow/entities/model_registry/model_version_search.py +25 -0
- mlflow/entities/model_registry/model_version_stages.py +25 -0
- mlflow/entities/model_registry/model_version_status.py +35 -0
- mlflow/entities/model_registry/model_version_tag.py +35 -0
- mlflow/entities/model_registry/prompt.py +73 -0
- mlflow/entities/model_registry/prompt_version.py +244 -0
- mlflow/entities/model_registry/registered_model.py +175 -0
- mlflow/entities/model_registry/registered_model_alias.py +35 -0
- mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
- mlflow/entities/model_registry/registered_model_search.py +25 -0
- mlflow/entities/model_registry/registered_model_tag.py +35 -0
- mlflow/entities/multipart_upload.py +74 -0
- mlflow/entities/param.py +49 -0
- mlflow/entities/run.py +97 -0
- mlflow/entities/run_data.py +84 -0
- mlflow/entities/run_info.py +188 -0
- mlflow/entities/run_inputs.py +59 -0
- mlflow/entities/run_outputs.py +43 -0
- mlflow/entities/run_status.py +41 -0
- mlflow/entities/run_tag.py +36 -0
- mlflow/entities/source_type.py +31 -0
- mlflow/entities/span.py +774 -0
- mlflow/entities/span_event.py +96 -0
- mlflow/entities/span_status.py +102 -0
- mlflow/entities/trace.py +317 -0
- mlflow/entities/trace_data.py +71 -0
- mlflow/entities/trace_info.py +220 -0
- mlflow/entities/trace_info_v2.py +162 -0
- mlflow/entities/trace_location.py +173 -0
- mlflow/entities/trace_state.py +39 -0
- mlflow/entities/trace_status.py +68 -0
- mlflow/entities/view_type.py +51 -0
- mlflow/environment_variables.py +866 -0
- mlflow/evaluation/__init__.py +16 -0
- mlflow/evaluation/assessment.py +369 -0
- mlflow/evaluation/evaluation.py +411 -0
- mlflow/evaluation/evaluation_tag.py +61 -0
- mlflow/evaluation/fluent.py +48 -0
- mlflow/evaluation/utils.py +201 -0
- mlflow/exceptions.py +213 -0
- mlflow/experiments.py +140 -0
- mlflow/gemini/__init__.py +81 -0
- mlflow/gemini/autolog.py +186 -0
- mlflow/gemini/chat.py +261 -0
- mlflow/genai/__init__.py +71 -0
- mlflow/genai/datasets/__init__.py +67 -0
- mlflow/genai/datasets/evaluation_dataset.py +131 -0
- mlflow/genai/evaluation/__init__.py +3 -0
- mlflow/genai/evaluation/base.py +411 -0
- mlflow/genai/evaluation/constant.py +23 -0
- mlflow/genai/evaluation/utils.py +244 -0
- mlflow/genai/judges/__init__.py +21 -0
- mlflow/genai/judges/databricks.py +404 -0
- mlflow/genai/label_schemas/__init__.py +153 -0
- mlflow/genai/label_schemas/label_schemas.py +209 -0
- mlflow/genai/labeling/__init__.py +159 -0
- mlflow/genai/labeling/labeling.py +250 -0
- mlflow/genai/optimize/__init__.py +13 -0
- mlflow/genai/optimize/base.py +198 -0
- mlflow/genai/optimize/optimizers/__init__.py +4 -0
- mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
- mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
- mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
- mlflow/genai/optimize/types.py +75 -0
- mlflow/genai/optimize/util.py +30 -0
- mlflow/genai/prompts/__init__.py +206 -0
- mlflow/genai/scheduled_scorers.py +431 -0
- mlflow/genai/scorers/__init__.py +26 -0
- mlflow/genai/scorers/base.py +492 -0
- mlflow/genai/scorers/builtin_scorers.py +765 -0
- mlflow/genai/scorers/scorer_utils.py +138 -0
- mlflow/genai/scorers/validation.py +165 -0
- mlflow/genai/utils/data_validation.py +146 -0
- mlflow/genai/utils/enum_utils.py +23 -0
- mlflow/genai/utils/trace_utils.py +211 -0
- mlflow/groq/__init__.py +42 -0
- mlflow/groq/_groq_autolog.py +74 -0
- mlflow/johnsnowlabs/__init__.py +888 -0
- mlflow/langchain/__init__.py +24 -0
- mlflow/langchain/api_request_parallel_processor.py +330 -0
- mlflow/langchain/autolog.py +147 -0
- mlflow/langchain/chat_agent_langgraph.py +340 -0
- mlflow/langchain/constant.py +1 -0
- mlflow/langchain/constants.py +1 -0
- mlflow/langchain/databricks_dependencies.py +444 -0
- mlflow/langchain/langchain_tracer.py +597 -0
- mlflow/langchain/model.py +919 -0
- mlflow/langchain/output_parsers.py +142 -0
- mlflow/langchain/retriever_chain.py +153 -0
- mlflow/langchain/runnables.py +527 -0
- mlflow/langchain/utils/chat.py +402 -0
- mlflow/langchain/utils/logging.py +671 -0
- mlflow/langchain/utils/serialization.py +36 -0
- mlflow/legacy_databricks_cli/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/provider.py +482 -0
- mlflow/litellm/__init__.py +175 -0
- mlflow/llama_index/__init__.py +22 -0
- mlflow/llama_index/autolog.py +55 -0
- mlflow/llama_index/chat.py +43 -0
- mlflow/llama_index/constant.py +1 -0
- mlflow/llama_index/model.py +577 -0
- mlflow/llama_index/pyfunc_wrapper.py +332 -0
- mlflow/llama_index/serialize_objects.py +188 -0
- mlflow/llama_index/tracer.py +561 -0
- mlflow/metrics/__init__.py +479 -0
- mlflow/metrics/base.py +39 -0
- mlflow/metrics/genai/__init__.py +25 -0
- mlflow/metrics/genai/base.py +101 -0
- mlflow/metrics/genai/genai_metric.py +771 -0
- mlflow/metrics/genai/metric_definitions.py +450 -0
- mlflow/metrics/genai/model_utils.py +371 -0
- mlflow/metrics/genai/prompt_template.py +68 -0
- mlflow/metrics/genai/prompts/__init__.py +0 -0
- mlflow/metrics/genai/prompts/v1.py +422 -0
- mlflow/metrics/genai/utils.py +6 -0
- mlflow/metrics/metric_definitions.py +619 -0
- mlflow/mismatch.py +34 -0
- mlflow/mistral/__init__.py +34 -0
- mlflow/mistral/autolog.py +71 -0
- mlflow/mistral/chat.py +135 -0
- mlflow/ml_package_versions.py +452 -0
- mlflow/models/__init__.py +97 -0
- mlflow/models/auth_policy.py +83 -0
- mlflow/models/cli.py +354 -0
- mlflow/models/container/__init__.py +294 -0
- mlflow/models/container/scoring_server/__init__.py +0 -0
- mlflow/models/container/scoring_server/nginx.conf +39 -0
- mlflow/models/dependencies_schemas.py +287 -0
- mlflow/models/display_utils.py +158 -0
- mlflow/models/docker_utils.py +211 -0
- mlflow/models/evaluation/__init__.py +23 -0
- mlflow/models/evaluation/_shap_patch.py +64 -0
- mlflow/models/evaluation/artifacts.py +194 -0
- mlflow/models/evaluation/base.py +1811 -0
- mlflow/models/evaluation/calibration_curve.py +109 -0
- mlflow/models/evaluation/default_evaluator.py +996 -0
- mlflow/models/evaluation/deprecated.py +23 -0
- mlflow/models/evaluation/evaluator_registry.py +80 -0
- mlflow/models/evaluation/evaluators/classifier.py +704 -0
- mlflow/models/evaluation/evaluators/default.py +233 -0
- mlflow/models/evaluation/evaluators/regressor.py +96 -0
- mlflow/models/evaluation/evaluators/shap.py +296 -0
- mlflow/models/evaluation/lift_curve.py +178 -0
- mlflow/models/evaluation/utils/metric.py +123 -0
- mlflow/models/evaluation/utils/trace.py +179 -0
- mlflow/models/evaluation/validation.py +434 -0
- mlflow/models/flavor_backend.py +93 -0
- mlflow/models/flavor_backend_registry.py +53 -0
- mlflow/models/model.py +1639 -0
- mlflow/models/model_config.py +150 -0
- mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
- mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
- mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
- mlflow/models/python_api.py +369 -0
- mlflow/models/rag_signatures.py +128 -0
- mlflow/models/resources.py +321 -0
- mlflow/models/signature.py +662 -0
- mlflow/models/utils.py +2054 -0
- mlflow/models/wheeled_model.py +280 -0
- mlflow/openai/__init__.py +57 -0
- mlflow/openai/_agent_tracer.py +364 -0
- mlflow/openai/api_request_parallel_processor.py +131 -0
- mlflow/openai/autolog.py +509 -0
- mlflow/openai/constant.py +1 -0
- mlflow/openai/model.py +824 -0
- mlflow/openai/utils/chat_schema.py +367 -0
- mlflow/optuna/__init__.py +3 -0
- mlflow/optuna/storage.py +646 -0
- mlflow/plugins/__init__.py +72 -0
- mlflow/plugins/base.py +358 -0
- mlflow/plugins/builtin/__init__.py +24 -0
- mlflow/plugins/builtin/pytorch_plugin.py +150 -0
- mlflow/plugins/builtin/sklearn_plugin.py +158 -0
- mlflow/plugins/builtin/transformers_plugin.py +187 -0
- mlflow/plugins/cli.py +321 -0
- mlflow/plugins/discovery.py +340 -0
- mlflow/plugins/manager.py +465 -0
- mlflow/plugins/registry.py +316 -0
- mlflow/plugins/templates/framework_plugin_template.py +329 -0
- mlflow/prompt/constants.py +20 -0
- mlflow/prompt/promptlab_model.py +197 -0
- mlflow/prompt/registry_utils.py +248 -0
- mlflow/promptflow/__init__.py +495 -0
- mlflow/protos/__init__.py +0 -0
- mlflow/protos/assessments_pb2.py +174 -0
- mlflow/protos/databricks_artifacts_pb2.py +489 -0
- mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
- mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
- mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
- mlflow/protos/databricks_pb2.py +267 -0
- mlflow/protos/databricks_trace_server_pb2.py +374 -0
- mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
- mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
- mlflow/protos/facet_feature_statistics_pb2.py +296 -0
- mlflow/protos/internal_pb2.py +77 -0
- mlflow/protos/mlflow_artifacts_pb2.py +336 -0
- mlflow/protos/model_registry_pb2.py +1073 -0
- mlflow/protos/scalapb/__init__.py +0 -0
- mlflow/protos/scalapb/scalapb_pb2.py +104 -0
- mlflow/protos/service_pb2.py +2600 -0
- mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
- mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
- mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
- mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
- mlflow/py.typed +0 -0
- mlflow/pydantic_ai/__init__.py +57 -0
- mlflow/pydantic_ai/autolog.py +173 -0
- mlflow/pyfunc/__init__.py +3844 -0
- mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
- mlflow/pyfunc/backend.py +523 -0
- mlflow/pyfunc/context.py +78 -0
- mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
- mlflow/pyfunc/loaders/__init__.py +7 -0
- mlflow/pyfunc/loaders/chat_agent.py +117 -0
- mlflow/pyfunc/loaders/chat_model.py +125 -0
- mlflow/pyfunc/loaders/code_model.py +31 -0
- mlflow/pyfunc/loaders/responses_agent.py +112 -0
- mlflow/pyfunc/mlserver.py +46 -0
- mlflow/pyfunc/model.py +1473 -0
- mlflow/pyfunc/scoring_server/__init__.py +604 -0
- mlflow/pyfunc/scoring_server/app.py +7 -0
- mlflow/pyfunc/scoring_server/client.py +146 -0
- mlflow/pyfunc/spark_model_cache.py +48 -0
- mlflow/pyfunc/stdin_server.py +44 -0
- mlflow/pyfunc/utils/__init__.py +3 -0
- mlflow/pyfunc/utils/data_validation.py +224 -0
- mlflow/pyfunc/utils/environment.py +22 -0
- mlflow/pyfunc/utils/input_converter.py +47 -0
- mlflow/pyfunc/utils/serving_data_parser.py +11 -0
- mlflow/pytorch/__init__.py +1171 -0
- mlflow/pytorch/_lightning_autolog.py +580 -0
- mlflow/pytorch/_pytorch_autolog.py +50 -0
- mlflow/pytorch/pickle_module.py +35 -0
- mlflow/rfunc/__init__.py +42 -0
- mlflow/rfunc/backend.py +134 -0
- mlflow/runs.py +89 -0
- mlflow/server/__init__.py +302 -0
- mlflow/server/auth/__init__.py +1224 -0
- mlflow/server/auth/__main__.py +4 -0
- mlflow/server/auth/basic_auth.ini +6 -0
- mlflow/server/auth/cli.py +11 -0
- mlflow/server/auth/client.py +537 -0
- mlflow/server/auth/config.py +34 -0
- mlflow/server/auth/db/__init__.py +0 -0
- mlflow/server/auth/db/cli.py +18 -0
- mlflow/server/auth/db/migrations/__init__.py +0 -0
- mlflow/server/auth/db/migrations/alembic.ini +110 -0
- mlflow/server/auth/db/migrations/env.py +76 -0
- mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
- mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
- mlflow/server/auth/db/models.py +67 -0
- mlflow/server/auth/db/utils.py +37 -0
- mlflow/server/auth/entities.py +165 -0
- mlflow/server/auth/logo.py +14 -0
- mlflow/server/auth/permissions.py +65 -0
- mlflow/server/auth/routes.py +18 -0
- mlflow/server/auth/sqlalchemy_store.py +263 -0
- mlflow/server/graphql/__init__.py +0 -0
- mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
- mlflow/server/graphql/graphql_custom_scalars.py +24 -0
- mlflow/server/graphql/graphql_errors.py +15 -0
- mlflow/server/graphql/graphql_no_batching.py +89 -0
- mlflow/server/graphql/graphql_schema_extensions.py +74 -0
- mlflow/server/handlers.py +3217 -0
- mlflow/server/prometheus_exporter.py +17 -0
- mlflow/server/validation.py +30 -0
- mlflow/shap/__init__.py +691 -0
- mlflow/sklearn/__init__.py +1994 -0
- mlflow/sklearn/utils.py +1041 -0
- mlflow/smolagents/__init__.py +66 -0
- mlflow/smolagents/autolog.py +139 -0
- mlflow/smolagents/chat.py +29 -0
- mlflow/store/__init__.py +10 -0
- mlflow/store/_unity_catalog/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/constants.py +2 -0
- mlflow/store/_unity_catalog/registry/__init__.py +6 -0
- mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
- mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
- mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
- mlflow/store/_unity_catalog/registry/utils.py +121 -0
- mlflow/store/artifact/__init__.py +0 -0
- mlflow/store/artifact/artifact_repo.py +472 -0
- mlflow/store/artifact/artifact_repository_registry.py +154 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
- mlflow/store/artifact/cli.py +141 -0
- mlflow/store/artifact/cloud_artifact_repo.py +332 -0
- mlflow/store/artifact/databricks_artifact_repo.py +729 -0
- mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
- mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
- mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
- mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
- mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
- mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
- mlflow/store/artifact/ftp_artifact_repo.py +132 -0
- mlflow/store/artifact/gcs_artifact_repo.py +296 -0
- mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
- mlflow/store/artifact/http_artifact_repo.py +218 -0
- mlflow/store/artifact/local_artifact_repo.py +142 -0
- mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
- mlflow/store/artifact/models_artifact_repo.py +259 -0
- mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
- mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
- mlflow/store/artifact/r2_artifact_repo.py +70 -0
- mlflow/store/artifact/runs_artifact_repo.py +265 -0
- mlflow/store/artifact/s3_artifact_repo.py +330 -0
- mlflow/store/artifact/sftp_artifact_repo.py +141 -0
- mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
- mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
- mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
- mlflow/store/artifact/utils/__init__.py +0 -0
- mlflow/store/artifact/utils/models.py +148 -0
- mlflow/store/db/__init__.py +0 -0
- mlflow/store/db/base_sql_model.py +3 -0
- mlflow/store/db/db_types.py +10 -0
- mlflow/store/db/utils.py +314 -0
- mlflow/store/db_migrations/__init__.py +0 -0
- mlflow/store/db_migrations/alembic.ini +74 -0
- mlflow/store/db_migrations/env.py +84 -0
- mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
- mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
- mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
- mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
- mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
- mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
- mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
- mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
- mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
- mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
- mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
- mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
- mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
- mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
- mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
- mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
- mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
- mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
- mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
- mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
- mlflow/store/db_migrations/versions/__init__.py +0 -0
- mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
- mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
- mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
- mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
- mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
- mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
- mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
- mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
- mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
- mlflow/store/entities/__init__.py +3 -0
- mlflow/store/entities/paged_list.py +18 -0
- mlflow/store/model_registry/__init__.py +10 -0
- mlflow/store/model_registry/abstract_store.py +1081 -0
- mlflow/store/model_registry/base_rest_store.py +44 -0
- mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
- mlflow/store/model_registry/dbmodels/__init__.py +0 -0
- mlflow/store/model_registry/dbmodels/models.py +206 -0
- mlflow/store/model_registry/file_store.py +1091 -0
- mlflow/store/model_registry/rest_store.py +481 -0
- mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
- mlflow/store/tracking/__init__.py +23 -0
- mlflow/store/tracking/abstract_store.py +816 -0
- mlflow/store/tracking/dbmodels/__init__.py +0 -0
- mlflow/store/tracking/dbmodels/initial_models.py +243 -0
- mlflow/store/tracking/dbmodels/models.py +1073 -0
- mlflow/store/tracking/file_store.py +2438 -0
- mlflow/store/tracking/postgres_managed_identity.py +146 -0
- mlflow/store/tracking/rest_store.py +1131 -0
- mlflow/store/tracking/sqlalchemy_store.py +2785 -0
- mlflow/system_metrics/__init__.py +61 -0
- mlflow/system_metrics/metrics/__init__.py +0 -0
- mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
- mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
- mlflow/system_metrics/metrics/disk_monitor.py +21 -0
- mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
- mlflow/system_metrics/metrics/network_monitor.py +34 -0
- mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
- mlflow/system_metrics/system_metrics_monitor.py +198 -0
- mlflow/tracing/__init__.py +16 -0
- mlflow/tracing/assessment.py +356 -0
- mlflow/tracing/client.py +531 -0
- mlflow/tracing/config.py +125 -0
- mlflow/tracing/constant.py +105 -0
- mlflow/tracing/destination.py +81 -0
- mlflow/tracing/display/__init__.py +40 -0
- mlflow/tracing/display/display_handler.py +196 -0
- mlflow/tracing/export/async_export_queue.py +186 -0
- mlflow/tracing/export/inference_table.py +138 -0
- mlflow/tracing/export/mlflow_v3.py +137 -0
- mlflow/tracing/export/utils.py +70 -0
- mlflow/tracing/fluent.py +1417 -0
- mlflow/tracing/processor/base_mlflow.py +199 -0
- mlflow/tracing/processor/inference_table.py +175 -0
- mlflow/tracing/processor/mlflow_v3.py +47 -0
- mlflow/tracing/processor/otel.py +73 -0
- mlflow/tracing/provider.py +487 -0
- mlflow/tracing/trace_manager.py +200 -0
- mlflow/tracing/utils/__init__.py +616 -0
- mlflow/tracing/utils/artifact_utils.py +28 -0
- mlflow/tracing/utils/copy.py +55 -0
- mlflow/tracing/utils/environment.py +55 -0
- mlflow/tracing/utils/exception.py +21 -0
- mlflow/tracing/utils/once.py +35 -0
- mlflow/tracing/utils/otlp.py +63 -0
- mlflow/tracing/utils/processor.py +54 -0
- mlflow/tracing/utils/search.py +292 -0
- mlflow/tracing/utils/timeout.py +250 -0
- mlflow/tracing/utils/token.py +19 -0
- mlflow/tracing/utils/truncation.py +124 -0
- mlflow/tracing/utils/warning.py +76 -0
- mlflow/tracking/__init__.py +39 -0
- mlflow/tracking/_model_registry/__init__.py +1 -0
- mlflow/tracking/_model_registry/client.py +764 -0
- mlflow/tracking/_model_registry/fluent.py +853 -0
- mlflow/tracking/_model_registry/registry.py +67 -0
- mlflow/tracking/_model_registry/utils.py +251 -0
- mlflow/tracking/_tracking_service/__init__.py +0 -0
- mlflow/tracking/_tracking_service/client.py +883 -0
- mlflow/tracking/_tracking_service/registry.py +56 -0
- mlflow/tracking/_tracking_service/utils.py +275 -0
- mlflow/tracking/artifact_utils.py +179 -0
- mlflow/tracking/client.py +5900 -0
- mlflow/tracking/context/__init__.py +0 -0
- mlflow/tracking/context/abstract_context.py +35 -0
- mlflow/tracking/context/databricks_cluster_context.py +15 -0
- mlflow/tracking/context/databricks_command_context.py +15 -0
- mlflow/tracking/context/databricks_job_context.py +49 -0
- mlflow/tracking/context/databricks_notebook_context.py +41 -0
- mlflow/tracking/context/databricks_repo_context.py +43 -0
- mlflow/tracking/context/default_context.py +51 -0
- mlflow/tracking/context/git_context.py +32 -0
- mlflow/tracking/context/registry.py +98 -0
- mlflow/tracking/context/system_environment_context.py +15 -0
- mlflow/tracking/default_experiment/__init__.py +1 -0
- mlflow/tracking/default_experiment/abstract_context.py +43 -0
- mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
- mlflow/tracking/default_experiment/registry.py +75 -0
- mlflow/tracking/fluent.py +3595 -0
- mlflow/tracking/metric_value_conversion_utils.py +93 -0
- mlflow/tracking/multimedia.py +206 -0
- mlflow/tracking/registry.py +86 -0
- mlflow/tracking/request_auth/__init__.py +0 -0
- mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
- mlflow/tracking/request_auth/registry.py +60 -0
- mlflow/tracking/request_header/__init__.py +0 -0
- mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
- mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
- mlflow/tracking/request_header/default_request_header_provider.py +17 -0
- mlflow/tracking/request_header/registry.py +79 -0
- mlflow/transformers/__init__.py +2982 -0
- mlflow/transformers/flavor_config.py +258 -0
- mlflow/transformers/hub_utils.py +83 -0
- mlflow/transformers/llm_inference_utils.py +468 -0
- mlflow/transformers/model_io.py +301 -0
- mlflow/transformers/peft.py +51 -0
- mlflow/transformers/signature.py +183 -0
- mlflow/transformers/torch_utils.py +55 -0
- mlflow/types/__init__.py +21 -0
- mlflow/types/agent.py +270 -0
- mlflow/types/chat.py +240 -0
- mlflow/types/llm.py +935 -0
- mlflow/types/responses.py +139 -0
- mlflow/types/responses_helpers.py +416 -0
- mlflow/types/schema.py +1505 -0
- mlflow/types/type_hints.py +647 -0
- mlflow/types/utils.py +753 -0
- mlflow/utils/__init__.py +283 -0
- mlflow/utils/_capture_modules.py +256 -0
- mlflow/utils/_capture_transformers_modules.py +75 -0
- mlflow/utils/_spark_utils.py +201 -0
- mlflow/utils/_unity_catalog_oss_utils.py +97 -0
- mlflow/utils/_unity_catalog_utils.py +479 -0
- mlflow/utils/annotations.py +218 -0
- mlflow/utils/arguments_utils.py +16 -0
- mlflow/utils/async_logging/__init__.py +1 -0
- mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
- mlflow/utils/async_logging/async_logging_queue.py +366 -0
- mlflow/utils/async_logging/run_artifact.py +38 -0
- mlflow/utils/async_logging/run_batch.py +58 -0
- mlflow/utils/async_logging/run_operations.py +49 -0
- mlflow/utils/autologging_utils/__init__.py +737 -0
- mlflow/utils/autologging_utils/client.py +432 -0
- mlflow/utils/autologging_utils/config.py +33 -0
- mlflow/utils/autologging_utils/events.py +294 -0
- mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
- mlflow/utils/autologging_utils/metrics_queue.py +71 -0
- mlflow/utils/autologging_utils/safety.py +1104 -0
- mlflow/utils/autologging_utils/versioning.py +95 -0
- mlflow/utils/checkpoint_utils.py +206 -0
- mlflow/utils/class_utils.py +6 -0
- mlflow/utils/cli_args.py +257 -0
- mlflow/utils/conda.py +354 -0
- mlflow/utils/credentials.py +231 -0
- mlflow/utils/data_utils.py +17 -0
- mlflow/utils/databricks_utils.py +1436 -0
- mlflow/utils/docstring_utils.py +477 -0
- mlflow/utils/doctor.py +133 -0
- mlflow/utils/download_cloud_file_chunk.py +43 -0
- mlflow/utils/env_manager.py +16 -0
- mlflow/utils/env_pack.py +131 -0
- mlflow/utils/environment.py +1009 -0
- mlflow/utils/exception_utils.py +14 -0
- mlflow/utils/file_utils.py +978 -0
- mlflow/utils/git_utils.py +77 -0
- mlflow/utils/gorilla.py +797 -0
- mlflow/utils/import_hooks/__init__.py +363 -0
- mlflow/utils/lazy_load.py +51 -0
- mlflow/utils/logging_utils.py +168 -0
- mlflow/utils/mime_type_utils.py +58 -0
- mlflow/utils/mlflow_tags.py +103 -0
- mlflow/utils/model_utils.py +486 -0
- mlflow/utils/name_utils.py +346 -0
- mlflow/utils/nfs_on_spark.py +62 -0
- mlflow/utils/openai_utils.py +164 -0
- mlflow/utils/os.py +12 -0
- mlflow/utils/oss_registry_utils.py +29 -0
- mlflow/utils/plugins.py +17 -0
- mlflow/utils/process.py +182 -0
- mlflow/utils/promptlab_utils.py +146 -0
- mlflow/utils/proto_json_utils.py +743 -0
- mlflow/utils/pydantic_utils.py +54 -0
- mlflow/utils/request_utils.py +279 -0
- mlflow/utils/requirements_utils.py +704 -0
- mlflow/utils/rest_utils.py +673 -0
- mlflow/utils/search_logged_model_utils.py +127 -0
- mlflow/utils/search_utils.py +2111 -0
- mlflow/utils/secure_loading.py +221 -0
- mlflow/utils/security_validation.py +384 -0
- mlflow/utils/server_cli_utils.py +61 -0
- mlflow/utils/spark_utils.py +15 -0
- mlflow/utils/string_utils.py +138 -0
- mlflow/utils/thread_utils.py +63 -0
- mlflow/utils/time.py +54 -0
- mlflow/utils/timeout.py +42 -0
- mlflow/utils/uri.py +572 -0
- mlflow/utils/validation.py +662 -0
- mlflow/utils/virtualenv.py +458 -0
- mlflow/utils/warnings_utils.py +25 -0
- mlflow/utils/yaml_utils.py +179 -0
- mlflow/version.py +24 -0
@@ -0,0 +1,562 @@
|
|
1
|
+
import hashlib
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
import math
|
5
|
+
import struct
|
6
|
+
import sys
|
7
|
+
|
8
|
+
from packaging.version import Version
|
9
|
+
|
10
|
+
import mlflow
|
11
|
+
from mlflow.entities import RunTag
|
12
|
+
from mlflow.exceptions import MlflowException
|
13
|
+
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
|
14
|
+
from mlflow.utils.string_utils import generate_feature_name_if_not_string
|
15
|
+
|
16
|
+
try:
|
17
|
+
# `numpy` and `pandas` are not required for `mlflow-skinny`.
|
18
|
+
import numpy as np
|
19
|
+
import pandas as pd
|
20
|
+
except ImportError:
|
21
|
+
pass
|
22
|
+
|
23
|
+
_logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
def _hash_uint64_ndarray_as_bytes(array):
|
27
|
+
assert len(array.shape) == 1
|
28
|
+
# see struct pack format string https://docs.python.org/3/library/struct.html#format-strings
|
29
|
+
return struct.pack(f">{array.size}Q", *array)
|
30
|
+
|
31
|
+
|
32
|
+
def _is_empty_list_or_array(data):
|
33
|
+
if isinstance(data, list):
|
34
|
+
return len(data) == 0
|
35
|
+
elif isinstance(data, np.ndarray):
|
36
|
+
return data.size == 0
|
37
|
+
return False
|
38
|
+
|
39
|
+
|
40
|
+
def _is_array_has_dict(nd_array):
|
41
|
+
if _is_empty_list_or_array(nd_array):
|
42
|
+
return False
|
43
|
+
|
44
|
+
# It is less likely the array or list contains heterogeneous elements, so just checking the
|
45
|
+
# first element to avoid performance overhead.
|
46
|
+
elm = nd_array.item(0)
|
47
|
+
if isinstance(elm, (list, np.ndarray)):
|
48
|
+
return _is_array_has_dict(elm)
|
49
|
+
elif isinstance(elm, dict):
|
50
|
+
return True
|
51
|
+
|
52
|
+
return False
|
53
|
+
|
54
|
+
|
55
|
+
def _hash_array_of_dict_as_bytes(data):
    """Hash an array/list containing dict elements into bytes.

    pandas.util.hash_array cannot digest arrays whose elements are dicts, so
    each element is hashed manually and the results concatenated. This mainly
    serves the LLM use case (lists of chat/completion payload dicts); more
    complex cases such as deeply nested lists are not handled.
    """
    chunks = []
    for item in data:
        if isinstance(item, dict):
            chunks.append(_hash_dict_as_bytes(item))
        elif isinstance(item, (list, np.ndarray)):
            chunks.append(_hash_array_of_dict_as_bytes(item))
        else:
            chunks.append(_hash_data_as_bytes(item))
    return b"".join(chunks)
|
69
|
+
|
70
|
+
|
71
|
+
def _hash_ndarray_as_bytes(nd_array):
    """Hash an ndarray (or anything coercible to one) into bytes.

    Arrays containing dicts are hashed element-wise; everything else is
    hashed through pandas, with the shape folded in so arrays with the same
    flattened content but different shapes hash differently.
    """
    arr = nd_array if isinstance(nd_array, np.ndarray) else np.array(nd_array)

    if _is_array_has_dict(arr):
        return _hash_array_of_dict_as_bytes(arr)

    content_bytes = _hash_uint64_ndarray_as_bytes(pd.util.hash_array(arr.flatten(order="C")))
    shape_bytes = _hash_uint64_ndarray_as_bytes(np.array(arr.shape, dtype="uint64"))
    return content_bytes + shape_bytes
|
81
|
+
|
82
|
+
|
83
|
+
def _hash_data_as_bytes(data):
|
84
|
+
try:
|
85
|
+
if isinstance(data, (list, np.ndarray)):
|
86
|
+
return _hash_ndarray_as_bytes(data)
|
87
|
+
if isinstance(data, dict):
|
88
|
+
return _hash_dict_as_bytes(data)
|
89
|
+
if np.isscalar(data):
|
90
|
+
return _hash_uint64_ndarray_as_bytes(pd.util.hash_array(np.array([data])))
|
91
|
+
finally:
|
92
|
+
return b"" # Skip unsupported types by returning an empty byte string
|
93
|
+
|
94
|
+
|
95
|
+
def _hash_dict_as_bytes(data_dict):
    """Hash a dict by hashing its keys and, where possible, its values."""
    result = _hash_ndarray_as_bytes(list(data_dict.keys()))
    try:
        return result + _hash_ndarray_as_bytes(list(data_dict.values()))
    except Exception:
        # The values contain objects pandas cannot hash in bulk; fall back
        # to hashing each value recursively, one at a time.
        for value in data_dict.values():
            result += _hash_data_as_bytes(value)
        return result
|
104
|
+
|
105
|
+
|
106
|
+
def _hash_array_like_obj_as_bytes(data):
|
107
|
+
"""
|
108
|
+
Helper method to convert pandas dataframe/numpy array/list into bytes for
|
109
|
+
MD5 calculation purpose.
|
110
|
+
"""
|
111
|
+
if isinstance(data, pd.DataFrame):
|
112
|
+
# add checking `'pyspark' in sys.modules` to avoid importing pyspark when user
|
113
|
+
# run code not related to pyspark.
|
114
|
+
if "pyspark" in sys.modules:
|
115
|
+
from pyspark.ml.linalg import Vector as spark_vector_type
|
116
|
+
else:
|
117
|
+
spark_vector_type = None
|
118
|
+
|
119
|
+
def _hash_array_like_element_as_bytes(v):
|
120
|
+
if spark_vector_type is not None:
|
121
|
+
if isinstance(v, spark_vector_type):
|
122
|
+
return _hash_ndarray_as_bytes(v.toArray())
|
123
|
+
if isinstance(v, (dict, list, np.ndarray)):
|
124
|
+
return _hash_data_as_bytes(v)
|
125
|
+
|
126
|
+
try:
|
127
|
+
# Attempt to hash the value, if it fails, return an empty byte string
|
128
|
+
pd.util.hash_array(np.array([v]))
|
129
|
+
return v
|
130
|
+
except TypeError:
|
131
|
+
return b"" # Skip unhashable types by returning an empty byte string
|
132
|
+
|
133
|
+
if Version(pd.__version__) >= Version("2.1.0"):
|
134
|
+
data = data.map(_hash_array_like_element_as_bytes)
|
135
|
+
else:
|
136
|
+
data = data.applymap(_hash_array_like_element_as_bytes)
|
137
|
+
return _hash_uint64_ndarray_as_bytes(pd.util.hash_pandas_object(data))
|
138
|
+
elif isinstance(data, np.ndarray) and len(data) > 0 and isinstance(data[0], list):
|
139
|
+
# convert numpy array of lists into numpy array of the string representation of the lists
|
140
|
+
# because lists are not hashable
|
141
|
+
hashable = np.array(str(val) for val in data)
|
142
|
+
return _hash_ndarray_as_bytes(hashable)
|
143
|
+
elif isinstance(data, np.ndarray) and len(data) > 0 and isinstance(data[0], np.ndarray):
|
144
|
+
# convert numpy array of numpy arrays into 2d numpy arrays
|
145
|
+
# because numpy array of numpy arrays are not hashable
|
146
|
+
hashable = np.array(data.tolist())
|
147
|
+
return _hash_ndarray_as_bytes(hashable)
|
148
|
+
elif isinstance(data, np.ndarray):
|
149
|
+
return _hash_ndarray_as_bytes(data)
|
150
|
+
elif isinstance(data, list):
|
151
|
+
return _hash_ndarray_as_bytes(np.array(data))
|
152
|
+
else:
|
153
|
+
raise ValueError("Unsupported data type.")
|
154
|
+
|
155
|
+
|
156
|
+
def _gen_md5_for_arraylike_obj(md5_gen, data):
    """
    Fold an array-like object into an MD5 accumulator. The digest covers:
    - the number of rows,
    - the first NUM_SAMPLE_ROWS_FOR_HASH rows,
    - the last NUM_SAMPLE_ROWS_FOR_HASH rows.
    Sampling head/tail rows keeps hashing cheap for very large datasets.
    """
    n_sample = EvaluationDataset.NUM_SAMPLE_ROWS_FOR_HASH
    len_bytes = _hash_uint64_ndarray_as_bytes(np.array([len(data)], dtype="uint64"))
    md5_gen.update(len_bytes)
    if len(data) < n_sample * 2:
        # Small enough to hash in full.
        md5_gen.update(_hash_array_like_obj_as_bytes(data))
        return
    if isinstance(data, pd.DataFrame):
        # Positional row access on DataFrames must go through iloc.
        head_rows = data.iloc[:n_sample]
        tail_rows = data.iloc[-n_sample:]
    else:
        head_rows = data[:n_sample]
        tail_rows = data[-n_sample:]
    md5_gen.update(_hash_array_like_obj_as_bytes(head_rows))
    md5_gen.update(_hash_array_like_obj_as_bytes(tail_rows))
|
177
|
+
|
178
|
+
|
179
|
+
def convert_data_to_mlflow_dataset(data, targets=None, predictions=None):
    """Convert input data to mlflow dataset.

    Args:
        data: a list, numpy array, pandas DataFrame, or spark DataFrame.
        targets: evaluation labels — an array-like for list/numpy data, or a
            column name for dataframe data.
        predictions: prediction column name; only supported for dataframes.

    Returns:
        An mlflow dataset wrapping ``data``, or ``data`` unchanged when its
        type cannot be converted.
    """
    supported_dataframe_types = [pd.DataFrame]
    if "pyspark" in sys.modules:
        from mlflow.utils.spark_utils import get_spark_dataframe_type

        spark_df_type = get_spark_dataframe_type()
        supported_dataframe_types.append(spark_df_type)

    if predictions is not None:
        _validate_dataset_type_supports_predictions(
            data=data, supported_predictions_dataset_types=supported_dataframe_types
        )

    if isinstance(data, list):
        # If the list is flat, we assume each element is an independent sample.
        # BUG FIX: guard on `data` so an empty list no longer raises IndexError.
        if data and not isinstance(data[0], (list, np.ndarray)):
            data = [[elm] for elm in data]

        # BUG FIX: truth-testing `targets` raised "truth value of an array is
        # ambiguous" for multi-element numpy arrays and silently dropped
        # falsy-but-valid values; compare against None instead.
        return mlflow.data.from_numpy(
            np.array(data), targets=np.array(targets) if targets is not None else None
        )
    elif isinstance(data, np.ndarray):
        return mlflow.data.from_numpy(data, targets=targets)
    elif isinstance(data, pd.DataFrame):
        return mlflow.data.from_pandas(df=data, targets=targets, predictions=predictions)
    elif "pyspark" in sys.modules and isinstance(data, spark_df_type):
        return mlflow.data.from_spark(df=data, targets=targets, predictions=predictions)
    else:
        # Cannot convert to mlflow dataset, return original data.
        _logger.info(
            "Cannot convert input data to `evaluate()` to an mlflow dataset, input must be a list, "
            f"a numpy array, a panda Dataframe or a spark Dataframe, but received {type(data)}."
        )
        return data
|
214
|
+
|
215
|
+
|
216
|
+
def _validate_dataset_type_supports_predictions(data, supported_predictions_dataset_types):
    """
    Validate that the dataset type supports a user-specified "predictions" column.

    Raises an MlflowException (INVALID_PARAMETER_VALUE) when ``data`` is not
    an instance of any of the supported dataframe types.
    """
    if not isinstance(data, tuple(supported_predictions_dataset_types)):
        raise MlflowException(
            message=(
                "If predictions is specified, data must be one of the following types, or an"
                " MLflow Dataset that represents one of the following types:"
                f" {supported_predictions_dataset_types}."
            ),
            error_code=INVALID_PARAMETER_VALUE,
        )
|
229
|
+
|
230
|
+
|
231
|
+
class EvaluationDataset:
    """
    An input dataset for model evaluation. This is intended for use with the
    :py:func:`mlflow.models.evaluate()`
    API.

    Normalizes several input shapes (numpy array / list / pandas DataFrame /
    spark DataFrame) into features, optional labels, and optional predictions,
    and computes an MD5 hash that identifies the dataset contents.
    """

    # Number of head/tail rows sampled when hashing large datasets.
    NUM_SAMPLE_ROWS_FOR_HASH = 5
    # Spark DataFrames are truncated to this many rows before evaluation.
    SPARK_DATAFRAME_LIMIT = 10000

    def __init__(
        self,
        data,
        *,
        targets=None,
        name=None,
        path=None,
        feature_names=None,
        predictions=None,
        digest=None,
    ):
        """
        The values of the constructor arguments comes from the `evaluate` call.

        Args:
            data: evaluation features (optionally including label/prediction
                columns) as a numpy array, list, pandas DataFrame, or spark
                DataFrame.
            targets: a labels array/list (for array/list data) or the label
                column name (for dataframe data).
            name: optional dataset name; must not contain a double quote.
            path: optional dataset path; must not contain a double quote.
            feature_names: optional list of unique feature names.
            predictions: optional predictions column name (dataframes only).
            digest: optional precomputed dataset digest.

        Raises:
            MlflowException (INVALID_PARAMETER_VALUE) on any invalid argument
            combination described below.
        """
        # Double quotes would break the JSON-ish tag format written by
        # `_log_dataset_tag`.
        if name is not None and '"' in name:
            raise MlflowException(
                message=f'Dataset name cannot include a double quote (") but got {name}',
                error_code=INVALID_PARAMETER_VALUE,
            )
        if path is not None and '"' in path:
            raise MlflowException(
                message=f'Dataset path cannot include a double quote (") but got {path}',
                error_code=INVALID_PARAMETER_VALUE,
            )

        self._user_specified_name = name
        self._path = path
        self._hash = None
        self._supported_dataframe_types = (pd.DataFrame,)
        self._spark_df_type = None
        self._labels_data = None
        self._targets_name = None
        self._has_targets = False
        self._predictions_data = None
        self._predictions_name = None
        self._has_predictions = predictions is not None
        self._digest = digest

        try:
            # add checking `'pyspark' in sys.modules` to avoid importing pyspark when user
            # run code not related to pyspark.
            if "pyspark" in sys.modules:
                from mlflow.utils.spark_utils import get_spark_dataframe_type

                spark_df_type = get_spark_dataframe_type()
                self._supported_dataframe_types = (pd.DataFrame, spark_df_type)
                self._spark_df_type = spark_df_type
        except ImportError:
            pass

        # Duplicate feature names would make column selection ambiguous.
        if feature_names is not None and len(set(feature_names)) < len(list(feature_names)):
            raise MlflowException(
                message="`feature_names` argument must be a list containing unique feature names.",
                error_code=INVALID_PARAMETER_VALUE,
            )

        if self._has_predictions:
            _validate_dataset_type_supports_predictions(
                data=data,
                supported_predictions_dataset_types=self._supported_dataframe_types,
            )

        has_targets = targets is not None
        if has_targets:
            self._has_targets = True
        if isinstance(data, (np.ndarray, list)):
            # Array/list input: `targets` must also be an array/list of labels.
            if has_targets and not isinstance(targets, (np.ndarray, list)):
                raise MlflowException(
                    message="If data is a numpy array or list of evaluation features, "
                    "`targets` argument must be a numpy array or list of evaluation labels.",
                    error_code=INVALID_PARAMETER_VALUE,
                )

            shape_message = (
                "If the `data` argument is a numpy array, it must be a 2-dimensional "
                "array, with the second dimension representing the number of features. If the "
                "`data` argument is a list, each of its elements must be a feature array of "
                "the numpy array or list, and all elements must have the same length."
            )

            if isinstance(data, list):
                try:
                    # Ragged lists raise ValueError here and are reported as a
                    # shape problem.
                    data = np.array(data)
                except ValueError as e:
                    raise MlflowException(
                        message=shape_message, error_code=INVALID_PARAMETER_VALUE
                    ) from e

            if len(data.shape) != 2:
                raise MlflowException(
                    message=shape_message,
                    error_code=INVALID_PARAMETER_VALUE,
                )

            self._features_data = data
            if has_targets:
                self._labels_data = (
                    targets if isinstance(targets, np.ndarray) else np.array(targets)
                )

                if len(self._features_data) != len(self._labels_data):
                    raise MlflowException(
                        message="The input features example rows must be the same length "
                        "with labels array.",
                        error_code=INVALID_PARAMETER_VALUE,
                    )

            num_features = data.shape[1]

            if feature_names is not None:
                feature_names = list(feature_names)
                if num_features != len(feature_names):
                    raise MlflowException(
                        message="feature name list must be the same length with feature data.",
                        error_code=INVALID_PARAMETER_VALUE,
                    )
                self._feature_names = feature_names
            else:
                # Generate zero-padded names (feature_01, ..., feature_10, ...)
                # so they sort lexicographically in index order.
                self._feature_names = [
                    f"feature_{str(i + 1).zfill(math.ceil(math.log10(num_features + 1)))}"
                    for i in range(num_features)
                ]
        elif isinstance(data, self._supported_dataframe_types):
            # Dataframe input: `targets` must be a column name.
            if has_targets and not isinstance(targets, str):
                raise MlflowException(
                    message="If data is a Pandas DataFrame or Spark DataFrame, `targets` argument "
                    "must be the name of the column which contains evaluation labels in the `data` "
                    "dataframe.",
                    error_code=INVALID_PARAMETER_VALUE,
                )
            if self._spark_df_type and isinstance(data, self._spark_df_type):
                if data.count() > EvaluationDataset.SPARK_DATAFRAME_LIMIT:
                    _logger.warning(
                        "Specified Spark DataFrame is too large for model evaluation. Only "
                        f"the first {EvaluationDataset.SPARK_DATAFRAME_LIMIT} rows will be used. "
                        "If you want evaluate on the whole spark dataframe, please manually call "
                        "`spark_dataframe.toPandas()`."
                    )
                # Spark data is always materialized to pandas (truncated to the
                # limit) before hashing/evaluation.
                data = data.limit(EvaluationDataset.SPARK_DATAFRAME_LIMIT).toPandas()

            if has_targets:
                self._labels_data = data[targets].to_numpy()
                self._targets_name = targets

            if self._has_predictions:
                self._predictions_data = data[predictions].to_numpy()
                self._predictions_name = predictions

            if feature_names is not None:
                self._features_data = data[list(feature_names)]
                self._feature_names = feature_names
            else:
                # No explicit feature names: every column except targets and
                # predictions is treated as a feature.
                features_data = data

                if has_targets:
                    features_data = features_data.drop(targets, axis=1, inplace=False)

                if self._has_predictions:
                    features_data = features_data.drop(predictions, axis=1, inplace=False)

                self._features_data = features_data
                self._feature_names = [
                    generate_feature_name_if_not_string(c) for c in self._features_data.columns
                ]
        else:
            raise MlflowException(
                message="The data argument must be a numpy array, a list or a Pandas DataFrame, or "
                "spark DataFrame if pyspark package installed.",
                error_code=INVALID_PARAMETER_VALUE,
            )

        # generate dataset hash covering features, labels, predictions, and
        # feature names. usedforsecurity=False: this is a fingerprint, not a
        # cryptographic hash.
        md5_gen = hashlib.md5(usedforsecurity=False)
        _gen_md5_for_arraylike_obj(md5_gen, self._features_data)
        if self._labels_data is not None:
            _gen_md5_for_arraylike_obj(md5_gen, self._labels_data)
        if self._predictions_data is not None:
            _gen_md5_for_arraylike_obj(md5_gen, self._predictions_data)
        md5_gen.update(",".join(list(map(str, self._feature_names))).encode("UTF-8"))

        self._hash = md5_gen.hexdigest()

    @property
    def feature_names(self):
        """Return the list of feature names (user-supplied or generated)."""
        return self._feature_names

    @property
    def features_data(self):
        """
        return features data as a numpy array or a pandas DataFrame.
        """
        return self._features_data

    @property
    def labels_data(self):
        """
        return labels data as a numpy array
        """
        return self._labels_data

    @property
    def has_targets(self):
        """
        Returns True if the dataset has targets, False otherwise.
        """
        return self._has_targets

    @property
    def targets_name(self):
        """
        return targets name
        """
        return self._targets_name

    @property
    def predictions_data(self):
        """
        return predictions data as a numpy array
        """
        return self._predictions_data

    @property
    def has_predictions(self):
        """
        Returns True if the dataset has predictions, False otherwise.
        """
        return self._has_predictions

    @property
    def predictions_name(self):
        """
        return predictions name
        """
        return self._predictions_name

    @property
    def name(self):
        """
        Dataset name, which is specified dataset name or the dataset hash if user don't specify
        name.
        """
        return self._user_specified_name if self._user_specified_name is not None else self.hash

    @property
    def path(self):
        """
        Dataset path
        """
        return self._path

    @property
    def hash(self):
        """
        Dataset hash, computed over the dataset length, sampled head/tail rows
        (NUM_SAMPLE_ROWS_FOR_HASH each) of features/labels/predictions, and
        the feature names.
        """
        return self._hash

    @property
    def _metadata(self):
        """
        Return dataset metadata containing name, hash, and optional path.
        """
        metadata = {
            "name": self.name,
            "hash": self.hash,
        }
        if self.path is not None:
            metadata["path"] = self.path
        return metadata

    @property
    def digest(self):
        """
        Return the digest of the dataset.
        """
        return self._digest

    def _log_dataset_tag(self, client, run_id, model_uuid):
        """
        Log dataset metadata as a tag "mlflow.datasets", if the tag already exists, it will
        append current dataset metadata into existing tag content.
        """
        existing_dataset_metadata_str = client.get_run(run_id).data.tags.get(
            "mlflow.datasets", "[]"
        )
        dataset_metadata_list = json.loads(existing_dataset_metadata_str)

        # for/else: only append when no existing entry matches this dataset
        # (same hash, name, and model), keeping the tag free of duplicates.
        for metadata in dataset_metadata_list:
            if (
                metadata["hash"] == self.hash
                and metadata["name"] == self.name
                and metadata["model"] == model_uuid
            ):
                break
        else:
            dataset_metadata_list.append({**self._metadata, "model": model_uuid})

        dataset_metadata_str = json.dumps(dataset_metadata_list, separators=(",", ":"))
        client.log_batch(
            run_id,
            tags=[RunTag("mlflow.datasets", dataset_metadata_str)],
        )

    def __hash__(self):
        # Delegates to the content hash so equal datasets hash equally.
        return hash(self.hash)

    def __eq__(self, other):
        """Content-based equality over features, labels, name, path, and
        feature names.

        NOTE(review): predictions data is not part of the comparison —
        confirm this is intentional.
        """
        if not isinstance(other, EvaluationDataset):
            return False

        if isinstance(self._features_data, np.ndarray):
            is_features_data_equal = np.array_equal(self._features_data, other._features_data)
        else:
            is_features_data_equal = self._features_data.equals(other._features_data)

        return (
            is_features_data_equal
            and np.array_equal(self._labels_data, other._labels_data)
            and self.name == other.name
            and self.path == other.path
            and self._feature_names == other._feature_names
        )
|
@@ -0,0 +1,81 @@
|
|
1
|
+
from abc import abstractmethod
|
2
|
+
from typing import Any
|
3
|
+
|
4
|
+
from mlflow.data.dataset_source import DatasetSource
|
5
|
+
|
6
|
+
|
7
|
+
class FileSystemDatasetSource(DatasetSource):
    """
    Represents the source of a dataset stored on a filesystem, e.g. a local UNIX filesystem,
    blob storage services like S3, etc.

    Abstract base class: concrete subclasses implement URI resolution,
    download to the local filesystem, and dict (de)serialization.
    """

    @property
    @abstractmethod
    def uri(self):
        """The URI referring to the dataset source filesystem location.

        Returns:
            The URI referring to the dataset source filesystem location,
            e.g "s3://mybucket/path/to/mydataset", "/tmp/path/to/my/dataset" etc.

        """

    @staticmethod
    @abstractmethod
    def _get_source_type() -> str:
        """Identify the filesystem scheme this source type handles.

        Returns:
            A string describing the filesystem containing the dataset, e.g. "local", "s3", ...
        """

    @abstractmethod
    def load(self, dst_path=None) -> str:
        """Downloads the dataset source to the local filesystem.

        Args:
            dst_path: Path of the local filesystem destination directory to which to download the
                dataset source. If the directory does not exist, it is created. If
                unspecified, the dataset source is downloaded to a new uniquely-named
                directory on the local filesystem, unless the dataset source already
                exists on the local filesystem, in which case its local path is returned
                directly.

        Returns:
            The path to the downloaded dataset source on the local filesystem.

        """

    @staticmethod
    @abstractmethod
    def _can_resolve(raw_source: Any) -> bool:
        """Check whether this source type can handle a raw user input.

        Args:
            raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data".

        Returns:
            True if this DatasetSource can resolve the raw source, False otherwise.
        """

    @classmethod
    @abstractmethod
    def _resolve(cls, raw_source: Any) -> "FileSystemDatasetSource":
        """Construct a concrete source from a raw user input.

        Args:
            raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data".
        """

    @abstractmethod
    def to_dict(self) -> dict[Any, Any]:
        """Serialize this source for storage.

        Returns:
            A JSON-compatible dictionary representation of the FileSystemDatasetSource.
        """

    @classmethod
    @abstractmethod
    def from_dict(cls, source_dict: dict[Any, Any]) -> "FileSystemDatasetSource":
        """Reconstruct a source from its dictionary representation.

        Args:
            source_dict: A dictionary representation of the FileSystemDatasetSource.
        """
|