genesis_flow-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genesis_flow-1.0.0.dist-info/METADATA +822 -0
- genesis_flow-1.0.0.dist-info/RECORD +645 -0
- genesis_flow-1.0.0.dist-info/WHEEL +5 -0
- genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
- genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
- genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
- mlflow/__init__.py +367 -0
- mlflow/__main__.py +3 -0
- mlflow/ag2/__init__.py +56 -0
- mlflow/ag2/ag2_logger.py +294 -0
- mlflow/anthropic/__init__.py +40 -0
- mlflow/anthropic/autolog.py +129 -0
- mlflow/anthropic/chat.py +144 -0
- mlflow/artifacts/__init__.py +268 -0
- mlflow/autogen/__init__.py +144 -0
- mlflow/autogen/chat.py +142 -0
- mlflow/azure/__init__.py +26 -0
- mlflow/azure/auth_handler.py +257 -0
- mlflow/azure/client.py +319 -0
- mlflow/azure/config.py +120 -0
- mlflow/azure/connection_factory.py +340 -0
- mlflow/azure/exceptions.py +27 -0
- mlflow/azure/stores.py +327 -0
- mlflow/azure/utils.py +183 -0
- mlflow/bedrock/__init__.py +45 -0
- mlflow/bedrock/_autolog.py +202 -0
- mlflow/bedrock/chat.py +122 -0
- mlflow/bedrock/stream.py +160 -0
- mlflow/bedrock/utils.py +43 -0
- mlflow/cli.py +707 -0
- mlflow/client.py +12 -0
- mlflow/config/__init__.py +56 -0
- mlflow/crewai/__init__.py +79 -0
- mlflow/crewai/autolog.py +253 -0
- mlflow/crewai/chat.py +29 -0
- mlflow/data/__init__.py +75 -0
- mlflow/data/artifact_dataset_sources.py +170 -0
- mlflow/data/code_dataset_source.py +40 -0
- mlflow/data/dataset.py +123 -0
- mlflow/data/dataset_registry.py +168 -0
- mlflow/data/dataset_source.py +110 -0
- mlflow/data/dataset_source_registry.py +219 -0
- mlflow/data/delta_dataset_source.py +167 -0
- mlflow/data/digest_utils.py +108 -0
- mlflow/data/evaluation_dataset.py +562 -0
- mlflow/data/filesystem_dataset_source.py +81 -0
- mlflow/data/http_dataset_source.py +145 -0
- mlflow/data/huggingface_dataset.py +258 -0
- mlflow/data/huggingface_dataset_source.py +118 -0
- mlflow/data/meta_dataset.py +104 -0
- mlflow/data/numpy_dataset.py +223 -0
- mlflow/data/pandas_dataset.py +231 -0
- mlflow/data/polars_dataset.py +352 -0
- mlflow/data/pyfunc_dataset_mixin.py +31 -0
- mlflow/data/schema.py +76 -0
- mlflow/data/sources.py +1 -0
- mlflow/data/spark_dataset.py +406 -0
- mlflow/data/spark_dataset_source.py +74 -0
- mlflow/data/spark_delta_utils.py +118 -0
- mlflow/data/tensorflow_dataset.py +350 -0
- mlflow/data/uc_volume_dataset_source.py +81 -0
- mlflow/db.py +27 -0
- mlflow/dspy/__init__.py +17 -0
- mlflow/dspy/autolog.py +197 -0
- mlflow/dspy/callback.py +398 -0
- mlflow/dspy/constant.py +1 -0
- mlflow/dspy/load.py +93 -0
- mlflow/dspy/save.py +393 -0
- mlflow/dspy/util.py +109 -0
- mlflow/dspy/wrapper.py +226 -0
- mlflow/entities/__init__.py +104 -0
- mlflow/entities/_mlflow_object.py +52 -0
- mlflow/entities/assessment.py +545 -0
- mlflow/entities/assessment_error.py +80 -0
- mlflow/entities/assessment_source.py +141 -0
- mlflow/entities/dataset.py +92 -0
- mlflow/entities/dataset_input.py +51 -0
- mlflow/entities/dataset_summary.py +62 -0
- mlflow/entities/document.py +48 -0
- mlflow/entities/experiment.py +109 -0
- mlflow/entities/experiment_tag.py +35 -0
- mlflow/entities/file_info.py +45 -0
- mlflow/entities/input_tag.py +35 -0
- mlflow/entities/lifecycle_stage.py +35 -0
- mlflow/entities/logged_model.py +228 -0
- mlflow/entities/logged_model_input.py +26 -0
- mlflow/entities/logged_model_output.py +32 -0
- mlflow/entities/logged_model_parameter.py +46 -0
- mlflow/entities/logged_model_status.py +74 -0
- mlflow/entities/logged_model_tag.py +33 -0
- mlflow/entities/metric.py +200 -0
- mlflow/entities/model_registry/__init__.py +29 -0
- mlflow/entities/model_registry/_model_registry_entity.py +13 -0
- mlflow/entities/model_registry/model_version.py +243 -0
- mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
- mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
- mlflow/entities/model_registry/model_version_search.py +25 -0
- mlflow/entities/model_registry/model_version_stages.py +25 -0
- mlflow/entities/model_registry/model_version_status.py +35 -0
- mlflow/entities/model_registry/model_version_tag.py +35 -0
- mlflow/entities/model_registry/prompt.py +73 -0
- mlflow/entities/model_registry/prompt_version.py +244 -0
- mlflow/entities/model_registry/registered_model.py +175 -0
- mlflow/entities/model_registry/registered_model_alias.py +35 -0
- mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
- mlflow/entities/model_registry/registered_model_search.py +25 -0
- mlflow/entities/model_registry/registered_model_tag.py +35 -0
- mlflow/entities/multipart_upload.py +74 -0
- mlflow/entities/param.py +49 -0
- mlflow/entities/run.py +97 -0
- mlflow/entities/run_data.py +84 -0
- mlflow/entities/run_info.py +188 -0
- mlflow/entities/run_inputs.py +59 -0
- mlflow/entities/run_outputs.py +43 -0
- mlflow/entities/run_status.py +41 -0
- mlflow/entities/run_tag.py +36 -0
- mlflow/entities/source_type.py +31 -0
- mlflow/entities/span.py +774 -0
- mlflow/entities/span_event.py +96 -0
- mlflow/entities/span_status.py +102 -0
- mlflow/entities/trace.py +317 -0
- mlflow/entities/trace_data.py +71 -0
- mlflow/entities/trace_info.py +220 -0
- mlflow/entities/trace_info_v2.py +162 -0
- mlflow/entities/trace_location.py +173 -0
- mlflow/entities/trace_state.py +39 -0
- mlflow/entities/trace_status.py +68 -0
- mlflow/entities/view_type.py +51 -0
- mlflow/environment_variables.py +866 -0
- mlflow/evaluation/__init__.py +16 -0
- mlflow/evaluation/assessment.py +369 -0
- mlflow/evaluation/evaluation.py +411 -0
- mlflow/evaluation/evaluation_tag.py +61 -0
- mlflow/evaluation/fluent.py +48 -0
- mlflow/evaluation/utils.py +201 -0
- mlflow/exceptions.py +213 -0
- mlflow/experiments.py +140 -0
- mlflow/gemini/__init__.py +81 -0
- mlflow/gemini/autolog.py +186 -0
- mlflow/gemini/chat.py +261 -0
- mlflow/genai/__init__.py +71 -0
- mlflow/genai/datasets/__init__.py +67 -0
- mlflow/genai/datasets/evaluation_dataset.py +131 -0
- mlflow/genai/evaluation/__init__.py +3 -0
- mlflow/genai/evaluation/base.py +411 -0
- mlflow/genai/evaluation/constant.py +23 -0
- mlflow/genai/evaluation/utils.py +244 -0
- mlflow/genai/judges/__init__.py +21 -0
- mlflow/genai/judges/databricks.py +404 -0
- mlflow/genai/label_schemas/__init__.py +153 -0
- mlflow/genai/label_schemas/label_schemas.py +209 -0
- mlflow/genai/labeling/__init__.py +159 -0
- mlflow/genai/labeling/labeling.py +250 -0
- mlflow/genai/optimize/__init__.py +13 -0
- mlflow/genai/optimize/base.py +198 -0
- mlflow/genai/optimize/optimizers/__init__.py +4 -0
- mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
- mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
- mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
- mlflow/genai/optimize/types.py +75 -0
- mlflow/genai/optimize/util.py +30 -0
- mlflow/genai/prompts/__init__.py +206 -0
- mlflow/genai/scheduled_scorers.py +431 -0
- mlflow/genai/scorers/__init__.py +26 -0
- mlflow/genai/scorers/base.py +492 -0
- mlflow/genai/scorers/builtin_scorers.py +765 -0
- mlflow/genai/scorers/scorer_utils.py +138 -0
- mlflow/genai/scorers/validation.py +165 -0
- mlflow/genai/utils/data_validation.py +146 -0
- mlflow/genai/utils/enum_utils.py +23 -0
- mlflow/genai/utils/trace_utils.py +211 -0
- mlflow/groq/__init__.py +42 -0
- mlflow/groq/_groq_autolog.py +74 -0
- mlflow/johnsnowlabs/__init__.py +888 -0
- mlflow/langchain/__init__.py +24 -0
- mlflow/langchain/api_request_parallel_processor.py +330 -0
- mlflow/langchain/autolog.py +147 -0
- mlflow/langchain/chat_agent_langgraph.py +340 -0
- mlflow/langchain/constant.py +1 -0
- mlflow/langchain/constants.py +1 -0
- mlflow/langchain/databricks_dependencies.py +444 -0
- mlflow/langchain/langchain_tracer.py +597 -0
- mlflow/langchain/model.py +919 -0
- mlflow/langchain/output_parsers.py +142 -0
- mlflow/langchain/retriever_chain.py +153 -0
- mlflow/langchain/runnables.py +527 -0
- mlflow/langchain/utils/chat.py +402 -0
- mlflow/langchain/utils/logging.py +671 -0
- mlflow/langchain/utils/serialization.py +36 -0
- mlflow/legacy_databricks_cli/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/provider.py +482 -0
- mlflow/litellm/__init__.py +175 -0
- mlflow/llama_index/__init__.py +22 -0
- mlflow/llama_index/autolog.py +55 -0
- mlflow/llama_index/chat.py +43 -0
- mlflow/llama_index/constant.py +1 -0
- mlflow/llama_index/model.py +577 -0
- mlflow/llama_index/pyfunc_wrapper.py +332 -0
- mlflow/llama_index/serialize_objects.py +188 -0
- mlflow/llama_index/tracer.py +561 -0
- mlflow/metrics/__init__.py +479 -0
- mlflow/metrics/base.py +39 -0
- mlflow/metrics/genai/__init__.py +25 -0
- mlflow/metrics/genai/base.py +101 -0
- mlflow/metrics/genai/genai_metric.py +771 -0
- mlflow/metrics/genai/metric_definitions.py +450 -0
- mlflow/metrics/genai/model_utils.py +371 -0
- mlflow/metrics/genai/prompt_template.py +68 -0
- mlflow/metrics/genai/prompts/__init__.py +0 -0
- mlflow/metrics/genai/prompts/v1.py +422 -0
- mlflow/metrics/genai/utils.py +6 -0
- mlflow/metrics/metric_definitions.py +619 -0
- mlflow/mismatch.py +34 -0
- mlflow/mistral/__init__.py +34 -0
- mlflow/mistral/autolog.py +71 -0
- mlflow/mistral/chat.py +135 -0
- mlflow/ml_package_versions.py +452 -0
- mlflow/models/__init__.py +97 -0
- mlflow/models/auth_policy.py +83 -0
- mlflow/models/cli.py +354 -0
- mlflow/models/container/__init__.py +294 -0
- mlflow/models/container/scoring_server/__init__.py +0 -0
- mlflow/models/container/scoring_server/nginx.conf +39 -0
- mlflow/models/dependencies_schemas.py +287 -0
- mlflow/models/display_utils.py +158 -0
- mlflow/models/docker_utils.py +211 -0
- mlflow/models/evaluation/__init__.py +23 -0
- mlflow/models/evaluation/_shap_patch.py +64 -0
- mlflow/models/evaluation/artifacts.py +194 -0
- mlflow/models/evaluation/base.py +1811 -0
- mlflow/models/evaluation/calibration_curve.py +109 -0
- mlflow/models/evaluation/default_evaluator.py +996 -0
- mlflow/models/evaluation/deprecated.py +23 -0
- mlflow/models/evaluation/evaluator_registry.py +80 -0
- mlflow/models/evaluation/evaluators/classifier.py +704 -0
- mlflow/models/evaluation/evaluators/default.py +233 -0
- mlflow/models/evaluation/evaluators/regressor.py +96 -0
- mlflow/models/evaluation/evaluators/shap.py +296 -0
- mlflow/models/evaluation/lift_curve.py +178 -0
- mlflow/models/evaluation/utils/metric.py +123 -0
- mlflow/models/evaluation/utils/trace.py +179 -0
- mlflow/models/evaluation/validation.py +434 -0
- mlflow/models/flavor_backend.py +93 -0
- mlflow/models/flavor_backend_registry.py +53 -0
- mlflow/models/model.py +1639 -0
- mlflow/models/model_config.py +150 -0
- mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
- mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
- mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
- mlflow/models/python_api.py +369 -0
- mlflow/models/rag_signatures.py +128 -0
- mlflow/models/resources.py +321 -0
- mlflow/models/signature.py +662 -0
- mlflow/models/utils.py +2054 -0
- mlflow/models/wheeled_model.py +280 -0
- mlflow/openai/__init__.py +57 -0
- mlflow/openai/_agent_tracer.py +364 -0
- mlflow/openai/api_request_parallel_processor.py +131 -0
- mlflow/openai/autolog.py +509 -0
- mlflow/openai/constant.py +1 -0
- mlflow/openai/model.py +824 -0
- mlflow/openai/utils/chat_schema.py +367 -0
- mlflow/optuna/__init__.py +3 -0
- mlflow/optuna/storage.py +646 -0
- mlflow/plugins/__init__.py +72 -0
- mlflow/plugins/base.py +358 -0
- mlflow/plugins/builtin/__init__.py +24 -0
- mlflow/plugins/builtin/pytorch_plugin.py +150 -0
- mlflow/plugins/builtin/sklearn_plugin.py +158 -0
- mlflow/plugins/builtin/transformers_plugin.py +187 -0
- mlflow/plugins/cli.py +321 -0
- mlflow/plugins/discovery.py +340 -0
- mlflow/plugins/manager.py +465 -0
- mlflow/plugins/registry.py +316 -0
- mlflow/plugins/templates/framework_plugin_template.py +329 -0
- mlflow/prompt/constants.py +20 -0
- mlflow/prompt/promptlab_model.py +197 -0
- mlflow/prompt/registry_utils.py +248 -0
- mlflow/promptflow/__init__.py +495 -0
- mlflow/protos/__init__.py +0 -0
- mlflow/protos/assessments_pb2.py +174 -0
- mlflow/protos/databricks_artifacts_pb2.py +489 -0
- mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
- mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
- mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
- mlflow/protos/databricks_pb2.py +267 -0
- mlflow/protos/databricks_trace_server_pb2.py +374 -0
- mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
- mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
- mlflow/protos/facet_feature_statistics_pb2.py +296 -0
- mlflow/protos/internal_pb2.py +77 -0
- mlflow/protos/mlflow_artifacts_pb2.py +336 -0
- mlflow/protos/model_registry_pb2.py +1073 -0
- mlflow/protos/scalapb/__init__.py +0 -0
- mlflow/protos/scalapb/scalapb_pb2.py +104 -0
- mlflow/protos/service_pb2.py +2600 -0
- mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
- mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
- mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
- mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
- mlflow/py.typed +0 -0
- mlflow/pydantic_ai/__init__.py +57 -0
- mlflow/pydantic_ai/autolog.py +173 -0
- mlflow/pyfunc/__init__.py +3844 -0
- mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
- mlflow/pyfunc/backend.py +523 -0
- mlflow/pyfunc/context.py +78 -0
- mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
- mlflow/pyfunc/loaders/__init__.py +7 -0
- mlflow/pyfunc/loaders/chat_agent.py +117 -0
- mlflow/pyfunc/loaders/chat_model.py +125 -0
- mlflow/pyfunc/loaders/code_model.py +31 -0
- mlflow/pyfunc/loaders/responses_agent.py +112 -0
- mlflow/pyfunc/mlserver.py +46 -0
- mlflow/pyfunc/model.py +1473 -0
- mlflow/pyfunc/scoring_server/__init__.py +604 -0
- mlflow/pyfunc/scoring_server/app.py +7 -0
- mlflow/pyfunc/scoring_server/client.py +146 -0
- mlflow/pyfunc/spark_model_cache.py +48 -0
- mlflow/pyfunc/stdin_server.py +44 -0
- mlflow/pyfunc/utils/__init__.py +3 -0
- mlflow/pyfunc/utils/data_validation.py +224 -0
- mlflow/pyfunc/utils/environment.py +22 -0
- mlflow/pyfunc/utils/input_converter.py +47 -0
- mlflow/pyfunc/utils/serving_data_parser.py +11 -0
- mlflow/pytorch/__init__.py +1171 -0
- mlflow/pytorch/_lightning_autolog.py +580 -0
- mlflow/pytorch/_pytorch_autolog.py +50 -0
- mlflow/pytorch/pickle_module.py +35 -0
- mlflow/rfunc/__init__.py +42 -0
- mlflow/rfunc/backend.py +134 -0
- mlflow/runs.py +89 -0
- mlflow/server/__init__.py +302 -0
- mlflow/server/auth/__init__.py +1224 -0
- mlflow/server/auth/__main__.py +4 -0
- mlflow/server/auth/basic_auth.ini +6 -0
- mlflow/server/auth/cli.py +11 -0
- mlflow/server/auth/client.py +537 -0
- mlflow/server/auth/config.py +34 -0
- mlflow/server/auth/db/__init__.py +0 -0
- mlflow/server/auth/db/cli.py +18 -0
- mlflow/server/auth/db/migrations/__init__.py +0 -0
- mlflow/server/auth/db/migrations/alembic.ini +110 -0
- mlflow/server/auth/db/migrations/env.py +76 -0
- mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
- mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
- mlflow/server/auth/db/models.py +67 -0
- mlflow/server/auth/db/utils.py +37 -0
- mlflow/server/auth/entities.py +165 -0
- mlflow/server/auth/logo.py +14 -0
- mlflow/server/auth/permissions.py +65 -0
- mlflow/server/auth/routes.py +18 -0
- mlflow/server/auth/sqlalchemy_store.py +263 -0
- mlflow/server/graphql/__init__.py +0 -0
- mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
- mlflow/server/graphql/graphql_custom_scalars.py +24 -0
- mlflow/server/graphql/graphql_errors.py +15 -0
- mlflow/server/graphql/graphql_no_batching.py +89 -0
- mlflow/server/graphql/graphql_schema_extensions.py +74 -0
- mlflow/server/handlers.py +3217 -0
- mlflow/server/prometheus_exporter.py +17 -0
- mlflow/server/validation.py +30 -0
- mlflow/shap/__init__.py +691 -0
- mlflow/sklearn/__init__.py +1994 -0
- mlflow/sklearn/utils.py +1041 -0
- mlflow/smolagents/__init__.py +66 -0
- mlflow/smolagents/autolog.py +139 -0
- mlflow/smolagents/chat.py +29 -0
- mlflow/store/__init__.py +10 -0
- mlflow/store/_unity_catalog/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/constants.py +2 -0
- mlflow/store/_unity_catalog/registry/__init__.py +6 -0
- mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
- mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
- mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
- mlflow/store/_unity_catalog/registry/utils.py +121 -0
- mlflow/store/artifact/__init__.py +0 -0
- mlflow/store/artifact/artifact_repo.py +472 -0
- mlflow/store/artifact/artifact_repository_registry.py +154 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
- mlflow/store/artifact/cli.py +141 -0
- mlflow/store/artifact/cloud_artifact_repo.py +332 -0
- mlflow/store/artifact/databricks_artifact_repo.py +729 -0
- mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
- mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
- mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
- mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
- mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
- mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
- mlflow/store/artifact/ftp_artifact_repo.py +132 -0
- mlflow/store/artifact/gcs_artifact_repo.py +296 -0
- mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
- mlflow/store/artifact/http_artifact_repo.py +218 -0
- mlflow/store/artifact/local_artifact_repo.py +142 -0
- mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
- mlflow/store/artifact/models_artifact_repo.py +259 -0
- mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
- mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
- mlflow/store/artifact/r2_artifact_repo.py +70 -0
- mlflow/store/artifact/runs_artifact_repo.py +265 -0
- mlflow/store/artifact/s3_artifact_repo.py +330 -0
- mlflow/store/artifact/sftp_artifact_repo.py +141 -0
- mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
- mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
- mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
- mlflow/store/artifact/utils/__init__.py +0 -0
- mlflow/store/artifact/utils/models.py +148 -0
- mlflow/store/db/__init__.py +0 -0
- mlflow/store/db/base_sql_model.py +3 -0
- mlflow/store/db/db_types.py +10 -0
- mlflow/store/db/utils.py +314 -0
- mlflow/store/db_migrations/__init__.py +0 -0
- mlflow/store/db_migrations/alembic.ini +74 -0
- mlflow/store/db_migrations/env.py +84 -0
- mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
- mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
- mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
- mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
- mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
- mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
- mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
- mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
- mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
- mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
- mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
- mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
- mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
- mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
- mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
- mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
- mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
- mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
- mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
- mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
- mlflow/store/db_migrations/versions/__init__.py +0 -0
- mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
- mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
- mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
- mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
- mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
- mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
- mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
- mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
- mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
- mlflow/store/entities/__init__.py +3 -0
- mlflow/store/entities/paged_list.py +18 -0
- mlflow/store/model_registry/__init__.py +10 -0
- mlflow/store/model_registry/abstract_store.py +1081 -0
- mlflow/store/model_registry/base_rest_store.py +44 -0
- mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
- mlflow/store/model_registry/dbmodels/__init__.py +0 -0
- mlflow/store/model_registry/dbmodels/models.py +206 -0
- mlflow/store/model_registry/file_store.py +1091 -0
- mlflow/store/model_registry/rest_store.py +481 -0
- mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
- mlflow/store/tracking/__init__.py +23 -0
- mlflow/store/tracking/abstract_store.py +816 -0
- mlflow/store/tracking/dbmodels/__init__.py +0 -0
- mlflow/store/tracking/dbmodels/initial_models.py +243 -0
- mlflow/store/tracking/dbmodels/models.py +1073 -0
- mlflow/store/tracking/file_store.py +2438 -0
- mlflow/store/tracking/postgres_managed_identity.py +146 -0
- mlflow/store/tracking/rest_store.py +1131 -0
- mlflow/store/tracking/sqlalchemy_store.py +2785 -0
- mlflow/system_metrics/__init__.py +61 -0
- mlflow/system_metrics/metrics/__init__.py +0 -0
- mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
- mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
- mlflow/system_metrics/metrics/disk_monitor.py +21 -0
- mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
- mlflow/system_metrics/metrics/network_monitor.py +34 -0
- mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
- mlflow/system_metrics/system_metrics_monitor.py +198 -0
- mlflow/tracing/__init__.py +16 -0
- mlflow/tracing/assessment.py +356 -0
- mlflow/tracing/client.py +531 -0
- mlflow/tracing/config.py +125 -0
- mlflow/tracing/constant.py +105 -0
- mlflow/tracing/destination.py +81 -0
- mlflow/tracing/display/__init__.py +40 -0
- mlflow/tracing/display/display_handler.py +196 -0
- mlflow/tracing/export/async_export_queue.py +186 -0
- mlflow/tracing/export/inference_table.py +138 -0
- mlflow/tracing/export/mlflow_v3.py +137 -0
- mlflow/tracing/export/utils.py +70 -0
- mlflow/tracing/fluent.py +1417 -0
- mlflow/tracing/processor/base_mlflow.py +199 -0
- mlflow/tracing/processor/inference_table.py +175 -0
- mlflow/tracing/processor/mlflow_v3.py +47 -0
- mlflow/tracing/processor/otel.py +73 -0
- mlflow/tracing/provider.py +487 -0
- mlflow/tracing/trace_manager.py +200 -0
- mlflow/tracing/utils/__init__.py +616 -0
- mlflow/tracing/utils/artifact_utils.py +28 -0
- mlflow/tracing/utils/copy.py +55 -0
- mlflow/tracing/utils/environment.py +55 -0
- mlflow/tracing/utils/exception.py +21 -0
- mlflow/tracing/utils/once.py +35 -0
- mlflow/tracing/utils/otlp.py +63 -0
- mlflow/tracing/utils/processor.py +54 -0
- mlflow/tracing/utils/search.py +292 -0
- mlflow/tracing/utils/timeout.py +250 -0
- mlflow/tracing/utils/token.py +19 -0
- mlflow/tracing/utils/truncation.py +124 -0
- mlflow/tracing/utils/warning.py +76 -0
- mlflow/tracking/__init__.py +39 -0
- mlflow/tracking/_model_registry/__init__.py +1 -0
- mlflow/tracking/_model_registry/client.py +764 -0
- mlflow/tracking/_model_registry/fluent.py +853 -0
- mlflow/tracking/_model_registry/registry.py +67 -0
- mlflow/tracking/_model_registry/utils.py +251 -0
- mlflow/tracking/_tracking_service/__init__.py +0 -0
- mlflow/tracking/_tracking_service/client.py +883 -0
- mlflow/tracking/_tracking_service/registry.py +56 -0
- mlflow/tracking/_tracking_service/utils.py +275 -0
- mlflow/tracking/artifact_utils.py +179 -0
- mlflow/tracking/client.py +5900 -0
- mlflow/tracking/context/__init__.py +0 -0
- mlflow/tracking/context/abstract_context.py +35 -0
- mlflow/tracking/context/databricks_cluster_context.py +15 -0
- mlflow/tracking/context/databricks_command_context.py +15 -0
- mlflow/tracking/context/databricks_job_context.py +49 -0
- mlflow/tracking/context/databricks_notebook_context.py +41 -0
- mlflow/tracking/context/databricks_repo_context.py +43 -0
- mlflow/tracking/context/default_context.py +51 -0
- mlflow/tracking/context/git_context.py +32 -0
- mlflow/tracking/context/registry.py +98 -0
- mlflow/tracking/context/system_environment_context.py +15 -0
- mlflow/tracking/default_experiment/__init__.py +1 -0
- mlflow/tracking/default_experiment/abstract_context.py +43 -0
- mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
- mlflow/tracking/default_experiment/registry.py +75 -0
- mlflow/tracking/fluent.py +3595 -0
- mlflow/tracking/metric_value_conversion_utils.py +93 -0
- mlflow/tracking/multimedia.py +206 -0
- mlflow/tracking/registry.py +86 -0
- mlflow/tracking/request_auth/__init__.py +0 -0
- mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
- mlflow/tracking/request_auth/registry.py +60 -0
- mlflow/tracking/request_header/__init__.py +0 -0
- mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
- mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
- mlflow/tracking/request_header/default_request_header_provider.py +17 -0
- mlflow/tracking/request_header/registry.py +79 -0
- mlflow/transformers/__init__.py +2982 -0
- mlflow/transformers/flavor_config.py +258 -0
- mlflow/transformers/hub_utils.py +83 -0
- mlflow/transformers/llm_inference_utils.py +468 -0
- mlflow/transformers/model_io.py +301 -0
- mlflow/transformers/peft.py +51 -0
- mlflow/transformers/signature.py +183 -0
- mlflow/transformers/torch_utils.py +55 -0
- mlflow/types/__init__.py +21 -0
- mlflow/types/agent.py +270 -0
- mlflow/types/chat.py +240 -0
- mlflow/types/llm.py +935 -0
- mlflow/types/responses.py +139 -0
- mlflow/types/responses_helpers.py +416 -0
- mlflow/types/schema.py +1505 -0
- mlflow/types/type_hints.py +647 -0
- mlflow/types/utils.py +753 -0
- mlflow/utils/__init__.py +283 -0
- mlflow/utils/_capture_modules.py +256 -0
- mlflow/utils/_capture_transformers_modules.py +75 -0
- mlflow/utils/_spark_utils.py +201 -0
- mlflow/utils/_unity_catalog_oss_utils.py +97 -0
- mlflow/utils/_unity_catalog_utils.py +479 -0
- mlflow/utils/annotations.py +218 -0
- mlflow/utils/arguments_utils.py +16 -0
- mlflow/utils/async_logging/__init__.py +1 -0
- mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
- mlflow/utils/async_logging/async_logging_queue.py +366 -0
- mlflow/utils/async_logging/run_artifact.py +38 -0
- mlflow/utils/async_logging/run_batch.py +58 -0
- mlflow/utils/async_logging/run_operations.py +49 -0
- mlflow/utils/autologging_utils/__init__.py +737 -0
- mlflow/utils/autologging_utils/client.py +432 -0
- mlflow/utils/autologging_utils/config.py +33 -0
- mlflow/utils/autologging_utils/events.py +294 -0
- mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
- mlflow/utils/autologging_utils/metrics_queue.py +71 -0
- mlflow/utils/autologging_utils/safety.py +1104 -0
- mlflow/utils/autologging_utils/versioning.py +95 -0
- mlflow/utils/checkpoint_utils.py +206 -0
- mlflow/utils/class_utils.py +6 -0
- mlflow/utils/cli_args.py +257 -0
- mlflow/utils/conda.py +354 -0
- mlflow/utils/credentials.py +231 -0
- mlflow/utils/data_utils.py +17 -0
- mlflow/utils/databricks_utils.py +1436 -0
- mlflow/utils/docstring_utils.py +477 -0
- mlflow/utils/doctor.py +133 -0
- mlflow/utils/download_cloud_file_chunk.py +43 -0
- mlflow/utils/env_manager.py +16 -0
- mlflow/utils/env_pack.py +131 -0
- mlflow/utils/environment.py +1009 -0
- mlflow/utils/exception_utils.py +14 -0
- mlflow/utils/file_utils.py +978 -0
- mlflow/utils/git_utils.py +77 -0
- mlflow/utils/gorilla.py +797 -0
- mlflow/utils/import_hooks/__init__.py +363 -0
- mlflow/utils/lazy_load.py +51 -0
- mlflow/utils/logging_utils.py +168 -0
- mlflow/utils/mime_type_utils.py +58 -0
- mlflow/utils/mlflow_tags.py +103 -0
- mlflow/utils/model_utils.py +486 -0
- mlflow/utils/name_utils.py +346 -0
- mlflow/utils/nfs_on_spark.py +62 -0
- mlflow/utils/openai_utils.py +164 -0
- mlflow/utils/os.py +12 -0
- mlflow/utils/oss_registry_utils.py +29 -0
- mlflow/utils/plugins.py +17 -0
- mlflow/utils/process.py +182 -0
- mlflow/utils/promptlab_utils.py +146 -0
- mlflow/utils/proto_json_utils.py +743 -0
- mlflow/utils/pydantic_utils.py +54 -0
- mlflow/utils/request_utils.py +279 -0
- mlflow/utils/requirements_utils.py +704 -0
- mlflow/utils/rest_utils.py +673 -0
- mlflow/utils/search_logged_model_utils.py +127 -0
- mlflow/utils/search_utils.py +2111 -0
- mlflow/utils/secure_loading.py +221 -0
- mlflow/utils/security_validation.py +384 -0
- mlflow/utils/server_cli_utils.py +61 -0
- mlflow/utils/spark_utils.py +15 -0
- mlflow/utils/string_utils.py +138 -0
- mlflow/utils/thread_utils.py +63 -0
- mlflow/utils/time.py +54 -0
- mlflow/utils/timeout.py +42 -0
- mlflow/utils/uri.py +572 -0
- mlflow/utils/validation.py +662 -0
- mlflow/utils/virtualenv.py +458 -0
- mlflow/utils/warnings_utils.py +25 -0
- mlflow/utils/yaml_utils.py +179 -0
- mlflow/version.py +24 -0
mlflow/genai/scorers/builtin_scorers.py
@@ -0,0 +1,765 @@
from dataclasses import asdict
from typing import Any, Optional, Union

import mlflow
from mlflow.entities.assessment import Feedback
from mlflow.entities.trace import Trace
from mlflow.exceptions import MlflowException
from mlflow.genai import judges
from mlflow.genai.judges.databricks import requires_databricks_agents
from mlflow.genai.scorers.base import _SERIALIZATION_VERSION, Scorer, SerializedScorer
from mlflow.genai.utils.trace_utils import (
    extract_retrieval_context_from_trace,
    parse_inputs_to_str,
    parse_output_to_str,
)
from mlflow.utils.annotations import experimental

GENAI_CONFIG_NAME = "databricks-agent"


class BuiltInScorer(Scorer):
    """
    Base class for built-in scorers that share a common implementation. All built-in scorers
    should inherit from this class.
    """

    name: str
    required_columns: set[str] = set()

    def model_dump(self, **kwargs) -> dict[str, Any]:
        """Override model_dump to handle builtin scorer serialization."""
        # Use mode='json' to automatically convert sets to lists for JSON compatibility
        from pydantic import BaseModel

        pydantic_model_data = BaseModel.model_dump(self, mode="json", **kwargs)

        # Create serialized scorer with core fields
        serialized = SerializedScorer(
            name=self.name,
            aggregations=self.aggregations,
            mlflow_version=mlflow.__version__,
            serialization_version=_SERIALIZATION_VERSION,
            builtin_scorer_class=self.__class__.__name__,
            builtin_scorer_pydantic_data=pydantic_model_data,
        )

        return asdict(serialized)

    @classmethod
    def model_validate(cls, obj) -> "BuiltInScorer":
        """Override model_validate to handle builtin scorer deserialization."""
        if not isinstance(obj, dict) or "builtin_scorer_class" not in obj:
            raise MlflowException.invalid_parameter_value(
                "Invalid builtin scorer data: expected a dictionary with a"
                f" 'builtin_scorer_class' field, got {type(obj).__name__}."
            )

        from mlflow.genai.scorers import builtin_scorers

        # Parse the serialized data using our dataclass
        try:
            serialized = SerializedScorer(**obj)
        except Exception as e:
            raise MlflowException.invalid_parameter_value(
                f"Failed to parse serialized scorer data: {e}"
            )

        try:
            scorer_class = getattr(builtin_scorers, serialized.builtin_scorer_class)
        except AttributeError:
            raise MlflowException.invalid_parameter_value(
                f"Unknown builtin scorer class: {serialized.builtin_scorer_class}"
            )

        # Use the builtin_scorer_pydantic_data directly to reconstruct the scorer
        constructor_args = serialized.builtin_scorer_pydantic_data or {}

        return scorer_class(**constructor_args)

    def validate_columns(self, columns: set[str]) -> None:
        missing_columns = self.required_columns - columns
        if missing_columns:
            raise MissingColumnsException(self.name, missing_columns)


# === Builtin Scorers ===
@experimental(version="3.0.0")
class RetrievalRelevance(BuiltInScorer):
    """
    Retrieval relevance measures whether each retrieved chunk is relevant to the input request.

    You can invoke the scorer directly with a single input for testing, or pass it to
    `mlflow.genai.evaluate` for running full evaluation on a dataset.

    Example (direct usage):

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import RetrievalRelevance

        trace = mlflow.get_trace("<your-trace-id>")
        feedbacks = RetrievalRelevance(name="my_retrieval_relevance")(trace=trace)
        print(feedbacks)

    Example (with evaluate):

    .. code-block:: python

        import mlflow

        data = mlflow.search_traces(...)
        result = mlflow.genai.evaluate(data=data, scorers=[RetrievalRelevance()])
    """

    name: str = "retrieval_relevance"
    required_columns: set[str] = {"inputs", "trace"}

    def __call__(self, *, trace: Trace) -> list[Feedback]:
        """
        Evaluate chunk relevance for each context chunk.

        Args:
            trace: The trace of the model's execution. Must contain at least one span with
                type `RETRIEVER`. MLflow will extract the retrieved context from that span.
                If multiple spans are found, MLflow will use the **last** one.

        Returns:
            A list of assessments evaluating the relevance of each context chunk.
            If the number of retrievers is N and each retriever has M chunks, the list will
            contain N * (M + 1) assessments. Each retriever span will emit M assessments
            for the relevance of its chunks and 1 assessment for the average relevance of
            all chunks.
        """
        request = parse_inputs_to_str(trace.data.spans[0].inputs)
        span_id_to_context = extract_retrieval_context_from_trace(trace)

        feedbacks = []
        for span_id, context in span_id_to_context.items():
            feedbacks.extend(self._compute_span_relevance(span_id, request, context))
        return feedbacks

    @requires_databricks_agents
    def _compute_span_relevance(
        self, span_id: str, request: str, chunks: dict[str, str]
    ) -> list[Feedback]:
        """Compute the relevance of retrieved context for one retriever span."""
        from databricks.agents.evals.judges import chunk_relevance

        # Compute relevance for each chunk. Call the `chunk_relevance` judge directly
        # to get a list of feedbacks with ids.
        chunk_feedbacks = chunk_relevance(
            request=request, retrieved_context=chunks, assessment_name=self.name
        )
        for feedback in chunk_feedbacks:
            feedback.span_id = span_id

        if len(chunk_feedbacks) == 0:
            return []

        # Compute average relevance across all chunks.
        # NB: Handling error feedback as 0.0 relevance for simplicity.
        average = sum(f.value == "yes" for f in chunk_feedbacks) / len(chunk_feedbacks)

        span_level_feedback = Feedback(
            # NB: Adding a special suffix for span-level aggregation so that the UI can
            # distinguish it from the chunk-level score and render it on the span correctly.
            name=self.name + "/precision",
            value=average,
            source=chunk_feedbacks[0].source,
            span_id=span_id,
        )
        return [span_level_feedback] + chunk_feedbacks


@experimental(version="3.0.0")
class RetrievalSufficiency(BuiltInScorer):
    """
    Retrieval sufficiency evaluates whether the retrieved documents provide all necessary
    information to generate the expected response.

    You can invoke the scorer directly with a single input for testing, or pass it to
    `mlflow.genai.evaluate` for running full evaluation on a dataset.

    Example (direct usage):

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import RetrievalSufficiency

        trace = mlflow.get_trace("<your-trace-id>")
        feedback = RetrievalSufficiency(name="my_retrieval_sufficiency")(trace=trace)
        print(feedback)

    Example (with evaluate):

    .. code-block:: python

        import mlflow

        data = mlflow.search_traces(...)
        result = mlflow.genai.evaluate(data=data, scorers=[RetrievalSufficiency()])
    """

    name: str = "retrieval_sufficiency"
    required_columns: set[str] = {"inputs", "trace"}

    def validate_columns(self, columns: set[str]) -> None:
        super().validate_columns(columns)
        if (
            "expectations/expected_response" not in columns
            and "expectations/expected_facts" not in columns
        ):
            raise MissingColumnsException(
                self.name, ["expectations/expected_response or expectations/expected_facts"]
            )

    def __call__(
        self, *, trace: Trace, expectations: Optional[dict[str, Any]] = None
    ) -> list[Feedback]:
        """
        Evaluate context sufficiency based on retrieved documents.

        Args:
            trace: The trace of the model's execution. Must contain at least one span with
                type `RETRIEVER`. MLflow will extract the retrieved context from that span.
                If multiple spans are found, MLflow will use the **last** one.
            expectations: A dictionary of expectations for the response. Either the
                `expected_facts` or the `expected_response` key is required. Alternatively,
                you can pass a trace annotated with `expected_facts` or `expected_response`
                label(s) and omit this argument.
        """
        request = parse_inputs_to_str(trace.data.spans[0].inputs)
        span_id_to_context = extract_retrieval_context_from_trace(trace)

        # If expectations are explicitly provided, use them.
        expectations = expectations or {}
        expected_facts = expectations.get("expected_facts")
        expected_response = expectations.get("expected_response")

        # As a fallback, use the trace annotations as expectations.
        if expected_facts is None or expected_response is None:
            for assessment in trace.info.assessments:
                if assessment.name == "expected_facts" and expected_facts is None:
                    expected_facts = assessment.value
                if assessment.name == "expected_response" and expected_response is None:
                    expected_response = assessment.value

        # This scorer returns a list of feedbacks, one for each retriever span in the trace.
        feedbacks = []
        for span_id, context in span_id_to_context.items():
            feedback = judges.is_context_sufficient(
                request=request,
                context=context,
                expected_response=expected_response,
                expected_facts=expected_facts,
                name=self.name,
            )
            feedback.span_id = span_id
            feedbacks.append(feedback)

        return feedbacks


@experimental(version="3.0.0")
class RetrievalGroundedness(BuiltInScorer):
    """
    RetrievalGroundedness assesses whether the agent's response is aligned with the information
    provided in the retrieved context.

    You can invoke the scorer directly with a single input for testing, or pass it to
    `mlflow.genai.evaluate` for running full evaluation on a dataset.

    Example (direct usage):

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import RetrievalGroundedness

        trace = mlflow.get_trace("<your-trace-id>")
        feedback = RetrievalGroundedness(name="my_retrieval_groundedness")(trace=trace)
        print(feedback)

    Example (with evaluate):

    .. code-block:: python

        import mlflow

        data = mlflow.search_traces(...)
        result = mlflow.genai.evaluate(data=data, scorers=[RetrievalGroundedness()])
    """

    name: str = "retrieval_groundedness"
    required_columns: set[str] = {"inputs", "trace"}

    def __call__(self, *, trace: Trace) -> list[Feedback]:
        """
        Evaluate groundedness of the response against the retrieved context.

        Args:
            trace: The trace of the model's execution. Must contain at least one span with
                type `RETRIEVER`. MLflow will extract the retrieved context from that span.
                If multiple spans are found, MLflow will use the **last** one.

        Returns:
            A list of :py:class:`~mlflow.entities.assessment.Feedback` objects, one per
            retriever span, each with a boolean value indicating the groundedness of the
            response.
        """
        request = parse_inputs_to_str(trace.data.spans[0].inputs)
        response = parse_output_to_str(trace.data.spans[0].outputs)
        span_id_to_context = extract_retrieval_context_from_trace(trace)
        feedbacks = []
        for span_id, context in span_id_to_context.items():
            feedback = judges.is_grounded(
                request=request, response=response, context=context, name=self.name
            )
            feedback.span_id = span_id
            feedbacks.append(feedback)
        return feedbacks


@experimental(version="3.0.0")
class Guidelines(BuiltInScorer):
    """
    Guideline adherence evaluates whether the agent's response follows specific constraints
    or instructions provided in the guidelines.

    You can invoke the scorer directly with a single input for testing, or pass it to
    `mlflow.genai.evaluate` for running full evaluation on a dataset.

    If you want to evaluate all the responses with a single set of guidelines, you can specify
    the guidelines in the `guidelines` parameter of this scorer.

    Example (direct usage):

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import Guidelines

        # Create a global judge
        english = Guidelines(
            name="english_guidelines",
            guidelines=["The response must be in English"],
        )
        feedback = english(
            inputs={"question": "What is the capital of France?"},
            outputs="The capital of France is Paris.",
        )
        print(feedback)

    Example (with evaluate):

    In the following example, the guidelines specified in the `english` and `clarify` scorers
    will be uniformly applied to all examples in the dataset. The evaluation result will
    contain two scores, "english" and "clarify".

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import Guidelines

        english = Guidelines(
            name="english",
            guidelines=["The response must be in English"],
        )
        clarify = Guidelines(
            name="clarify",
            guidelines=["The response must be clear, coherent, and concise"],
        )

        data = [
            {
                "inputs": {"question": "What is the capital of France?"},
                "outputs": "The capital of France is Paris.",
            },
            {
                "inputs": {"question": "What is the capital of Germany?"},
                "outputs": "The capital of Germany is Berlin.",
            },
        ]
        mlflow.genai.evaluate(data=data, scorers=[english, clarify])
    """

    name: str = "guidelines"
    guidelines: Union[str, list[str]]
    required_columns: set[str] = {"inputs", "outputs"}

    def __call__(
        self,
        *,
        inputs: dict[str, Any],
        outputs: Any,
    ) -> Feedback:
        """
        Evaluate adherence to the specified guidelines.

        Args:
            inputs: A dictionary of input data, e.g.
                {"question": "What is the capital of France?"}.
            outputs: The response from the model, e.g. "The capital of France is Paris."

        Returns:
            An :py:class:`~mlflow.entities.assessment.Feedback` object with a boolean value
            indicating the adherence to the specified guidelines.
        """
        return judges.meets_guidelines(
            guidelines=self.guidelines,
            context={
                "request": parse_inputs_to_str(inputs),
                "response": parse_output_to_str(outputs),
            },
            name=self.name,
        )


@experimental(version="3.0.0")
class ExpectationsGuidelines(BuiltInScorer):
    """
    This scorer evaluates whether the agent's response follows specific constraints
    or instructions provided for each row in the input dataset. This scorer is useful when
    you have a different set of guidelines for each example.

    To use this scorer, the input dataset should contain the `expectations` column with the
    `guidelines` field. Then pass this scorer to `mlflow.genai.evaluate` for running full
    evaluation on the input dataset.

    Example:

    In this example, the guidelines specified in the `guidelines` field of the `expectations`
    column will be applied to each example individually. The evaluation result will contain a
    single "expectations_guidelines" score.

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import ExpectationsGuidelines

        data = [
            {
                "inputs": {"question": "What is the capital of France?"},
                "outputs": "The capital of France is Paris.",
                "expectations": {
                    "guidelines": ["The response must be factual and concise"],
                },
            },
            {
                "inputs": {"question": "How to learn Python?"},
                "outputs": "You can read a book or take a course.",
                "expectations": {
                    "guidelines": ["The response must be helpful and encouraging"],
                },
            },
        ]
        mlflow.genai.evaluate(data=data, scorers=[ExpectationsGuidelines()])
    """

    name: str = "expectations_guidelines"
    required_columns: set[str] = {"inputs", "outputs"}

    def validate_columns(self, columns: set[str]) -> None:
        super().validate_columns(columns)
        if "expectations/guidelines" not in columns:
            raise MissingColumnsException(self.name, ["expectations/guidelines"])

    def __call__(
        self,
        *,
        inputs: dict[str, Any],
        outputs: Any,
        expectations: Optional[dict[str, Any]] = None,
    ) -> Feedback:
        """
        Evaluate adherence to the per-example guidelines.

        Args:
            inputs: A dictionary of input data, e.g.
                {"question": "What is the capital of France?"}.
            outputs: The response from the model, e.g. "The capital of France is Paris."
            expectations: A dictionary of expectations for the response. This must contain
                the `guidelines` key, which holds the guidelines to evaluate the response
                against (typically taken from the `guidelines` field of the `expectations`
                column of the dataset).
                E.g., {"guidelines": ["The response must be factual and concise"]}

        Returns:
            An :py:class:`~mlflow.entities.assessment.Feedback` object with a boolean value
            indicating the adherence to the specified guidelines.
        """
        guidelines = (expectations or {}).get("guidelines")
        if not guidelines:
            raise MlflowException(
                "Guidelines must be specified in the `expectations` parameter or "
                "must be present in the trace."
            )

        return judges.meets_guidelines(
            guidelines=guidelines,
            context={
                "request": parse_inputs_to_str(inputs),
                "response": parse_output_to_str(outputs),
            },
            name=self.name,
        )


@experimental(version="3.0.0")
class RelevanceToQuery(BuiltInScorer):
    """
    Relevance ensures that the agent's response directly addresses the user's input without
    deviating into unrelated topics.

    You can invoke the scorer directly with a single input for testing, or pass it to
    `mlflow.genai.evaluate` for running full evaluation on a dataset.

    Example (direct usage):

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import RelevanceToQuery

        assessment = RelevanceToQuery(name="my_relevance_to_query")(
            inputs={"question": "What is the capital of France?"},
            outputs="The capital of France is Paris.",
        )
        print(assessment)

    Example (with evaluate):

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import RelevanceToQuery

        data = [
            {
                "inputs": {"question": "What is the capital of France?"},
                "outputs": "The capital of France is Paris.",
            }
        ]
        result = mlflow.genai.evaluate(data=data, scorers=[RelevanceToQuery()])
    """

    name: str = "relevance_to_query"
    required_columns: set[str] = {"inputs", "outputs"}

    def __call__(self, *, inputs: dict[str, Any], outputs: Any) -> Feedback:
        """
        Evaluate relevance to the user's query.

        Args:
            inputs: A dictionary of input data, e.g.
                {"question": "What is the capital of France?"}.
            outputs: The response from the model, e.g. "The capital of France is Paris."

        Returns:
            An :py:class:`~mlflow.entities.assessment.Feedback` object with a boolean value
            indicating the relevance of the response to the query.
        """
        request = parse_inputs_to_str(inputs)
        # NB: Reuse the is_context_relevant judge to evaluate the response.
        return judges.is_context_relevant(request=request, context=outputs, name=self.name)


@experimental(version="3.0.0")
class Safety(BuiltInScorer):
    """
    Safety ensures that the agent's responses do not contain harmful, offensive, or toxic
    content.

    You can invoke the scorer directly with a single input for testing, or pass it to
    `mlflow.genai.evaluate` for running full evaluation on a dataset.

    Example (direct usage):

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import Safety

        assessment = Safety(name="my_safety")(outputs="The capital of France is Paris.")
        print(assessment)

    Example (with evaluate):

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import Safety

        data = [
            {
                "inputs": {"question": "What is the capital of France?"},
                "outputs": "The capital of France is Paris.",
            }
        ]
        result = mlflow.genai.evaluate(data=data, scorers=[Safety()])
    """

    name: str = "safety"
    required_columns: set[str] = {"inputs", "outputs"}

    def __call__(self, *, outputs: Any) -> Feedback:
        """
        Evaluate the safety of the response.

        Args:
            outputs: The response from the model, e.g. "The capital of France is Paris."

        Returns:
            An :py:class:`~mlflow.entities.assessment.Feedback` object with a boolean value
            indicating the safety of the response.
        """
        return judges.is_safe(content=parse_output_to_str(outputs), name=self.name)


@experimental(version="3.0.0")
class Correctness(BuiltInScorer):
    """
    Correctness ensures that the agent's responses are correct and accurate.

    You can invoke the scorer directly with a single input for testing, or pass it to
    `mlflow.genai.evaluate` for running full evaluation on a dataset.

    Example (direct usage):

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import Correctness

        assessment = Correctness(name="my_correctness")(
            inputs={
                "question": "What is the difference between reduceByKey and groupByKey in Spark?"
            },
            outputs=(
                "reduceByKey aggregates data before shuffling, whereas groupByKey "
                "shuffles all data, making reduceByKey more efficient."
            ),
            expectations={
                "expected_facts": [
                    "reduceByKey aggregates data before shuffling",
                    "groupByKey shuffles all data",
                ],
            },
        )
        print(assessment)

    Example (with evaluate):

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import Correctness

        data = [
            {
                "inputs": {
                    "question": (
                        "What is the difference between reduceByKey and groupByKey in Spark?"
                    )
                },
                "outputs": (
                    "reduceByKey aggregates data before shuffling, whereas groupByKey "
                    "shuffles all data, making reduceByKey more efficient."
                ),
                "expectations": {
                    "expected_facts": [
                        "reduceByKey aggregates data before shuffling",
                        "groupByKey shuffles all data",
                    ],
                },
            }
        ]
        result = mlflow.genai.evaluate(data=data, scorers=[Correctness()])
    """

    name: str = "correctness"
    required_columns: set[str] = {"inputs", "outputs"}

    def validate_columns(self, columns: set[str]) -> None:
        super().validate_columns(columns)
        if (
            "expectations/expected_response" not in columns
            and "expectations/expected_facts" not in columns
        ):
            raise MissingColumnsException(
                self.name, ["expectations/expected_response or expectations/expected_facts"]
            )

    def __call__(
        self, *, inputs: dict[str, Any], outputs: Any, expectations: dict[str, Any]
    ) -> Feedback:
        """
        Evaluate the correctness of the response against expectations.

        Args:
            inputs: A dictionary of input data, e.g.
                {"question": "What is the capital of France?"}.
            outputs: The response from the model, e.g. "The capital of France is Paris."
            expectations: A dictionary of expectations for the response. This must contain
                either the `expected_response` or the `expected_facts` key, which is used to
                evaluate the response against the expected response or facts, respectively.
                E.g., {"expected_facts": ["Paris", "France", "Capital"]}

        Returns:
            An :py:class:`~mlflow.entities.assessment.Feedback` object with a boolean value
            indicating the correctness of the response.
        """
        request = parse_inputs_to_str(inputs)
        response = parse_output_to_str(outputs)
        expected_facts = expectations.get("expected_facts")
        expected_response = expectations.get("expected_response")

        if expected_response is None and expected_facts is None:
            raise MlflowException(
                "Correctness scorer requires either `expected_response` or `expected_facts` "
                "in the `expectations` dictionary."
            )

        return judges.is_correct(
            request=request,
            response=response,
            expected_response=expected_response,
            expected_facts=expected_facts,
            name=self.name,
        )


# === Shorthand for getting a preset of builtin scorers ===
@experimental(version="3.0.0")
def get_all_scorers() -> list[BuiltInScorer]:
    """
    Returns a list of all built-in scorers.

    Example:

    .. code-block:: python

        import mlflow
        from mlflow.genai.scorers import get_all_scorers

        data = [
            {
                "inputs": {"question": "What is the capital of France?"},
                "outputs": "The capital of France is Paris.",
                "expectations": {
                    "expected_response": "Paris is the capital city of France.",
                },
            }
        ]
        result = mlflow.genai.evaluate(data=data, scorers=get_all_scorers())
    """
    return [
        ExpectationsGuidelines(),
        Safety(),
        Correctness(),
        RelevanceToQuery(),
        RetrievalRelevance(),
        RetrievalSufficiency(),
        RetrievalGroundedness(),
    ]


class MissingColumnsException(MlflowException):
    def __init__(self, scorer: str, missing_columns: set[str]):
        self.scorer = scorer
        self.missing_columns = list(missing_columns)
        super().__init__(
            f"The following columns are required for the scorer {scorer}: {missing_columns}"
        )
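
For reference, below is a minimal sketch of how the serialization and column-validation hooks defined in this file are meant to be exercised. It assumes this wheel is installed so that `mlflow.genai.scorers.builtin_scorers` is importable, and that the `Scorer` base class (`mlflow/genai/scorers/base.py` in the file list above) supplies the `aggregations` field consumed by `model_dump`; variable names are illustrative, not part of the package.

from mlflow.genai.scorers.builtin_scorers import (
    Guidelines,
    MissingColumnsException,
    Safety,
)

# Round-trip: model_dump() embeds the concrete class name, so
# BuiltInScorer.model_validate() can look the class up on the builtin_scorers
# module and rebuild the right subclass from the stored pydantic data.
english = Guidelines(name="english", guidelines=["The response must be in English"])
payload = english.model_dump()
restored = Guidelines.model_validate(payload)
assert restored.name == "english"
assert restored.guidelines == ["The response must be in English"]

# Column validation: Safety requires both "inputs" and "outputs", so a dataset
# exposing only "inputs" raises MissingColumnsException.
try:
    Safety().validate_columns({"inputs"})
except MissingColumnsException as exc:
    print(exc.missing_columns)  # ["outputs"]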