genesis-flow 1.0.0 (genesis_flow-1.0.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genesis_flow-1.0.0.dist-info/METADATA +822 -0
- genesis_flow-1.0.0.dist-info/RECORD +645 -0
- genesis_flow-1.0.0.dist-info/WHEEL +5 -0
- genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
- genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
- genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
- mlflow/__init__.py +367 -0
- mlflow/__main__.py +3 -0
- mlflow/ag2/__init__.py +56 -0
- mlflow/ag2/ag2_logger.py +294 -0
- mlflow/anthropic/__init__.py +40 -0
- mlflow/anthropic/autolog.py +129 -0
- mlflow/anthropic/chat.py +144 -0
- mlflow/artifacts/__init__.py +268 -0
- mlflow/autogen/__init__.py +144 -0
- mlflow/autogen/chat.py +142 -0
- mlflow/azure/__init__.py +26 -0
- mlflow/azure/auth_handler.py +257 -0
- mlflow/azure/client.py +319 -0
- mlflow/azure/config.py +120 -0
- mlflow/azure/connection_factory.py +340 -0
- mlflow/azure/exceptions.py +27 -0
- mlflow/azure/stores.py +327 -0
- mlflow/azure/utils.py +183 -0
- mlflow/bedrock/__init__.py +45 -0
- mlflow/bedrock/_autolog.py +202 -0
- mlflow/bedrock/chat.py +122 -0
- mlflow/bedrock/stream.py +160 -0
- mlflow/bedrock/utils.py +43 -0
- mlflow/cli.py +707 -0
- mlflow/client.py +12 -0
- mlflow/config/__init__.py +56 -0
- mlflow/crewai/__init__.py +79 -0
- mlflow/crewai/autolog.py +253 -0
- mlflow/crewai/chat.py +29 -0
- mlflow/data/__init__.py +75 -0
- mlflow/data/artifact_dataset_sources.py +170 -0
- mlflow/data/code_dataset_source.py +40 -0
- mlflow/data/dataset.py +123 -0
- mlflow/data/dataset_registry.py +168 -0
- mlflow/data/dataset_source.py +110 -0
- mlflow/data/dataset_source_registry.py +219 -0
- mlflow/data/delta_dataset_source.py +167 -0
- mlflow/data/digest_utils.py +108 -0
- mlflow/data/evaluation_dataset.py +562 -0
- mlflow/data/filesystem_dataset_source.py +81 -0
- mlflow/data/http_dataset_source.py +145 -0
- mlflow/data/huggingface_dataset.py +258 -0
- mlflow/data/huggingface_dataset_source.py +118 -0
- mlflow/data/meta_dataset.py +104 -0
- mlflow/data/numpy_dataset.py +223 -0
- mlflow/data/pandas_dataset.py +231 -0
- mlflow/data/polars_dataset.py +352 -0
- mlflow/data/pyfunc_dataset_mixin.py +31 -0
- mlflow/data/schema.py +76 -0
- mlflow/data/sources.py +1 -0
- mlflow/data/spark_dataset.py +406 -0
- mlflow/data/spark_dataset_source.py +74 -0
- mlflow/data/spark_delta_utils.py +118 -0
- mlflow/data/tensorflow_dataset.py +350 -0
- mlflow/data/uc_volume_dataset_source.py +81 -0
- mlflow/db.py +27 -0
- mlflow/dspy/__init__.py +17 -0
- mlflow/dspy/autolog.py +197 -0
- mlflow/dspy/callback.py +398 -0
- mlflow/dspy/constant.py +1 -0
- mlflow/dspy/load.py +93 -0
- mlflow/dspy/save.py +393 -0
- mlflow/dspy/util.py +109 -0
- mlflow/dspy/wrapper.py +226 -0
- mlflow/entities/__init__.py +104 -0
- mlflow/entities/_mlflow_object.py +52 -0
- mlflow/entities/assessment.py +545 -0
- mlflow/entities/assessment_error.py +80 -0
- mlflow/entities/assessment_source.py +141 -0
- mlflow/entities/dataset.py +92 -0
- mlflow/entities/dataset_input.py +51 -0
- mlflow/entities/dataset_summary.py +62 -0
- mlflow/entities/document.py +48 -0
- mlflow/entities/experiment.py +109 -0
- mlflow/entities/experiment_tag.py +35 -0
- mlflow/entities/file_info.py +45 -0
- mlflow/entities/input_tag.py +35 -0
- mlflow/entities/lifecycle_stage.py +35 -0
- mlflow/entities/logged_model.py +228 -0
- mlflow/entities/logged_model_input.py +26 -0
- mlflow/entities/logged_model_output.py +32 -0
- mlflow/entities/logged_model_parameter.py +46 -0
- mlflow/entities/logged_model_status.py +74 -0
- mlflow/entities/logged_model_tag.py +33 -0
- mlflow/entities/metric.py +200 -0
- mlflow/entities/model_registry/__init__.py +29 -0
- mlflow/entities/model_registry/_model_registry_entity.py +13 -0
- mlflow/entities/model_registry/model_version.py +243 -0
- mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
- mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
- mlflow/entities/model_registry/model_version_search.py +25 -0
- mlflow/entities/model_registry/model_version_stages.py +25 -0
- mlflow/entities/model_registry/model_version_status.py +35 -0
- mlflow/entities/model_registry/model_version_tag.py +35 -0
- mlflow/entities/model_registry/prompt.py +73 -0
- mlflow/entities/model_registry/prompt_version.py +244 -0
- mlflow/entities/model_registry/registered_model.py +175 -0
- mlflow/entities/model_registry/registered_model_alias.py +35 -0
- mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
- mlflow/entities/model_registry/registered_model_search.py +25 -0
- mlflow/entities/model_registry/registered_model_tag.py +35 -0
- mlflow/entities/multipart_upload.py +74 -0
- mlflow/entities/param.py +49 -0
- mlflow/entities/run.py +97 -0
- mlflow/entities/run_data.py +84 -0
- mlflow/entities/run_info.py +188 -0
- mlflow/entities/run_inputs.py +59 -0
- mlflow/entities/run_outputs.py +43 -0
- mlflow/entities/run_status.py +41 -0
- mlflow/entities/run_tag.py +36 -0
- mlflow/entities/source_type.py +31 -0
- mlflow/entities/span.py +774 -0
- mlflow/entities/span_event.py +96 -0
- mlflow/entities/span_status.py +102 -0
- mlflow/entities/trace.py +317 -0
- mlflow/entities/trace_data.py +71 -0
- mlflow/entities/trace_info.py +220 -0
- mlflow/entities/trace_info_v2.py +162 -0
- mlflow/entities/trace_location.py +173 -0
- mlflow/entities/trace_state.py +39 -0
- mlflow/entities/trace_status.py +68 -0
- mlflow/entities/view_type.py +51 -0
- mlflow/environment_variables.py +866 -0
- mlflow/evaluation/__init__.py +16 -0
- mlflow/evaluation/assessment.py +369 -0
- mlflow/evaluation/evaluation.py +411 -0
- mlflow/evaluation/evaluation_tag.py +61 -0
- mlflow/evaluation/fluent.py +48 -0
- mlflow/evaluation/utils.py +201 -0
- mlflow/exceptions.py +213 -0
- mlflow/experiments.py +140 -0
- mlflow/gemini/__init__.py +81 -0
- mlflow/gemini/autolog.py +186 -0
- mlflow/gemini/chat.py +261 -0
- mlflow/genai/__init__.py +71 -0
- mlflow/genai/datasets/__init__.py +67 -0
- mlflow/genai/datasets/evaluation_dataset.py +131 -0
- mlflow/genai/evaluation/__init__.py +3 -0
- mlflow/genai/evaluation/base.py +411 -0
- mlflow/genai/evaluation/constant.py +23 -0
- mlflow/genai/evaluation/utils.py +244 -0
- mlflow/genai/judges/__init__.py +21 -0
- mlflow/genai/judges/databricks.py +404 -0
- mlflow/genai/label_schemas/__init__.py +153 -0
- mlflow/genai/label_schemas/label_schemas.py +209 -0
- mlflow/genai/labeling/__init__.py +159 -0
- mlflow/genai/labeling/labeling.py +250 -0
- mlflow/genai/optimize/__init__.py +13 -0
- mlflow/genai/optimize/base.py +198 -0
- mlflow/genai/optimize/optimizers/__init__.py +4 -0
- mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
- mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
- mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
- mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
- mlflow/genai/optimize/types.py +75 -0
- mlflow/genai/optimize/util.py +30 -0
- mlflow/genai/prompts/__init__.py +206 -0
- mlflow/genai/scheduled_scorers.py +431 -0
- mlflow/genai/scorers/__init__.py +26 -0
- mlflow/genai/scorers/base.py +492 -0
- mlflow/genai/scorers/builtin_scorers.py +765 -0
- mlflow/genai/scorers/scorer_utils.py +138 -0
- mlflow/genai/scorers/validation.py +165 -0
- mlflow/genai/utils/data_validation.py +146 -0
- mlflow/genai/utils/enum_utils.py +23 -0
- mlflow/genai/utils/trace_utils.py +211 -0
- mlflow/groq/__init__.py +42 -0
- mlflow/groq/_groq_autolog.py +74 -0
- mlflow/johnsnowlabs/__init__.py +888 -0
- mlflow/langchain/__init__.py +24 -0
- mlflow/langchain/api_request_parallel_processor.py +330 -0
- mlflow/langchain/autolog.py +147 -0
- mlflow/langchain/chat_agent_langgraph.py +340 -0
- mlflow/langchain/constant.py +1 -0
- mlflow/langchain/constants.py +1 -0
- mlflow/langchain/databricks_dependencies.py +444 -0
- mlflow/langchain/langchain_tracer.py +597 -0
- mlflow/langchain/model.py +919 -0
- mlflow/langchain/output_parsers.py +142 -0
- mlflow/langchain/retriever_chain.py +153 -0
- mlflow/langchain/runnables.py +527 -0
- mlflow/langchain/utils/chat.py +402 -0
- mlflow/langchain/utils/logging.py +671 -0
- mlflow/langchain/utils/serialization.py +36 -0
- mlflow/legacy_databricks_cli/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
- mlflow/legacy_databricks_cli/configure/provider.py +482 -0
- mlflow/litellm/__init__.py +175 -0
- mlflow/llama_index/__init__.py +22 -0
- mlflow/llama_index/autolog.py +55 -0
- mlflow/llama_index/chat.py +43 -0
- mlflow/llama_index/constant.py +1 -0
- mlflow/llama_index/model.py +577 -0
- mlflow/llama_index/pyfunc_wrapper.py +332 -0
- mlflow/llama_index/serialize_objects.py +188 -0
- mlflow/llama_index/tracer.py +561 -0
- mlflow/metrics/__init__.py +479 -0
- mlflow/metrics/base.py +39 -0
- mlflow/metrics/genai/__init__.py +25 -0
- mlflow/metrics/genai/base.py +101 -0
- mlflow/metrics/genai/genai_metric.py +771 -0
- mlflow/metrics/genai/metric_definitions.py +450 -0
- mlflow/metrics/genai/model_utils.py +371 -0
- mlflow/metrics/genai/prompt_template.py +68 -0
- mlflow/metrics/genai/prompts/__init__.py +0 -0
- mlflow/metrics/genai/prompts/v1.py +422 -0
- mlflow/metrics/genai/utils.py +6 -0
- mlflow/metrics/metric_definitions.py +619 -0
- mlflow/mismatch.py +34 -0
- mlflow/mistral/__init__.py +34 -0
- mlflow/mistral/autolog.py +71 -0
- mlflow/mistral/chat.py +135 -0
- mlflow/ml_package_versions.py +452 -0
- mlflow/models/__init__.py +97 -0
- mlflow/models/auth_policy.py +83 -0
- mlflow/models/cli.py +354 -0
- mlflow/models/container/__init__.py +294 -0
- mlflow/models/container/scoring_server/__init__.py +0 -0
- mlflow/models/container/scoring_server/nginx.conf +39 -0
- mlflow/models/dependencies_schemas.py +287 -0
- mlflow/models/display_utils.py +158 -0
- mlflow/models/docker_utils.py +211 -0
- mlflow/models/evaluation/__init__.py +23 -0
- mlflow/models/evaluation/_shap_patch.py +64 -0
- mlflow/models/evaluation/artifacts.py +194 -0
- mlflow/models/evaluation/base.py +1811 -0
- mlflow/models/evaluation/calibration_curve.py +109 -0
- mlflow/models/evaluation/default_evaluator.py +996 -0
- mlflow/models/evaluation/deprecated.py +23 -0
- mlflow/models/evaluation/evaluator_registry.py +80 -0
- mlflow/models/evaluation/evaluators/classifier.py +704 -0
- mlflow/models/evaluation/evaluators/default.py +233 -0
- mlflow/models/evaluation/evaluators/regressor.py +96 -0
- mlflow/models/evaluation/evaluators/shap.py +296 -0
- mlflow/models/evaluation/lift_curve.py +178 -0
- mlflow/models/evaluation/utils/metric.py +123 -0
- mlflow/models/evaluation/utils/trace.py +179 -0
- mlflow/models/evaluation/validation.py +434 -0
- mlflow/models/flavor_backend.py +93 -0
- mlflow/models/flavor_backend_registry.py +53 -0
- mlflow/models/model.py +1639 -0
- mlflow/models/model_config.py +150 -0
- mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
- mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
- mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
- mlflow/models/python_api.py +369 -0
- mlflow/models/rag_signatures.py +128 -0
- mlflow/models/resources.py +321 -0
- mlflow/models/signature.py +662 -0
- mlflow/models/utils.py +2054 -0
- mlflow/models/wheeled_model.py +280 -0
- mlflow/openai/__init__.py +57 -0
- mlflow/openai/_agent_tracer.py +364 -0
- mlflow/openai/api_request_parallel_processor.py +131 -0
- mlflow/openai/autolog.py +509 -0
- mlflow/openai/constant.py +1 -0
- mlflow/openai/model.py +824 -0
- mlflow/openai/utils/chat_schema.py +367 -0
- mlflow/optuna/__init__.py +3 -0
- mlflow/optuna/storage.py +646 -0
- mlflow/plugins/__init__.py +72 -0
- mlflow/plugins/base.py +358 -0
- mlflow/plugins/builtin/__init__.py +24 -0
- mlflow/plugins/builtin/pytorch_plugin.py +150 -0
- mlflow/plugins/builtin/sklearn_plugin.py +158 -0
- mlflow/plugins/builtin/transformers_plugin.py +187 -0
- mlflow/plugins/cli.py +321 -0
- mlflow/plugins/discovery.py +340 -0
- mlflow/plugins/manager.py +465 -0
- mlflow/plugins/registry.py +316 -0
- mlflow/plugins/templates/framework_plugin_template.py +329 -0
- mlflow/prompt/constants.py +20 -0
- mlflow/prompt/promptlab_model.py +197 -0
- mlflow/prompt/registry_utils.py +248 -0
- mlflow/promptflow/__init__.py +495 -0
- mlflow/protos/__init__.py +0 -0
- mlflow/protos/assessments_pb2.py +174 -0
- mlflow/protos/databricks_artifacts_pb2.py +489 -0
- mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
- mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
- mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
- mlflow/protos/databricks_pb2.py +267 -0
- mlflow/protos/databricks_trace_server_pb2.py +374 -0
- mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
- mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
- mlflow/protos/facet_feature_statistics_pb2.py +296 -0
- mlflow/protos/internal_pb2.py +77 -0
- mlflow/protos/mlflow_artifacts_pb2.py +336 -0
- mlflow/protos/model_registry_pb2.py +1073 -0
- mlflow/protos/scalapb/__init__.py +0 -0
- mlflow/protos/scalapb/scalapb_pb2.py +104 -0
- mlflow/protos/service_pb2.py +2600 -0
- mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
- mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
- mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
- mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
- mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
- mlflow/py.typed +0 -0
- mlflow/pydantic_ai/__init__.py +57 -0
- mlflow/pydantic_ai/autolog.py +173 -0
- mlflow/pyfunc/__init__.py +3844 -0
- mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
- mlflow/pyfunc/backend.py +523 -0
- mlflow/pyfunc/context.py +78 -0
- mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
- mlflow/pyfunc/loaders/__init__.py +7 -0
- mlflow/pyfunc/loaders/chat_agent.py +117 -0
- mlflow/pyfunc/loaders/chat_model.py +125 -0
- mlflow/pyfunc/loaders/code_model.py +31 -0
- mlflow/pyfunc/loaders/responses_agent.py +112 -0
- mlflow/pyfunc/mlserver.py +46 -0
- mlflow/pyfunc/model.py +1473 -0
- mlflow/pyfunc/scoring_server/__init__.py +604 -0
- mlflow/pyfunc/scoring_server/app.py +7 -0
- mlflow/pyfunc/scoring_server/client.py +146 -0
- mlflow/pyfunc/spark_model_cache.py +48 -0
- mlflow/pyfunc/stdin_server.py +44 -0
- mlflow/pyfunc/utils/__init__.py +3 -0
- mlflow/pyfunc/utils/data_validation.py +224 -0
- mlflow/pyfunc/utils/environment.py +22 -0
- mlflow/pyfunc/utils/input_converter.py +47 -0
- mlflow/pyfunc/utils/serving_data_parser.py +11 -0
- mlflow/pytorch/__init__.py +1171 -0
- mlflow/pytorch/_lightning_autolog.py +580 -0
- mlflow/pytorch/_pytorch_autolog.py +50 -0
- mlflow/pytorch/pickle_module.py +35 -0
- mlflow/rfunc/__init__.py +42 -0
- mlflow/rfunc/backend.py +134 -0
- mlflow/runs.py +89 -0
- mlflow/server/__init__.py +302 -0
- mlflow/server/auth/__init__.py +1224 -0
- mlflow/server/auth/__main__.py +4 -0
- mlflow/server/auth/basic_auth.ini +6 -0
- mlflow/server/auth/cli.py +11 -0
- mlflow/server/auth/client.py +537 -0
- mlflow/server/auth/config.py +34 -0
- mlflow/server/auth/db/__init__.py +0 -0
- mlflow/server/auth/db/cli.py +18 -0
- mlflow/server/auth/db/migrations/__init__.py +0 -0
- mlflow/server/auth/db/migrations/alembic.ini +110 -0
- mlflow/server/auth/db/migrations/env.py +76 -0
- mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
- mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
- mlflow/server/auth/db/models.py +67 -0
- mlflow/server/auth/db/utils.py +37 -0
- mlflow/server/auth/entities.py +165 -0
- mlflow/server/auth/logo.py +14 -0
- mlflow/server/auth/permissions.py +65 -0
- mlflow/server/auth/routes.py +18 -0
- mlflow/server/auth/sqlalchemy_store.py +263 -0
- mlflow/server/graphql/__init__.py +0 -0
- mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
- mlflow/server/graphql/graphql_custom_scalars.py +24 -0
- mlflow/server/graphql/graphql_errors.py +15 -0
- mlflow/server/graphql/graphql_no_batching.py +89 -0
- mlflow/server/graphql/graphql_schema_extensions.py +74 -0
- mlflow/server/handlers.py +3217 -0
- mlflow/server/prometheus_exporter.py +17 -0
- mlflow/server/validation.py +30 -0
- mlflow/shap/__init__.py +691 -0
- mlflow/sklearn/__init__.py +1994 -0
- mlflow/sklearn/utils.py +1041 -0
- mlflow/smolagents/__init__.py +66 -0
- mlflow/smolagents/autolog.py +139 -0
- mlflow/smolagents/chat.py +29 -0
- mlflow/store/__init__.py +10 -0
- mlflow/store/_unity_catalog/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
- mlflow/store/_unity_catalog/lineage/constants.py +2 -0
- mlflow/store/_unity_catalog/registry/__init__.py +6 -0
- mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
- mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
- mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
- mlflow/store/_unity_catalog/registry/utils.py +121 -0
- mlflow/store/artifact/__init__.py +0 -0
- mlflow/store/artifact/artifact_repo.py +472 -0
- mlflow/store/artifact/artifact_repository_registry.py +154 -0
- mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
- mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
- mlflow/store/artifact/cli.py +141 -0
- mlflow/store/artifact/cloud_artifact_repo.py +332 -0
- mlflow/store/artifact/databricks_artifact_repo.py +729 -0
- mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
- mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
- mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
- mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
- mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
- mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
- mlflow/store/artifact/ftp_artifact_repo.py +132 -0
- mlflow/store/artifact/gcs_artifact_repo.py +296 -0
- mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
- mlflow/store/artifact/http_artifact_repo.py +218 -0
- mlflow/store/artifact/local_artifact_repo.py +142 -0
- mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
- mlflow/store/artifact/models_artifact_repo.py +259 -0
- mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
- mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
- mlflow/store/artifact/r2_artifact_repo.py +70 -0
- mlflow/store/artifact/runs_artifact_repo.py +265 -0
- mlflow/store/artifact/s3_artifact_repo.py +330 -0
- mlflow/store/artifact/sftp_artifact_repo.py +141 -0
- mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
- mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
- mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
- mlflow/store/artifact/utils/__init__.py +0 -0
- mlflow/store/artifact/utils/models.py +148 -0
- mlflow/store/db/__init__.py +0 -0
- mlflow/store/db/base_sql_model.py +3 -0
- mlflow/store/db/db_types.py +10 -0
- mlflow/store/db/utils.py +314 -0
- mlflow/store/db_migrations/__init__.py +0 -0
- mlflow/store/db_migrations/alembic.ini +74 -0
- mlflow/store/db_migrations/env.py +84 -0
- mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
- mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
- mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
- mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
- mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
- mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
- mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
- mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
- mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
- mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
- mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
- mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
- mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
- mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
- mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
- mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
- mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
- mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
- mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
- mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
- mlflow/store/db_migrations/versions/__init__.py +0 -0
- mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
- mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
- mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
- mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
- mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
- mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
- mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
- mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
- mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
- mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
- mlflow/store/entities/__init__.py +3 -0
- mlflow/store/entities/paged_list.py +18 -0
- mlflow/store/model_registry/__init__.py +10 -0
- mlflow/store/model_registry/abstract_store.py +1081 -0
- mlflow/store/model_registry/base_rest_store.py +44 -0
- mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
- mlflow/store/model_registry/dbmodels/__init__.py +0 -0
- mlflow/store/model_registry/dbmodels/models.py +206 -0
- mlflow/store/model_registry/file_store.py +1091 -0
- mlflow/store/model_registry/rest_store.py +481 -0
- mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
- mlflow/store/tracking/__init__.py +23 -0
- mlflow/store/tracking/abstract_store.py +816 -0
- mlflow/store/tracking/dbmodels/__init__.py +0 -0
- mlflow/store/tracking/dbmodels/initial_models.py +243 -0
- mlflow/store/tracking/dbmodels/models.py +1073 -0
- mlflow/store/tracking/file_store.py +2438 -0
- mlflow/store/tracking/postgres_managed_identity.py +146 -0
- mlflow/store/tracking/rest_store.py +1131 -0
- mlflow/store/tracking/sqlalchemy_store.py +2785 -0
- mlflow/system_metrics/__init__.py +61 -0
- mlflow/system_metrics/metrics/__init__.py +0 -0
- mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
- mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
- mlflow/system_metrics/metrics/disk_monitor.py +21 -0
- mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
- mlflow/system_metrics/metrics/network_monitor.py +34 -0
- mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
- mlflow/system_metrics/system_metrics_monitor.py +198 -0
- mlflow/tracing/__init__.py +16 -0
- mlflow/tracing/assessment.py +356 -0
- mlflow/tracing/client.py +531 -0
- mlflow/tracing/config.py +125 -0
- mlflow/tracing/constant.py +105 -0
- mlflow/tracing/destination.py +81 -0
- mlflow/tracing/display/__init__.py +40 -0
- mlflow/tracing/display/display_handler.py +196 -0
- mlflow/tracing/export/async_export_queue.py +186 -0
- mlflow/tracing/export/inference_table.py +138 -0
- mlflow/tracing/export/mlflow_v3.py +137 -0
- mlflow/tracing/export/utils.py +70 -0
- mlflow/tracing/fluent.py +1417 -0
- mlflow/tracing/processor/base_mlflow.py +199 -0
- mlflow/tracing/processor/inference_table.py +175 -0
- mlflow/tracing/processor/mlflow_v3.py +47 -0
- mlflow/tracing/processor/otel.py +73 -0
- mlflow/tracing/provider.py +487 -0
- mlflow/tracing/trace_manager.py +200 -0
- mlflow/tracing/utils/__init__.py +616 -0
- mlflow/tracing/utils/artifact_utils.py +28 -0
- mlflow/tracing/utils/copy.py +55 -0
- mlflow/tracing/utils/environment.py +55 -0
- mlflow/tracing/utils/exception.py +21 -0
- mlflow/tracing/utils/once.py +35 -0
- mlflow/tracing/utils/otlp.py +63 -0
- mlflow/tracing/utils/processor.py +54 -0
- mlflow/tracing/utils/search.py +292 -0
- mlflow/tracing/utils/timeout.py +250 -0
- mlflow/tracing/utils/token.py +19 -0
- mlflow/tracing/utils/truncation.py +124 -0
- mlflow/tracing/utils/warning.py +76 -0
- mlflow/tracking/__init__.py +39 -0
- mlflow/tracking/_model_registry/__init__.py +1 -0
- mlflow/tracking/_model_registry/client.py +764 -0
- mlflow/tracking/_model_registry/fluent.py +853 -0
- mlflow/tracking/_model_registry/registry.py +67 -0
- mlflow/tracking/_model_registry/utils.py +251 -0
- mlflow/tracking/_tracking_service/__init__.py +0 -0
- mlflow/tracking/_tracking_service/client.py +883 -0
- mlflow/tracking/_tracking_service/registry.py +56 -0
- mlflow/tracking/_tracking_service/utils.py +275 -0
- mlflow/tracking/artifact_utils.py +179 -0
- mlflow/tracking/client.py +5900 -0
- mlflow/tracking/context/__init__.py +0 -0
- mlflow/tracking/context/abstract_context.py +35 -0
- mlflow/tracking/context/databricks_cluster_context.py +15 -0
- mlflow/tracking/context/databricks_command_context.py +15 -0
- mlflow/tracking/context/databricks_job_context.py +49 -0
- mlflow/tracking/context/databricks_notebook_context.py +41 -0
- mlflow/tracking/context/databricks_repo_context.py +43 -0
- mlflow/tracking/context/default_context.py +51 -0
- mlflow/tracking/context/git_context.py +32 -0
- mlflow/tracking/context/registry.py +98 -0
- mlflow/tracking/context/system_environment_context.py +15 -0
- mlflow/tracking/default_experiment/__init__.py +1 -0
- mlflow/tracking/default_experiment/abstract_context.py +43 -0
- mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
- mlflow/tracking/default_experiment/registry.py +75 -0
- mlflow/tracking/fluent.py +3595 -0
- mlflow/tracking/metric_value_conversion_utils.py +93 -0
- mlflow/tracking/multimedia.py +206 -0
- mlflow/tracking/registry.py +86 -0
- mlflow/tracking/request_auth/__init__.py +0 -0
- mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
- mlflow/tracking/request_auth/registry.py +60 -0
- mlflow/tracking/request_header/__init__.py +0 -0
- mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
- mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
- mlflow/tracking/request_header/default_request_header_provider.py +17 -0
- mlflow/tracking/request_header/registry.py +79 -0
- mlflow/transformers/__init__.py +2982 -0
- mlflow/transformers/flavor_config.py +258 -0
- mlflow/transformers/hub_utils.py +83 -0
- mlflow/transformers/llm_inference_utils.py +468 -0
- mlflow/transformers/model_io.py +301 -0
- mlflow/transformers/peft.py +51 -0
- mlflow/transformers/signature.py +183 -0
- mlflow/transformers/torch_utils.py +55 -0
- mlflow/types/__init__.py +21 -0
- mlflow/types/agent.py +270 -0
- mlflow/types/chat.py +240 -0
- mlflow/types/llm.py +935 -0
- mlflow/types/responses.py +139 -0
- mlflow/types/responses_helpers.py +416 -0
- mlflow/types/schema.py +1505 -0
- mlflow/types/type_hints.py +647 -0
- mlflow/types/utils.py +753 -0
- mlflow/utils/__init__.py +283 -0
- mlflow/utils/_capture_modules.py +256 -0
- mlflow/utils/_capture_transformers_modules.py +75 -0
- mlflow/utils/_spark_utils.py +201 -0
- mlflow/utils/_unity_catalog_oss_utils.py +97 -0
- mlflow/utils/_unity_catalog_utils.py +479 -0
- mlflow/utils/annotations.py +218 -0
- mlflow/utils/arguments_utils.py +16 -0
- mlflow/utils/async_logging/__init__.py +1 -0
- mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
- mlflow/utils/async_logging/async_logging_queue.py +366 -0
- mlflow/utils/async_logging/run_artifact.py +38 -0
- mlflow/utils/async_logging/run_batch.py +58 -0
- mlflow/utils/async_logging/run_operations.py +49 -0
- mlflow/utils/autologging_utils/__init__.py +737 -0
- mlflow/utils/autologging_utils/client.py +432 -0
- mlflow/utils/autologging_utils/config.py +33 -0
- mlflow/utils/autologging_utils/events.py +294 -0
- mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
- mlflow/utils/autologging_utils/metrics_queue.py +71 -0
- mlflow/utils/autologging_utils/safety.py +1104 -0
- mlflow/utils/autologging_utils/versioning.py +95 -0
- mlflow/utils/checkpoint_utils.py +206 -0
- mlflow/utils/class_utils.py +6 -0
- mlflow/utils/cli_args.py +257 -0
- mlflow/utils/conda.py +354 -0
- mlflow/utils/credentials.py +231 -0
- mlflow/utils/data_utils.py +17 -0
- mlflow/utils/databricks_utils.py +1436 -0
- mlflow/utils/docstring_utils.py +477 -0
- mlflow/utils/doctor.py +133 -0
- mlflow/utils/download_cloud_file_chunk.py +43 -0
- mlflow/utils/env_manager.py +16 -0
- mlflow/utils/env_pack.py +131 -0
- mlflow/utils/environment.py +1009 -0
- mlflow/utils/exception_utils.py +14 -0
- mlflow/utils/file_utils.py +978 -0
- mlflow/utils/git_utils.py +77 -0
- mlflow/utils/gorilla.py +797 -0
- mlflow/utils/import_hooks/__init__.py +363 -0
- mlflow/utils/lazy_load.py +51 -0
- mlflow/utils/logging_utils.py +168 -0
- mlflow/utils/mime_type_utils.py +58 -0
- mlflow/utils/mlflow_tags.py +103 -0
- mlflow/utils/model_utils.py +486 -0
- mlflow/utils/name_utils.py +346 -0
- mlflow/utils/nfs_on_spark.py +62 -0
- mlflow/utils/openai_utils.py +164 -0
- mlflow/utils/os.py +12 -0
- mlflow/utils/oss_registry_utils.py +29 -0
- mlflow/utils/plugins.py +17 -0
- mlflow/utils/process.py +182 -0
- mlflow/utils/promptlab_utils.py +146 -0
- mlflow/utils/proto_json_utils.py +743 -0
- mlflow/utils/pydantic_utils.py +54 -0
- mlflow/utils/request_utils.py +279 -0
- mlflow/utils/requirements_utils.py +704 -0
- mlflow/utils/rest_utils.py +673 -0
- mlflow/utils/search_logged_model_utils.py +127 -0
- mlflow/utils/search_utils.py +2111 -0
- mlflow/utils/secure_loading.py +221 -0
- mlflow/utils/security_validation.py +384 -0
- mlflow/utils/server_cli_utils.py +61 -0
- mlflow/utils/spark_utils.py +15 -0
- mlflow/utils/string_utils.py +138 -0
- mlflow/utils/thread_utils.py +63 -0
- mlflow/utils/time.py +54 -0
- mlflow/utils/timeout.py +42 -0
- mlflow/utils/uri.py +572 -0
- mlflow/utils/validation.py +662 -0
- mlflow/utils/virtualenv.py +458 -0
- mlflow/utils/warnings_utils.py +25 -0
- mlflow/utils/yaml_utils.py +179 -0
- mlflow/version.py +24 -0
mlflow/metrics/__init__.py ADDED
@@ -0,0 +1,479 @@
```python
from mlflow.metrics import genai
from mlflow.metrics.base import (
    MetricValue,
)
from mlflow.metrics.metric_definitions import (
    _accuracy_eval_fn,
    _ari_eval_fn,
    _bleu_eval_fn,
    _f1_score_eval_fn,
    _flesch_kincaid_eval_fn,
    _mae_eval_fn,
    _mape_eval_fn,
    _max_error_eval_fn,
    _mse_eval_fn,
    _ndcg_at_k_eval_fn,
    _precision_at_k_eval_fn,
    _precision_eval_fn,
    _r2_score_eval_fn,
    _recall_at_k_eval_fn,
    _recall_eval_fn,
    _rmse_eval_fn,
    _rouge1_eval_fn,
    _rouge2_eval_fn,
    _rougeL_eval_fn,
    _rougeLsum_eval_fn,
    _token_count_eval_fn,
    _toxicity_eval_fn,
)
from mlflow.models import (
    EvaluationMetric,
    make_metric,
)
from mlflow.utils.annotations import experimental


def latency() -> EvaluationMetric:
    """
    This function will create a metric for calculating latency. Latency is determined by the time
    it takes to generate a prediction for a given input. Note that computing latency requires
    each row to be predicted sequentially, which will likely slow down the evaluation process.
    """
    return make_metric(
        eval_fn=lambda x: MetricValue(),
        greater_is_better=False,
        name="latency",
    )


# general text metrics
def token_count() -> EvaluationMetric:
    """
    This function will create a metric for calculating token_count. Token count is calculated
    using tiktoken by using the `cl100k_base` tokenizer.

    Note: For air-gapped environments, you can set the TIKTOKEN_CACHE_DIR environment variable
    to specify a local cache directory for tiktoken to avoid downloading the tokenizer files.
    """
    return make_metric(
        eval_fn=_token_count_eval_fn,
        greater_is_better=True,
        name="token_count",
    )


def toxicity() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `toxicity`_ using the model
    `roberta-hate-speech-dynabench-r4`_, which defines hate as "abusive speech targeting
    specific group characteristics, such as ethnic origin, religion, gender, or sexual
    orientation."

    The score ranges from 0 to 1, where scores closer to 1 are more toxic. The default threshold
    for a text to be considered "toxic" is 0.5.

    Aggregations calculated for this metric:
        - ratio (of toxic input texts)

    .. _toxicity: https://huggingface.co/spaces/evaluate-measurement/toxicity
    .. _roberta-hate-speech-dynabench-r4: https://huggingface.co/facebook/roberta-hate-speech-dynabench-r4-target
    """
    return make_metric(
        eval_fn=_toxicity_eval_fn,
        greater_is_better=False,
        name="toxicity",
        long_name="toxicity/roberta-hate-speech-dynabench-r4",
        version="v1",
    )


def flesch_kincaid_grade_level() -> EvaluationMetric:
    """
    This function will create a metric for calculating `flesch kincaid grade level`_ using
    `textstat`_.

    This metric outputs a number that approximates the grade level needed to comprehend the text,
    which will likely range from around 0 to 15 (although it is not limited to this range).

    Aggregations calculated for this metric:
        - mean

    .. _flesch kincaid grade level:
        https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch%E2%80%93Kincaid_grade_level
    .. _textstat: https://pypi.org/project/textstat/
    """
    return make_metric(
        eval_fn=_flesch_kincaid_eval_fn,
        greater_is_better=False,
        name="flesch_kincaid_grade_level",
        version="v1",
    )


def ari_grade_level() -> EvaluationMetric:
    """
    This function will create a metric for calculating `automated readability index`_ using
    `textstat`_.

    This metric outputs a number that approximates the grade level needed to comprehend the text,
    which will likely range from around 0 to 15 (although it is not limited to this range).

    Aggregations calculated for this metric:
        - mean

    .. _automated readability index: https://en.wikipedia.org/wiki/Automated_readability_index
    .. _textstat: https://pypi.org/project/textstat/
    """
    return make_metric(
        eval_fn=_ari_eval_fn,
        greater_is_better=False,
        name="ari_grade_level",
        long_name="automated_readability_index_grade_level",
        version="v1",
    )


# question answering metrics
def exact_match() -> EvaluationMetric:
    """
    This function will create a metric for calculating `accuracy`_ using sklearn.

    This metric only computes an aggregate score which ranges from 0 to 1.

    .. _accuracy: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
    """
    return make_metric(
        eval_fn=_accuracy_eval_fn, greater_is_better=True, name="exact_match", version="v1"
    )


# text summarization metrics
def rouge1() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `rouge1`_.

    The score ranges from 0 to 1, where a higher score indicates higher similarity.
    `rouge1`_ uses unigram based scoring to calculate similarity.

    Aggregations calculated for this metric:
        - mean

    .. _rouge1: https://huggingface.co/spaces/evaluate-metric/rouge
    """
    return make_metric(
        eval_fn=_rouge1_eval_fn,
        greater_is_better=True,
        name="rouge1",
        version="v1",
    )


def rouge2() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `rouge2`_.

    The score ranges from 0 to 1, where a higher score indicates higher similarity.
    `rouge2`_ uses bigram based scoring to calculate similarity.

    Aggregations calculated for this metric:
        - mean

    .. _rouge2: https://huggingface.co/spaces/evaluate-metric/rouge
    """
    return make_metric(
        eval_fn=_rouge2_eval_fn,
        greater_is_better=True,
        name="rouge2",
        version="v1",
    )


def rougeL() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `rougeL`_.

    The score ranges from 0 to 1, where a higher score indicates higher similarity.
    `rougeL`_ uses longest common subsequence based scoring to calculate similarity.

    Aggregations calculated for this metric:
        - mean

    .. _rougeL: https://huggingface.co/spaces/evaluate-metric/rouge
    """
    return make_metric(
        eval_fn=_rougeL_eval_fn,
        greater_is_better=True,
        name="rougeL",
        version="v1",
    )


def rougeLsum() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `rougeLsum`_.

    The score ranges from 0 to 1, where a higher score indicates higher similarity.
    `rougeLsum`_ uses longest common subsequence based scoring to calculate similarity.

    Aggregations calculated for this metric:
        - mean

    .. _rougeLsum: https://huggingface.co/spaces/evaluate-metric/rouge
    """
    return make_metric(
        eval_fn=_rougeLsum_eval_fn,
        greater_is_better=True,
        name="rougeLsum",
        version="v1",
    )


def precision_at_k(k) -> EvaluationMetric:
    """
    This function will create a metric for calculating ``precision_at_k`` for retriever models.

    This metric computes a score between 0 and 1 for each row representing the precision of the
    retriever model at the given ``k`` value. If no relevant documents are retrieved, the score is
    0, indicating that no relevant docs are retrieved. Let ``x = min(k, # of retrieved doc IDs)``.
    Then, in all other cases, the precision at k is calculated as follows:

    ``precision_at_k`` = (# of relevant retrieved doc IDs in top-``x`` ranked docs) / ``x``.
    """
    return make_metric(
        eval_fn=_precision_at_k_eval_fn(k),
        greater_is_better=True,
        name=f"precision_at_{k}",
    )


def recall_at_k(k) -> EvaluationMetric:
    """
    This function will create a metric for calculating ``recall_at_k`` for retriever models.

    This metric computes a score between 0 and 1 for each row representing the recall ability of
    the retriever model at the given ``k`` value. If no ground truth doc IDs are provided and no
    documents are retrieved, the score is 1. However, if no ground truth doc IDs are provided and
    documents are retrieved, the score is 0. In all other cases, the recall at k is calculated as
    follows:

    ``recall_at_k`` = (# of unique relevant retrieved doc IDs in top-``k`` ranked docs) / (# of
    ground truth doc IDs)
    """
    return make_metric(
        eval_fn=_recall_at_k_eval_fn(k),
        greater_is_better=True,
        name=f"recall_at_{k}",
    )


def ndcg_at_k(k) -> EvaluationMetric:
    """
    This function will create a metric for evaluating `NDCG@k`_ for retriever models.

    NDCG score is capable of handling non-binary notions of relevance. However, for simplicity,
    we use binary relevance here. The relevance score for documents in the ground truth is 1,
    and the relevance score for documents not in the ground truth is 0.

    The NDCG score is calculated using sklearn.metrics.ndcg_score with the following edge cases
    on top of the sklearn implementation:

    1. If no ground truth doc IDs are provided and no documents are retrieved, the score is 1.
    2. If no ground truth doc IDs are provided and documents are retrieved, the score is 0.
    3. If ground truth doc IDs are provided and no documents are retrieved, the score is 0.
    4. If duplicate doc IDs are retrieved and the duplicate doc IDs are in the ground truth,
       they will be treated as different docs. For example, if the ground truth doc IDs are
       [1, 2] and the retrieved doc IDs are [1, 1, 1, 3], the score will be equivalent to
       ground truth doc IDs [10, 11, 12, 2] and retrieved doc IDs [10, 11, 12, 3].

    .. _NDCG@k: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html
    """
    return make_metric(
        eval_fn=_ndcg_at_k_eval_fn(k),
        greater_is_better=True,
        name=f"ndcg_at_{k}",
    )


# General Regression Metrics
def mae() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `mae`_.

    This metric computes an aggregate score for the mean absolute error for regression.

    .. _mae: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html
    """
    return make_metric(
        eval_fn=_mae_eval_fn,
        greater_is_better=False,
        name="mean_absolute_error",
    )


def mse() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `mse`_.

    This metric computes an aggregate score for the mean squared error for regression.

    .. _mse: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html
    """
    return make_metric(
        eval_fn=_mse_eval_fn,
        greater_is_better=False,
        name="mean_squared_error",
    )


def rmse() -> EvaluationMetric:
    """
    This function will create a metric for evaluating the square root of `mse`_.

    This metric computes an aggregate score for the root mean squared error for regression.

    .. _mse: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html
    """

    return make_metric(
        eval_fn=_rmse_eval_fn,
        greater_is_better=False,
        name="root_mean_squared_error",
    )


def r2_score() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `r2_score`_.

    This metric computes an aggregate score for the coefficient of determination. R2 ranges from
    negative infinity to 1, and measures the percentage of variance explained by the predictor
    variables in a regression.

    .. _r2_score: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html
    """
    return make_metric(
        eval_fn=_r2_score_eval_fn,
        greater_is_better=True,
        name="r2_score",
    )


def max_error() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `max_error`_.

    This metric computes an aggregate score for the maximum residual error for regression.

    .. _max_error: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.max_error.html
    """
    return make_metric(
        eval_fn=_max_error_eval_fn,
        greater_is_better=False,
        name="max_error",
    )


def mape() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `mape`_.

    This metric computes an aggregate score for the mean absolute percentage error for regression.

    .. _mape: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_percentage_error.html
    """
    return make_metric(
        eval_fn=_mape_eval_fn,
        greater_is_better=False,
        name="mean_absolute_percentage_error",
    )


# Binary Classification Metrics


def recall_score() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `recall`_ for classification.

    This metric computes an aggregate score between 0 and 1 for the recall of a classification
    task.

    .. _recall: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.recall_score.html
    """
    return make_metric(eval_fn=_recall_eval_fn, greater_is_better=True, name="recall_score")


def precision_score() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `precision`_ for classification.

    This metric computes an aggregate score between 0 and 1 for the precision of a
    classification task.

    .. _precision: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html
    """
    return make_metric(eval_fn=_precision_eval_fn, greater_is_better=True, name="precision_score")


def f1_score() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `f1_score`_ for binary classification.

    This metric computes an aggregate score between 0 and 1 for the F1 score (F-measure) of a
    classification task. F1 score is defined as 2 * (precision * recall) / (precision + recall).

    .. _f1_score: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html
    """
    return make_metric(eval_fn=_f1_score_eval_fn, greater_is_better=True, name="f1_score")


@experimental(version="2.18.0")
def bleu() -> EvaluationMetric:
    """
    This function will create a metric for evaluating `bleu`_.

    The BLEU scores range from 0 to 1, with higher scores indicating greater similarity to
    reference texts. BLEU considers n-gram precision and brevity penalty. While adding more
    references can boost the score, perfect scores are rare and not essential for effective
    evaluation.

    Aggregations calculated for this metric:
        - mean
        - variance
        - p90

    .. _bleu: https://huggingface.co/spaces/evaluate-metric/bleu
    """
    return make_metric(
        eval_fn=_bleu_eval_fn,
        greater_is_better=True,
        name="bleu",
        version="v1",
    )


__all__ = [
    "EvaluationMetric",
    "MetricValue",
    "make_metric",
    "flesch_kincaid_grade_level",
    "ari_grade_level",
    "exact_match",
    "rouge1",
    "rouge2",
    "rougeL",
    "rougeLsum",
    "toxicity",
    "mae",
    "mse",
    "rmse",
    "r2_score",
    "max_error",
    "mape",
    "recall_score",
    "precision_score",
    "f1_score",
    "token_count",
    "latency",
    "genai",
    "bleu",
]
```
mlflow/metrics/base.py ADDED
@@ -0,0 +1,39 @@
```python
from dataclasses import dataclass
from typing import Optional, Union

import numpy as np

from mlflow.utils.validation import _is_numeric


def standard_aggregations(scores):
    return {
        "mean": np.mean(scores),
        "variance": np.var(scores),
        "p90": np.percentile(scores, 90),
    }


@dataclass
class MetricValue:
    """
    The value of a metric.

    Args:
        scores: The value of the metric per row
        justifications: The justification (if applicable) for the respective score
        aggregate_results: A dictionary mapping the name of the aggregation to its value
    """

    scores: Optional[Union[list[str], list[float]]] = None
    justifications: Optional[list[str]] = None
    aggregate_results: Optional[dict[str, float]] = None

    def __post_init__(self):
        if (
            self.aggregate_results is None
            and isinstance(self.scores, (list, tuple))
            and all(_is_numeric(score) for score in self.scores)
        ):
            self.aggregate_results = standard_aggregations(self.scores)
```
mlflow/metrics/genai/__init__.py ADDED
@@ -0,0 +1,25 @@
```python
from mlflow.metrics.genai.base import EvaluationExample
from mlflow.metrics.genai.genai_metric import (
    make_genai_metric,
    make_genai_metric_from_prompt,
    retrieve_custom_metrics,
)
from mlflow.metrics.genai.metric_definitions import (
    answer_correctness,
    answer_relevance,
    answer_similarity,
    faithfulness,
    relevance,
)

__all__ = [
    "EvaluationExample",
    "make_genai_metric",
    "make_genai_metric_from_prompt",
    "answer_similarity",
    "answer_correctness",
    "faithfulness",
    "answer_relevance",
    "relevance",
    "retrieve_custom_metrics",
]
```
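This entry point pairs the judge-based factories with `EvaluationExample` from `mlflow/metrics/genai/base.py` below. A hedged sketch of defining a custom LLM-judged metric with `make_genai_metric`; the judge model URI, prompt text, and scores are illustrative assumptions:

```python
from mlflow.metrics.genai import EvaluationExample, make_genai_metric

example = EvaluationExample(
    input="What is MLflow?",
    output="MLflow manages the end-to-end ML lifecycle.",
    score=4,
    justification="Accurate and nearly complete, but terse.",
    grading_context={"ground_truth": "MLflow is an open-source ML lifecycle platform."},
)

conciseness = make_genai_metric(
    name="conciseness",
    definition="Conciseness measures whether the answer avoids unnecessary detail.",
    grading_prompt="Give a score from 1-5, where 5 is maximally concise yet still correct.",
    grading_context_columns=["ground_truth"],
    examples=[example],  # few-shot examples rendered via EvaluationExample.__str__
    model="openai:/gpt-4",  # judge model URI; an assumption for illustration
    greater_is_better=True,
)
```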
mlflow/metrics/genai/base.py ADDED
@@ -0,0 +1,101 @@
```python
from dataclasses import dataclass
from typing import Optional, Union

from mlflow.metrics.genai.prompt_template import PromptTemplate


@dataclass
class EvaluationExample:
    """
    Stores a sample example used for few-shot learning during LLM evaluation.

    Args:
        input: The input provided to the model
        output: The output generated by the model
        score: The score given by the evaluator
        justification: The justification given by the evaluator
        grading_context: The grading_context provided to the evaluator for evaluation. Either
            a dictionary of grading context column names and grading context strings
            or a single grading context string.

    .. code-block:: python
        :caption: Example for creating an EvaluationExample

        from mlflow.metrics.genai import EvaluationExample

        example = EvaluationExample(
            input="What is MLflow?",
            output="MLflow is an open-source platform for managing machine "
            "learning workflows, including experiment tracking, model packaging, "
            "versioning, and deployment, simplifying the ML lifecycle.",
            score=4,
            justification="The definition effectively explains what MLflow is, "
            "its purpose, and its developer. It could be more concise for a 5-score.",
            grading_context={
                "ground_truth": "MLflow is an open-source platform for managing "
                "the end-to-end machine learning (ML) lifecycle. It was developed by Databricks, "
                "a company that specializes in big data and machine learning solutions. MLflow is "
                "designed to address the challenges that data scientists and machine learning "
                "engineers face when developing, training, and deploying machine learning models."
            },
        )
        print(str(example))

    .. code-block:: text
        :caption: Output

        Input: What is MLflow?
        Provided output: "MLflow is an open-source platform for managing machine "
        "learning workflows, including experiment tracking, model packaging, "
        "versioning, and deployment, simplifying the ML lifecycle."
        Provided ground_truth: "MLflow is an open-source platform for managing "
        "the end-to-end machine learning (ML) lifecycle. It was developed by Databricks, "
        "a company that specializes in big data and machine learning solutions. MLflow is "
        "designed to address the challenges that data scientists and machine learning "
        "engineers face when developing, training, and deploying machine learning models."
        Score: 4
        Justification: "The definition effectively explains what MLflow is, "
        "its purpose, and its developer. It could be more concise for a 5-score."
    """

    output: str
    score: float
    justification: str
    input: Optional[str] = None
    grading_context: Optional[Union[dict[str, str], str]] = None

    def _format_grading_context(self):
        if isinstance(self.grading_context, dict):
            return "\n".join(
                [f"key: {key}\nvalue:\n{value}" for key, value in self.grading_context.items()]
            )
        else:
            return self.grading_context

    def __str__(self) -> str:
        return PromptTemplate(
            [
                """
Example Input:
{input}
""",
                """
Example Output:
{output}
""",
                """
Additional information used by the model:
{grading_context}
""",
                """
Example score: {score}
Example justification: {justification}
""",
            ]
        ).format(
            input=self.input,
            output=self.output,
            grading_context=self._format_grading_context(),
            score=self.score,
            justification=self.justification,
        )
```