genesis_flow-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (645)
  1. genesis_flow-1.0.0.dist-info/METADATA +822 -0
  2. genesis_flow-1.0.0.dist-info/RECORD +645 -0
  3. genesis_flow-1.0.0.dist-info/WHEEL +5 -0
  4. genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
  5. genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
  6. genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
  7. mlflow/__init__.py +367 -0
  8. mlflow/__main__.py +3 -0
  9. mlflow/ag2/__init__.py +56 -0
  10. mlflow/ag2/ag2_logger.py +294 -0
  11. mlflow/anthropic/__init__.py +40 -0
  12. mlflow/anthropic/autolog.py +129 -0
  13. mlflow/anthropic/chat.py +144 -0
  14. mlflow/artifacts/__init__.py +268 -0
  15. mlflow/autogen/__init__.py +144 -0
  16. mlflow/autogen/chat.py +142 -0
  17. mlflow/azure/__init__.py +26 -0
  18. mlflow/azure/auth_handler.py +257 -0
  19. mlflow/azure/client.py +319 -0
  20. mlflow/azure/config.py +120 -0
  21. mlflow/azure/connection_factory.py +340 -0
  22. mlflow/azure/exceptions.py +27 -0
  23. mlflow/azure/stores.py +327 -0
  24. mlflow/azure/utils.py +183 -0
  25. mlflow/bedrock/__init__.py +45 -0
  26. mlflow/bedrock/_autolog.py +202 -0
  27. mlflow/bedrock/chat.py +122 -0
  28. mlflow/bedrock/stream.py +160 -0
  29. mlflow/bedrock/utils.py +43 -0
  30. mlflow/cli.py +707 -0
  31. mlflow/client.py +12 -0
  32. mlflow/config/__init__.py +56 -0
  33. mlflow/crewai/__init__.py +79 -0
  34. mlflow/crewai/autolog.py +253 -0
  35. mlflow/crewai/chat.py +29 -0
  36. mlflow/data/__init__.py +75 -0
  37. mlflow/data/artifact_dataset_sources.py +170 -0
  38. mlflow/data/code_dataset_source.py +40 -0
  39. mlflow/data/dataset.py +123 -0
  40. mlflow/data/dataset_registry.py +168 -0
  41. mlflow/data/dataset_source.py +110 -0
  42. mlflow/data/dataset_source_registry.py +219 -0
  43. mlflow/data/delta_dataset_source.py +167 -0
  44. mlflow/data/digest_utils.py +108 -0
  45. mlflow/data/evaluation_dataset.py +562 -0
  46. mlflow/data/filesystem_dataset_source.py +81 -0
  47. mlflow/data/http_dataset_source.py +145 -0
  48. mlflow/data/huggingface_dataset.py +258 -0
  49. mlflow/data/huggingface_dataset_source.py +118 -0
  50. mlflow/data/meta_dataset.py +104 -0
  51. mlflow/data/numpy_dataset.py +223 -0
  52. mlflow/data/pandas_dataset.py +231 -0
  53. mlflow/data/polars_dataset.py +352 -0
  54. mlflow/data/pyfunc_dataset_mixin.py +31 -0
  55. mlflow/data/schema.py +76 -0
  56. mlflow/data/sources.py +1 -0
  57. mlflow/data/spark_dataset.py +406 -0
  58. mlflow/data/spark_dataset_source.py +74 -0
  59. mlflow/data/spark_delta_utils.py +118 -0
  60. mlflow/data/tensorflow_dataset.py +350 -0
  61. mlflow/data/uc_volume_dataset_source.py +81 -0
  62. mlflow/db.py +27 -0
  63. mlflow/dspy/__init__.py +17 -0
  64. mlflow/dspy/autolog.py +197 -0
  65. mlflow/dspy/callback.py +398 -0
  66. mlflow/dspy/constant.py +1 -0
  67. mlflow/dspy/load.py +93 -0
  68. mlflow/dspy/save.py +393 -0
  69. mlflow/dspy/util.py +109 -0
  70. mlflow/dspy/wrapper.py +226 -0
  71. mlflow/entities/__init__.py +104 -0
  72. mlflow/entities/_mlflow_object.py +52 -0
  73. mlflow/entities/assessment.py +545 -0
  74. mlflow/entities/assessment_error.py +80 -0
  75. mlflow/entities/assessment_source.py +141 -0
  76. mlflow/entities/dataset.py +92 -0
  77. mlflow/entities/dataset_input.py +51 -0
  78. mlflow/entities/dataset_summary.py +62 -0
  79. mlflow/entities/document.py +48 -0
  80. mlflow/entities/experiment.py +109 -0
  81. mlflow/entities/experiment_tag.py +35 -0
  82. mlflow/entities/file_info.py +45 -0
  83. mlflow/entities/input_tag.py +35 -0
  84. mlflow/entities/lifecycle_stage.py +35 -0
  85. mlflow/entities/logged_model.py +228 -0
  86. mlflow/entities/logged_model_input.py +26 -0
  87. mlflow/entities/logged_model_output.py +32 -0
  88. mlflow/entities/logged_model_parameter.py +46 -0
  89. mlflow/entities/logged_model_status.py +74 -0
  90. mlflow/entities/logged_model_tag.py +33 -0
  91. mlflow/entities/metric.py +200 -0
  92. mlflow/entities/model_registry/__init__.py +29 -0
  93. mlflow/entities/model_registry/_model_registry_entity.py +13 -0
  94. mlflow/entities/model_registry/model_version.py +243 -0
  95. mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
  96. mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
  97. mlflow/entities/model_registry/model_version_search.py +25 -0
  98. mlflow/entities/model_registry/model_version_stages.py +25 -0
  99. mlflow/entities/model_registry/model_version_status.py +35 -0
  100. mlflow/entities/model_registry/model_version_tag.py +35 -0
  101. mlflow/entities/model_registry/prompt.py +73 -0
  102. mlflow/entities/model_registry/prompt_version.py +244 -0
  103. mlflow/entities/model_registry/registered_model.py +175 -0
  104. mlflow/entities/model_registry/registered_model_alias.py +35 -0
  105. mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
  106. mlflow/entities/model_registry/registered_model_search.py +25 -0
  107. mlflow/entities/model_registry/registered_model_tag.py +35 -0
  108. mlflow/entities/multipart_upload.py +74 -0
  109. mlflow/entities/param.py +49 -0
  110. mlflow/entities/run.py +97 -0
  111. mlflow/entities/run_data.py +84 -0
  112. mlflow/entities/run_info.py +188 -0
  113. mlflow/entities/run_inputs.py +59 -0
  114. mlflow/entities/run_outputs.py +43 -0
  115. mlflow/entities/run_status.py +41 -0
  116. mlflow/entities/run_tag.py +36 -0
  117. mlflow/entities/source_type.py +31 -0
  118. mlflow/entities/span.py +774 -0
  119. mlflow/entities/span_event.py +96 -0
  120. mlflow/entities/span_status.py +102 -0
  121. mlflow/entities/trace.py +317 -0
  122. mlflow/entities/trace_data.py +71 -0
  123. mlflow/entities/trace_info.py +220 -0
  124. mlflow/entities/trace_info_v2.py +162 -0
  125. mlflow/entities/trace_location.py +173 -0
  126. mlflow/entities/trace_state.py +39 -0
  127. mlflow/entities/trace_status.py +68 -0
  128. mlflow/entities/view_type.py +51 -0
  129. mlflow/environment_variables.py +866 -0
  130. mlflow/evaluation/__init__.py +16 -0
  131. mlflow/evaluation/assessment.py +369 -0
  132. mlflow/evaluation/evaluation.py +411 -0
  133. mlflow/evaluation/evaluation_tag.py +61 -0
  134. mlflow/evaluation/fluent.py +48 -0
  135. mlflow/evaluation/utils.py +201 -0
  136. mlflow/exceptions.py +213 -0
  137. mlflow/experiments.py +140 -0
  138. mlflow/gemini/__init__.py +81 -0
  139. mlflow/gemini/autolog.py +186 -0
  140. mlflow/gemini/chat.py +261 -0
  141. mlflow/genai/__init__.py +71 -0
  142. mlflow/genai/datasets/__init__.py +67 -0
  143. mlflow/genai/datasets/evaluation_dataset.py +131 -0
  144. mlflow/genai/evaluation/__init__.py +3 -0
  145. mlflow/genai/evaluation/base.py +411 -0
  146. mlflow/genai/evaluation/constant.py +23 -0
  147. mlflow/genai/evaluation/utils.py +244 -0
  148. mlflow/genai/judges/__init__.py +21 -0
  149. mlflow/genai/judges/databricks.py +404 -0
  150. mlflow/genai/label_schemas/__init__.py +153 -0
  151. mlflow/genai/label_schemas/label_schemas.py +209 -0
  152. mlflow/genai/labeling/__init__.py +159 -0
  153. mlflow/genai/labeling/labeling.py +250 -0
  154. mlflow/genai/optimize/__init__.py +13 -0
  155. mlflow/genai/optimize/base.py +198 -0
  156. mlflow/genai/optimize/optimizers/__init__.py +4 -0
  157. mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
  158. mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
  159. mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
  160. mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
  161. mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
  162. mlflow/genai/optimize/types.py +75 -0
  163. mlflow/genai/optimize/util.py +30 -0
  164. mlflow/genai/prompts/__init__.py +206 -0
  165. mlflow/genai/scheduled_scorers.py +431 -0
  166. mlflow/genai/scorers/__init__.py +26 -0
  167. mlflow/genai/scorers/base.py +492 -0
  168. mlflow/genai/scorers/builtin_scorers.py +765 -0
  169. mlflow/genai/scorers/scorer_utils.py +138 -0
  170. mlflow/genai/scorers/validation.py +165 -0
  171. mlflow/genai/utils/data_validation.py +146 -0
  172. mlflow/genai/utils/enum_utils.py +23 -0
  173. mlflow/genai/utils/trace_utils.py +211 -0
  174. mlflow/groq/__init__.py +42 -0
  175. mlflow/groq/_groq_autolog.py +74 -0
  176. mlflow/johnsnowlabs/__init__.py +888 -0
  177. mlflow/langchain/__init__.py +24 -0
  178. mlflow/langchain/api_request_parallel_processor.py +330 -0
  179. mlflow/langchain/autolog.py +147 -0
  180. mlflow/langchain/chat_agent_langgraph.py +340 -0
  181. mlflow/langchain/constant.py +1 -0
  182. mlflow/langchain/constants.py +1 -0
  183. mlflow/langchain/databricks_dependencies.py +444 -0
  184. mlflow/langchain/langchain_tracer.py +597 -0
  185. mlflow/langchain/model.py +919 -0
  186. mlflow/langchain/output_parsers.py +142 -0
  187. mlflow/langchain/retriever_chain.py +153 -0
  188. mlflow/langchain/runnables.py +527 -0
  189. mlflow/langchain/utils/chat.py +402 -0
  190. mlflow/langchain/utils/logging.py +671 -0
  191. mlflow/langchain/utils/serialization.py +36 -0
  192. mlflow/legacy_databricks_cli/__init__.py +0 -0
  193. mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
  194. mlflow/legacy_databricks_cli/configure/provider.py +482 -0
  195. mlflow/litellm/__init__.py +175 -0
  196. mlflow/llama_index/__init__.py +22 -0
  197. mlflow/llama_index/autolog.py +55 -0
  198. mlflow/llama_index/chat.py +43 -0
  199. mlflow/llama_index/constant.py +1 -0
  200. mlflow/llama_index/model.py +577 -0
  201. mlflow/llama_index/pyfunc_wrapper.py +332 -0
  202. mlflow/llama_index/serialize_objects.py +188 -0
  203. mlflow/llama_index/tracer.py +561 -0
  204. mlflow/metrics/__init__.py +479 -0
  205. mlflow/metrics/base.py +39 -0
  206. mlflow/metrics/genai/__init__.py +25 -0
  207. mlflow/metrics/genai/base.py +101 -0
  208. mlflow/metrics/genai/genai_metric.py +771 -0
  209. mlflow/metrics/genai/metric_definitions.py +450 -0
  210. mlflow/metrics/genai/model_utils.py +371 -0
  211. mlflow/metrics/genai/prompt_template.py +68 -0
  212. mlflow/metrics/genai/prompts/__init__.py +0 -0
  213. mlflow/metrics/genai/prompts/v1.py +422 -0
  214. mlflow/metrics/genai/utils.py +6 -0
  215. mlflow/metrics/metric_definitions.py +619 -0
  216. mlflow/mismatch.py +34 -0
  217. mlflow/mistral/__init__.py +34 -0
  218. mlflow/mistral/autolog.py +71 -0
  219. mlflow/mistral/chat.py +135 -0
  220. mlflow/ml_package_versions.py +452 -0
  221. mlflow/models/__init__.py +97 -0
  222. mlflow/models/auth_policy.py +83 -0
  223. mlflow/models/cli.py +354 -0
  224. mlflow/models/container/__init__.py +294 -0
  225. mlflow/models/container/scoring_server/__init__.py +0 -0
  226. mlflow/models/container/scoring_server/nginx.conf +39 -0
  227. mlflow/models/dependencies_schemas.py +287 -0
  228. mlflow/models/display_utils.py +158 -0
  229. mlflow/models/docker_utils.py +211 -0
  230. mlflow/models/evaluation/__init__.py +23 -0
  231. mlflow/models/evaluation/_shap_patch.py +64 -0
  232. mlflow/models/evaluation/artifacts.py +194 -0
  233. mlflow/models/evaluation/base.py +1811 -0
  234. mlflow/models/evaluation/calibration_curve.py +109 -0
  235. mlflow/models/evaluation/default_evaluator.py +996 -0
  236. mlflow/models/evaluation/deprecated.py +23 -0
  237. mlflow/models/evaluation/evaluator_registry.py +80 -0
  238. mlflow/models/evaluation/evaluators/classifier.py +704 -0
  239. mlflow/models/evaluation/evaluators/default.py +233 -0
  240. mlflow/models/evaluation/evaluators/regressor.py +96 -0
  241. mlflow/models/evaluation/evaluators/shap.py +296 -0
  242. mlflow/models/evaluation/lift_curve.py +178 -0
  243. mlflow/models/evaluation/utils/metric.py +123 -0
  244. mlflow/models/evaluation/utils/trace.py +179 -0
  245. mlflow/models/evaluation/validation.py +434 -0
  246. mlflow/models/flavor_backend.py +93 -0
  247. mlflow/models/flavor_backend_registry.py +53 -0
  248. mlflow/models/model.py +1639 -0
  249. mlflow/models/model_config.py +150 -0
  250. mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
  251. mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
  252. mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
  253. mlflow/models/python_api.py +369 -0
  254. mlflow/models/rag_signatures.py +128 -0
  255. mlflow/models/resources.py +321 -0
  256. mlflow/models/signature.py +662 -0
  257. mlflow/models/utils.py +2054 -0
  258. mlflow/models/wheeled_model.py +280 -0
  259. mlflow/openai/__init__.py +57 -0
  260. mlflow/openai/_agent_tracer.py +364 -0
  261. mlflow/openai/api_request_parallel_processor.py +131 -0
  262. mlflow/openai/autolog.py +509 -0
  263. mlflow/openai/constant.py +1 -0
  264. mlflow/openai/model.py +824 -0
  265. mlflow/openai/utils/chat_schema.py +367 -0
  266. mlflow/optuna/__init__.py +3 -0
  267. mlflow/optuna/storage.py +646 -0
  268. mlflow/plugins/__init__.py +72 -0
  269. mlflow/plugins/base.py +358 -0
  270. mlflow/plugins/builtin/__init__.py +24 -0
  271. mlflow/plugins/builtin/pytorch_plugin.py +150 -0
  272. mlflow/plugins/builtin/sklearn_plugin.py +158 -0
  273. mlflow/plugins/builtin/transformers_plugin.py +187 -0
  274. mlflow/plugins/cli.py +321 -0
  275. mlflow/plugins/discovery.py +340 -0
  276. mlflow/plugins/manager.py +465 -0
  277. mlflow/plugins/registry.py +316 -0
  278. mlflow/plugins/templates/framework_plugin_template.py +329 -0
  279. mlflow/prompt/constants.py +20 -0
  280. mlflow/prompt/promptlab_model.py +197 -0
  281. mlflow/prompt/registry_utils.py +248 -0
  282. mlflow/promptflow/__init__.py +495 -0
  283. mlflow/protos/__init__.py +0 -0
  284. mlflow/protos/assessments_pb2.py +174 -0
  285. mlflow/protos/databricks_artifacts_pb2.py +489 -0
  286. mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
  287. mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
  288. mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
  289. mlflow/protos/databricks_pb2.py +267 -0
  290. mlflow/protos/databricks_trace_server_pb2.py +374 -0
  291. mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
  292. mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
  293. mlflow/protos/facet_feature_statistics_pb2.py +296 -0
  294. mlflow/protos/internal_pb2.py +77 -0
  295. mlflow/protos/mlflow_artifacts_pb2.py +336 -0
  296. mlflow/protos/model_registry_pb2.py +1073 -0
  297. mlflow/protos/scalapb/__init__.py +0 -0
  298. mlflow/protos/scalapb/scalapb_pb2.py +104 -0
  299. mlflow/protos/service_pb2.py +2600 -0
  300. mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
  301. mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
  302. mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
  303. mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
  304. mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
  305. mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
  306. mlflow/py.typed +0 -0
  307. mlflow/pydantic_ai/__init__.py +57 -0
  308. mlflow/pydantic_ai/autolog.py +173 -0
  309. mlflow/pyfunc/__init__.py +3844 -0
  310. mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
  311. mlflow/pyfunc/backend.py +523 -0
  312. mlflow/pyfunc/context.py +78 -0
  313. mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
  314. mlflow/pyfunc/loaders/__init__.py +7 -0
  315. mlflow/pyfunc/loaders/chat_agent.py +117 -0
  316. mlflow/pyfunc/loaders/chat_model.py +125 -0
  317. mlflow/pyfunc/loaders/code_model.py +31 -0
  318. mlflow/pyfunc/loaders/responses_agent.py +112 -0
  319. mlflow/pyfunc/mlserver.py +46 -0
  320. mlflow/pyfunc/model.py +1473 -0
  321. mlflow/pyfunc/scoring_server/__init__.py +604 -0
  322. mlflow/pyfunc/scoring_server/app.py +7 -0
  323. mlflow/pyfunc/scoring_server/client.py +146 -0
  324. mlflow/pyfunc/spark_model_cache.py +48 -0
  325. mlflow/pyfunc/stdin_server.py +44 -0
  326. mlflow/pyfunc/utils/__init__.py +3 -0
  327. mlflow/pyfunc/utils/data_validation.py +224 -0
  328. mlflow/pyfunc/utils/environment.py +22 -0
  329. mlflow/pyfunc/utils/input_converter.py +47 -0
  330. mlflow/pyfunc/utils/serving_data_parser.py +11 -0
  331. mlflow/pytorch/__init__.py +1171 -0
  332. mlflow/pytorch/_lightning_autolog.py +580 -0
  333. mlflow/pytorch/_pytorch_autolog.py +50 -0
  334. mlflow/pytorch/pickle_module.py +35 -0
  335. mlflow/rfunc/__init__.py +42 -0
  336. mlflow/rfunc/backend.py +134 -0
  337. mlflow/runs.py +89 -0
  338. mlflow/server/__init__.py +302 -0
  339. mlflow/server/auth/__init__.py +1224 -0
  340. mlflow/server/auth/__main__.py +4 -0
  341. mlflow/server/auth/basic_auth.ini +6 -0
  342. mlflow/server/auth/cli.py +11 -0
  343. mlflow/server/auth/client.py +537 -0
  344. mlflow/server/auth/config.py +34 -0
  345. mlflow/server/auth/db/__init__.py +0 -0
  346. mlflow/server/auth/db/cli.py +18 -0
  347. mlflow/server/auth/db/migrations/__init__.py +0 -0
  348. mlflow/server/auth/db/migrations/alembic.ini +110 -0
  349. mlflow/server/auth/db/migrations/env.py +76 -0
  350. mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
  351. mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
  352. mlflow/server/auth/db/models.py +67 -0
  353. mlflow/server/auth/db/utils.py +37 -0
  354. mlflow/server/auth/entities.py +165 -0
  355. mlflow/server/auth/logo.py +14 -0
  356. mlflow/server/auth/permissions.py +65 -0
  357. mlflow/server/auth/routes.py +18 -0
  358. mlflow/server/auth/sqlalchemy_store.py +263 -0
  359. mlflow/server/graphql/__init__.py +0 -0
  360. mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
  361. mlflow/server/graphql/graphql_custom_scalars.py +24 -0
  362. mlflow/server/graphql/graphql_errors.py +15 -0
  363. mlflow/server/graphql/graphql_no_batching.py +89 -0
  364. mlflow/server/graphql/graphql_schema_extensions.py +74 -0
  365. mlflow/server/handlers.py +3217 -0
  366. mlflow/server/prometheus_exporter.py +17 -0
  367. mlflow/server/validation.py +30 -0
  368. mlflow/shap/__init__.py +691 -0
  369. mlflow/sklearn/__init__.py +1994 -0
  370. mlflow/sklearn/utils.py +1041 -0
  371. mlflow/smolagents/__init__.py +66 -0
  372. mlflow/smolagents/autolog.py +139 -0
  373. mlflow/smolagents/chat.py +29 -0
  374. mlflow/store/__init__.py +10 -0
  375. mlflow/store/_unity_catalog/__init__.py +1 -0
  376. mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
  377. mlflow/store/_unity_catalog/lineage/constants.py +2 -0
  378. mlflow/store/_unity_catalog/registry/__init__.py +6 -0
  379. mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
  380. mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
  381. mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
  382. mlflow/store/_unity_catalog/registry/utils.py +121 -0
  383. mlflow/store/artifact/__init__.py +0 -0
  384. mlflow/store/artifact/artifact_repo.py +472 -0
  385. mlflow/store/artifact/artifact_repository_registry.py +154 -0
  386. mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
  387. mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
  388. mlflow/store/artifact/cli.py +141 -0
  389. mlflow/store/artifact/cloud_artifact_repo.py +332 -0
  390. mlflow/store/artifact/databricks_artifact_repo.py +729 -0
  391. mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
  392. mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
  393. mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
  394. mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
  395. mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
  396. mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
  397. mlflow/store/artifact/ftp_artifact_repo.py +132 -0
  398. mlflow/store/artifact/gcs_artifact_repo.py +296 -0
  399. mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
  400. mlflow/store/artifact/http_artifact_repo.py +218 -0
  401. mlflow/store/artifact/local_artifact_repo.py +142 -0
  402. mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
  403. mlflow/store/artifact/models_artifact_repo.py +259 -0
  404. mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
  405. mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
  406. mlflow/store/artifact/r2_artifact_repo.py +70 -0
  407. mlflow/store/artifact/runs_artifact_repo.py +265 -0
  408. mlflow/store/artifact/s3_artifact_repo.py +330 -0
  409. mlflow/store/artifact/sftp_artifact_repo.py +141 -0
  410. mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
  411. mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
  412. mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
  413. mlflow/store/artifact/utils/__init__.py +0 -0
  414. mlflow/store/artifact/utils/models.py +148 -0
  415. mlflow/store/db/__init__.py +0 -0
  416. mlflow/store/db/base_sql_model.py +3 -0
  417. mlflow/store/db/db_types.py +10 -0
  418. mlflow/store/db/utils.py +314 -0
  419. mlflow/store/db_migrations/__init__.py +0 -0
  420. mlflow/store/db_migrations/alembic.ini +74 -0
  421. mlflow/store/db_migrations/env.py +84 -0
  422. mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
  423. mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
  424. mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
  425. mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
  426. mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
  427. mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
  428. mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
  429. mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
  430. mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
  431. mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
  432. mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
  433. mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
  434. mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
  435. mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
  436. mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
  437. mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
  438. mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
  439. mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
  440. mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
  441. mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
  442. mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
  443. mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
  444. mlflow/store/db_migrations/versions/__init__.py +0 -0
  445. mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
  446. mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
  447. mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
  448. mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
  449. mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
  450. mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
  451. mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
  452. mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
  453. mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
  454. mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
  455. mlflow/store/entities/__init__.py +3 -0
  456. mlflow/store/entities/paged_list.py +18 -0
  457. mlflow/store/model_registry/__init__.py +10 -0
  458. mlflow/store/model_registry/abstract_store.py +1081 -0
  459. mlflow/store/model_registry/base_rest_store.py +44 -0
  460. mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
  461. mlflow/store/model_registry/dbmodels/__init__.py +0 -0
  462. mlflow/store/model_registry/dbmodels/models.py +206 -0
  463. mlflow/store/model_registry/file_store.py +1091 -0
  464. mlflow/store/model_registry/rest_store.py +481 -0
  465. mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
  466. mlflow/store/tracking/__init__.py +23 -0
  467. mlflow/store/tracking/abstract_store.py +816 -0
  468. mlflow/store/tracking/dbmodels/__init__.py +0 -0
  469. mlflow/store/tracking/dbmodels/initial_models.py +243 -0
  470. mlflow/store/tracking/dbmodels/models.py +1073 -0
  471. mlflow/store/tracking/file_store.py +2438 -0
  472. mlflow/store/tracking/postgres_managed_identity.py +146 -0
  473. mlflow/store/tracking/rest_store.py +1131 -0
  474. mlflow/store/tracking/sqlalchemy_store.py +2785 -0
  475. mlflow/system_metrics/__init__.py +61 -0
  476. mlflow/system_metrics/metrics/__init__.py +0 -0
  477. mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
  478. mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
  479. mlflow/system_metrics/metrics/disk_monitor.py +21 -0
  480. mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
  481. mlflow/system_metrics/metrics/network_monitor.py +34 -0
  482. mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
  483. mlflow/system_metrics/system_metrics_monitor.py +198 -0
  484. mlflow/tracing/__init__.py +16 -0
  485. mlflow/tracing/assessment.py +356 -0
  486. mlflow/tracing/client.py +531 -0
  487. mlflow/tracing/config.py +125 -0
  488. mlflow/tracing/constant.py +105 -0
  489. mlflow/tracing/destination.py +81 -0
  490. mlflow/tracing/display/__init__.py +40 -0
  491. mlflow/tracing/display/display_handler.py +196 -0
  492. mlflow/tracing/export/async_export_queue.py +186 -0
  493. mlflow/tracing/export/inference_table.py +138 -0
  494. mlflow/tracing/export/mlflow_v3.py +137 -0
  495. mlflow/tracing/export/utils.py +70 -0
  496. mlflow/tracing/fluent.py +1417 -0
  497. mlflow/tracing/processor/base_mlflow.py +199 -0
  498. mlflow/tracing/processor/inference_table.py +175 -0
  499. mlflow/tracing/processor/mlflow_v3.py +47 -0
  500. mlflow/tracing/processor/otel.py +73 -0
  501. mlflow/tracing/provider.py +487 -0
  502. mlflow/tracing/trace_manager.py +200 -0
  503. mlflow/tracing/utils/__init__.py +616 -0
  504. mlflow/tracing/utils/artifact_utils.py +28 -0
  505. mlflow/tracing/utils/copy.py +55 -0
  506. mlflow/tracing/utils/environment.py +55 -0
  507. mlflow/tracing/utils/exception.py +21 -0
  508. mlflow/tracing/utils/once.py +35 -0
  509. mlflow/tracing/utils/otlp.py +63 -0
  510. mlflow/tracing/utils/processor.py +54 -0
  511. mlflow/tracing/utils/search.py +292 -0
  512. mlflow/tracing/utils/timeout.py +250 -0
  513. mlflow/tracing/utils/token.py +19 -0
  514. mlflow/tracing/utils/truncation.py +124 -0
  515. mlflow/tracing/utils/warning.py +76 -0
  516. mlflow/tracking/__init__.py +39 -0
  517. mlflow/tracking/_model_registry/__init__.py +1 -0
  518. mlflow/tracking/_model_registry/client.py +764 -0
  519. mlflow/tracking/_model_registry/fluent.py +853 -0
  520. mlflow/tracking/_model_registry/registry.py +67 -0
  521. mlflow/tracking/_model_registry/utils.py +251 -0
  522. mlflow/tracking/_tracking_service/__init__.py +0 -0
  523. mlflow/tracking/_tracking_service/client.py +883 -0
  524. mlflow/tracking/_tracking_service/registry.py +56 -0
  525. mlflow/tracking/_tracking_service/utils.py +275 -0
  526. mlflow/tracking/artifact_utils.py +179 -0
  527. mlflow/tracking/client.py +5900 -0
  528. mlflow/tracking/context/__init__.py +0 -0
  529. mlflow/tracking/context/abstract_context.py +35 -0
  530. mlflow/tracking/context/databricks_cluster_context.py +15 -0
  531. mlflow/tracking/context/databricks_command_context.py +15 -0
  532. mlflow/tracking/context/databricks_job_context.py +49 -0
  533. mlflow/tracking/context/databricks_notebook_context.py +41 -0
  534. mlflow/tracking/context/databricks_repo_context.py +43 -0
  535. mlflow/tracking/context/default_context.py +51 -0
  536. mlflow/tracking/context/git_context.py +32 -0
  537. mlflow/tracking/context/registry.py +98 -0
  538. mlflow/tracking/context/system_environment_context.py +15 -0
  539. mlflow/tracking/default_experiment/__init__.py +1 -0
  540. mlflow/tracking/default_experiment/abstract_context.py +43 -0
  541. mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
  542. mlflow/tracking/default_experiment/registry.py +75 -0
  543. mlflow/tracking/fluent.py +3595 -0
  544. mlflow/tracking/metric_value_conversion_utils.py +93 -0
  545. mlflow/tracking/multimedia.py +206 -0
  546. mlflow/tracking/registry.py +86 -0
  547. mlflow/tracking/request_auth/__init__.py +0 -0
  548. mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
  549. mlflow/tracking/request_auth/registry.py +60 -0
  550. mlflow/tracking/request_header/__init__.py +0 -0
  551. mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
  552. mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
  553. mlflow/tracking/request_header/default_request_header_provider.py +17 -0
  554. mlflow/tracking/request_header/registry.py +79 -0
  555. mlflow/transformers/__init__.py +2982 -0
  556. mlflow/transformers/flavor_config.py +258 -0
  557. mlflow/transformers/hub_utils.py +83 -0
  558. mlflow/transformers/llm_inference_utils.py +468 -0
  559. mlflow/transformers/model_io.py +301 -0
  560. mlflow/transformers/peft.py +51 -0
  561. mlflow/transformers/signature.py +183 -0
  562. mlflow/transformers/torch_utils.py +55 -0
  563. mlflow/types/__init__.py +21 -0
  564. mlflow/types/agent.py +270 -0
  565. mlflow/types/chat.py +240 -0
  566. mlflow/types/llm.py +935 -0
  567. mlflow/types/responses.py +139 -0
  568. mlflow/types/responses_helpers.py +416 -0
  569. mlflow/types/schema.py +1505 -0
  570. mlflow/types/type_hints.py +647 -0
  571. mlflow/types/utils.py +753 -0
  572. mlflow/utils/__init__.py +283 -0
  573. mlflow/utils/_capture_modules.py +256 -0
  574. mlflow/utils/_capture_transformers_modules.py +75 -0
  575. mlflow/utils/_spark_utils.py +201 -0
  576. mlflow/utils/_unity_catalog_oss_utils.py +97 -0
  577. mlflow/utils/_unity_catalog_utils.py +479 -0
  578. mlflow/utils/annotations.py +218 -0
  579. mlflow/utils/arguments_utils.py +16 -0
  580. mlflow/utils/async_logging/__init__.py +1 -0
  581. mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
  582. mlflow/utils/async_logging/async_logging_queue.py +366 -0
  583. mlflow/utils/async_logging/run_artifact.py +38 -0
  584. mlflow/utils/async_logging/run_batch.py +58 -0
  585. mlflow/utils/async_logging/run_operations.py +49 -0
  586. mlflow/utils/autologging_utils/__init__.py +737 -0
  587. mlflow/utils/autologging_utils/client.py +432 -0
  588. mlflow/utils/autologging_utils/config.py +33 -0
  589. mlflow/utils/autologging_utils/events.py +294 -0
  590. mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
  591. mlflow/utils/autologging_utils/metrics_queue.py +71 -0
  592. mlflow/utils/autologging_utils/safety.py +1104 -0
  593. mlflow/utils/autologging_utils/versioning.py +95 -0
  594. mlflow/utils/checkpoint_utils.py +206 -0
  595. mlflow/utils/class_utils.py +6 -0
  596. mlflow/utils/cli_args.py +257 -0
  597. mlflow/utils/conda.py +354 -0
  598. mlflow/utils/credentials.py +231 -0
  599. mlflow/utils/data_utils.py +17 -0
  600. mlflow/utils/databricks_utils.py +1436 -0
  601. mlflow/utils/docstring_utils.py +477 -0
  602. mlflow/utils/doctor.py +133 -0
  603. mlflow/utils/download_cloud_file_chunk.py +43 -0
  604. mlflow/utils/env_manager.py +16 -0
  605. mlflow/utils/env_pack.py +131 -0
  606. mlflow/utils/environment.py +1009 -0
  607. mlflow/utils/exception_utils.py +14 -0
  608. mlflow/utils/file_utils.py +978 -0
  609. mlflow/utils/git_utils.py +77 -0
  610. mlflow/utils/gorilla.py +797 -0
  611. mlflow/utils/import_hooks/__init__.py +363 -0
  612. mlflow/utils/lazy_load.py +51 -0
  613. mlflow/utils/logging_utils.py +168 -0
  614. mlflow/utils/mime_type_utils.py +58 -0
  615. mlflow/utils/mlflow_tags.py +103 -0
  616. mlflow/utils/model_utils.py +486 -0
  617. mlflow/utils/name_utils.py +346 -0
  618. mlflow/utils/nfs_on_spark.py +62 -0
  619. mlflow/utils/openai_utils.py +164 -0
  620. mlflow/utils/os.py +12 -0
  621. mlflow/utils/oss_registry_utils.py +29 -0
  622. mlflow/utils/plugins.py +17 -0
  623. mlflow/utils/process.py +182 -0
  624. mlflow/utils/promptlab_utils.py +146 -0
  625. mlflow/utils/proto_json_utils.py +743 -0
  626. mlflow/utils/pydantic_utils.py +54 -0
  627. mlflow/utils/request_utils.py +279 -0
  628. mlflow/utils/requirements_utils.py +704 -0
  629. mlflow/utils/rest_utils.py +673 -0
  630. mlflow/utils/search_logged_model_utils.py +127 -0
  631. mlflow/utils/search_utils.py +2111 -0
  632. mlflow/utils/secure_loading.py +221 -0
  633. mlflow/utils/security_validation.py +384 -0
  634. mlflow/utils/server_cli_utils.py +61 -0
  635. mlflow/utils/spark_utils.py +15 -0
  636. mlflow/utils/string_utils.py +138 -0
  637. mlflow/utils/thread_utils.py +63 -0
  638. mlflow/utils/time.py +54 -0
  639. mlflow/utils/timeout.py +42 -0
  640. mlflow/utils/uri.py +572 -0
  641. mlflow/utils/validation.py +662 -0
  642. mlflow/utils/virtualenv.py +458 -0
  643. mlflow/utils/warnings_utils.py +25 -0
  644. mlflow/utils/yaml_utils.py +179 -0
  645. mlflow/version.py +24 -0
mlflow/models/evaluation/base.py @@ -0,0 +1,1811 @@
1
+ import inspect
2
+ import json
3
+ import keyword
4
+ import logging
5
+ import os
6
+ import pathlib
7
+ import signal
8
+ import urllib
9
+ import urllib.parse
10
+ from abc import ABCMeta, abstractmethod
11
+ from contextlib import contextmanager, nullcontext
12
+ from dataclasses import dataclass
13
+ from inspect import Parameter, Signature
14
+ from types import FunctionType
15
+ from typing import Any, Optional, Union
16
+
17
+ import mlflow
18
+ from mlflow.data.dataset import Dataset
19
+ from mlflow.data.evaluation_dataset import (
20
+ EvaluationDataset,
21
+ convert_data_to_mlflow_dataset,
22
+ )
23
+ from mlflow.entities.dataset_input import DatasetInput
24
+ from mlflow.entities.input_tag import InputTag
25
+ from mlflow.entities.logged_model_input import LoggedModelInput
26
+ from mlflow.exceptions import MlflowException
27
+ from mlflow.models.evaluation.utils.trace import configure_autologging_for_evaluation
28
+ from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
29
+ from mlflow.store.artifact.utils.models import _parse_model_id_if_present
30
+ from mlflow.tracking.artifact_utils import _download_artifact_from_uri
31
+ from mlflow.tracking.client import MlflowClient
32
+ from mlflow.tracking.fluent import _set_active_model
33
+ from mlflow.utils import _get_fully_qualified_class_name
34
+ from mlflow.utils.annotations import developer_stable
35
+ from mlflow.utils.class_utils import _get_class_from_string
36
+ from mlflow.utils.file_utils import TempDir
37
+ from mlflow.utils.mlflow_tags import MLFLOW_DATASET_CONTEXT
38
+ from mlflow.utils.proto_json_utils import NumpyEncoder
39
+
40
+ try:
41
+ # `pandas` is not required for `mlflow-skinny`.
42
+ import pandas as pd
43
+ except ImportError:
44
+ pass
45
+
46
+ _logger = logging.getLogger(__name__)
47
+
48
+
49
+ class _ModelType:
50
+ REGRESSOR = "regressor"
51
+ CLASSIFIER = "classifier"
52
+ QUESTION_ANSWERING = "question-answering"
53
+ TEXT_SUMMARIZATION = "text-summarization"
54
+ TEXT = "text"
55
+ RETRIEVER = "retriever"
56
+ # This model type is used for Mosaic AI Agent evaluation and is only available in Databricks
57
+ # https://docs.databricks.com/en/generative-ai/agent-evaluation/index.html
58
+ DATABRICKS_AGENT = "databricks-agent"
59
+
60
+ def __init__(self):
61
+ raise NotImplementedError("This class is not meant to be instantiated.")
62
+
63
+ @classmethod
64
+ def values(cls):
65
+ return (
66
+ cls.REGRESSOR,
67
+ cls.CLASSIFIER,
68
+ cls.QUESTION_ANSWERING,
69
+ cls.TEXT_SUMMARIZATION,
70
+ cls.TEXT,
71
+ cls.RETRIEVER,
72
+ )
73
+
74
+
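For orientation, these strings are the values a caller passes as the model_type argument of mlflow.evaluate. A minimal illustrative sketch follows; the runs:/ URI, eval_df, and the "label" column are placeholders rather than anything defined in this file:

    import mlflow

    # Hypothetical: evaluate a previously logged classifier against a labeled DataFrame.
    result = mlflow.evaluate(
        model="runs:/<run_id>/model",      # placeholder URI of a logged pyfunc model
        data=eval_df,                      # placeholder pandas DataFrame with features and labels
        targets="label",
        model_type=_ModelType.CLASSIFIER,  # i.e. "classifier"
    )
    print(result.metrics)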
75
+ class EvaluationMetric:
76
+ '''
77
+ An evaluation metric.
78
+
79
+ Args:
80
+ eval_fn: A function that computes the metric with the following signature:
81
+
82
+ .. code-block:: python
83
+
84
+ def eval_fn(
85
+ predictions: pandas.Series,
86
+ targets: pandas.Series,
87
+ metrics: Dict[str, MetricValue],
88
+ **kwargs,
89
+ ) -> Union[float, MetricValue]:
90
+ """
91
+ Args:
92
+ predictions: A pandas Series containing the predictions made by the model.
93
+ targets: (Optional) A pandas Series containing the corresponding labels
94
+ for the predictions made on that input.
95
+ metrics: (Optional) A dictionary containing the metrics calculated by the
96
+ default evaluator. The keys are the names of the metrics and the values
97
+ are the metric values. To access the MetricValue for the metrics
98
+ calculated by the system, make sure to specify the type hint for this
99
+ parameter as Dict[str, MetricValue]. Refer to the DefaultEvaluator
100
+ behavior section for what metrics will be returned based on the type of
101
+ model (i.e. classifier or regressor).
102
+ kwargs: Includes a list of args that are used to compute the metric. These
103
+ args could be information coming from input data, model outputs,
104
+ other metrics, or parameters specified in the `evaluator_config`
105
+ argument of the `mlflow.evaluate` API.
106
+
107
+ Returns: MetricValue with per-row scores, per-row justifications, and aggregate
108
+ results.
109
+ """
110
+ ...
111
+
112
+ name: The name of the metric.
113
+ greater_is_better: Whether a greater value of the metric is better.
114
+ long_name: (Optional) The long name of the metric. For example,
115
+ ``"root_mean_squared_error"`` for ``"mse"``.
116
+ version: (Optional) The metric version. For example ``v1``.
117
+ metric_details: (Optional) A description of the metric and how it is calculated.
118
+ metric_metadata: (Optional) A dictionary containing metadata for the metric.
119
+ genai_metric_args: (Optional) A dictionary containing arguments specified by users
120
+ when calling make_genai_metric or make_genai_metric_from_prompt. Those args
121
+ are persisted so that we can deserialize the same metric object later.
122
+ '''
123
+
124
+ def __init__(
125
+ self,
126
+ eval_fn,
127
+ name,
128
+ greater_is_better,
129
+ long_name=None,
130
+ version=None,
131
+ metric_details=None,
132
+ metric_metadata=None,
133
+ genai_metric_args=None,
134
+ ):
135
+ self.eval_fn = eval_fn
136
+ self.name = name
137
+ self.greater_is_better = greater_is_better
138
+ self.long_name = long_name or name
139
+ self.version = version
140
+ self.metric_details = metric_details
141
+ self.metric_metadata = metric_metadata
142
+ self.genai_metric_args = genai_metric_args
143
+
144
+ def __str__(self):
145
+ parts = [f"name={self.name}, greater_is_better={self.greater_is_better}"]
146
+
147
+ if self.long_name:
148
+ parts.append(f"long_name={self.long_name}")
149
+ if self.version:
150
+ parts.append(f"version={self.version}")
151
+ if self.metric_details:
152
+ parts.append(f"metric_details={self.metric_details}")
153
+ if self.metric_metadata:
154
+ parts.append(f"metric_metadata={self.metric_metadata}")
155
+
156
+ return "EvaluationMetric(" + ", ".join(parts) + ")"
157
+
158
+
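As an illustration of the eval_fn contract documented above, a small hand-written metric function might look like the following (a hypothetical sketch, not part of this package; MetricValue is imported from mlflow.metrics.base, the same module used later in this file):

    import pandas as pd
    from mlflow.metrics.base import MetricValue

    def exact_match(predictions: pd.Series, targets: pd.Series, **kwargs) -> MetricValue:
        # Per-row score: 1.0 when the prediction equals its target, else 0.0.
        scores = [float(p == t) for p, t in zip(predictions, targets)]
        return MetricValue(
            scores=scores,
            aggregate_results={"mean": sum(scores) / len(scores) if scores else 0.0},
        )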
159
+ # NB: we need this function because we cannot modify the signature of
160
+ # a class's __call__ method after the class has been defined.
161
+ # This is also useful for distinguishing between metrics whose eval_fn functions have different signatures
162
+ def _generate_eval_metric_class(eval_fn, require_strict_signature=False):
163
+ """
164
+ Dynamically generate a CallableEvaluationMetric class that can be used to evaluate the metric
165
+ on the given input data. The generated class is callable with a __call__ method that
166
+ takes the arguments specified in the signature of the eval_fn function.
167
+
168
+ Args:
169
+ eval_fn: the evaluation function of the EvaluationMetric.
170
+ require_strict_signature: (Optional) Whether the eval_fn needs to follow a strict signature.
171
+ If True, the eval_fn must follow the signature below:
172
+
173
+ .. code-block:: python
174
+
175
+ def eval_fn(
176
+ predictions: "pd.Series",
177
+ metrics: Dict[str, MetricValue],
178
+ inputs: "pd.Series",
179
+ *args,
180
+ ) -> MetricValue:
181
+ pass
182
+
183
+ When generating a metric from `make_genai_metric`, this should be set to True.
184
+ Defaults to False.
185
+
186
+ Returns:
187
+ A dynamically generated callable CallableEvaluationMetric class.
188
+ """
189
+ from mlflow.metrics.base import MetricValue
190
+
191
+ if require_strict_signature:
192
+ allowed_kwargs_names = [
193
+ param_name
194
+ for param_name in inspect.signature(eval_fn).parameters.keys()
195
+ if param_name not in ["predictions", "metrics", "inputs"]
196
+ ]
197
+
198
+ def genai_call_method(
199
+ self,
200
+ *,
201
+ predictions: Union[pd.Series, str, list[str]],
202
+ inputs: Union[pd.Series, str, list[str]],
203
+ metrics: Optional[dict[str, MetricValue]] = None,
204
+ **kwargs,
205
+ ) -> MetricValue:
206
+ if missed_kwargs := set(allowed_kwargs_names) - set(kwargs.keys()):
207
+ raise MlflowException.invalid_parameter_value(
208
+ f"Missing required arguments: {missed_kwargs}",
209
+ )
210
+ if extra_kwargs := set(kwargs.keys()) - set(allowed_kwargs_names):
211
+ raise MlflowException.invalid_parameter_value(
212
+ f"Unexpected arguments: {extra_kwargs}",
213
+ )
214
+ return self.eval_fn(
215
+ _convert_val_to_pd_Series(predictions, "predictions"),
216
+ metrics or {},
217
+ _convert_val_to_pd_Series(inputs, "inputs"),
218
+ # Note: based on https://github.com/mlflow/mlflow/blob/4fef77afdbe4d76302cb0b1aad2bd72b5cde64e9/mlflow/metrics/genai/genai_metric.py#L49-L53
219
+ # the extra params passed https://github.com/mlflow/mlflow/blob/4fef77afdbe4d76302cb0b1aad2bd72b5cde64e9/mlflow/metrics/genai/genai_metric.py#L513
220
+ # should always be pandas Series
221
+ *[
222
+ _convert_val_to_pd_Series(kwargs[arg_name], arg_name)
223
+ for arg_name in allowed_kwargs_names
224
+ ],
225
+ )
226
+
227
+ genai_call_method.__signature__ = Signature(
228
+ parameters=[
229
+ Parameter("self", Parameter.POSITIONAL_OR_KEYWORD),
230
+ Parameter(
231
+ "predictions",
232
+ Parameter.KEYWORD_ONLY,
233
+ annotation=Union[pd.Series, str, list[str]],
234
+ ),
235
+ Parameter(
236
+ "inputs",
237
+ Parameter.KEYWORD_ONLY,
238
+ annotation=Union[pd.Series, str, list[str]],
239
+ ),
240
+ Parameter(
241
+ "metrics",
242
+ Parameter.KEYWORD_ONLY,
243
+ annotation=Optional[dict[str, MetricValue]],
244
+ default=None,
245
+ ),
246
+ *[
247
+ Parameter(
248
+ name, Parameter.KEYWORD_ONLY, annotation=Union[pd.Series, str, list[str]]
249
+ )
250
+ for name in allowed_kwargs_names
251
+ ],
252
+ ]
253
+ )
254
+ genai_call_method.__doc__ = f"""
255
+ Evaluate the metric on the given inputs and predictions.
256
+ Note: only keyword arguments are supported.
257
+
258
+ Args:
259
+ predictions: predictions made by the model.
260
+ inputs: inputs used to make the predictions.
261
+ metrics: metrics calculated by the default evaluator.
262
+ kwargs: additional arguments used to compute the metric.
263
+ Required arguments: {allowed_kwargs_names}
264
+
265
+ Returns:
266
+ evaluation result as MetricValue object.
267
+ """
268
+ call_method = genai_call_method
269
+
270
+ else:
271
+
272
+ def _call_method(
273
+ self,
274
+ **kwargs,
275
+ ) -> MetricValue:
276
+ return self.eval_fn(**kwargs)
277
+
278
+ allowed_kwargs_params = inspect.signature(eval_fn).parameters
279
+ _call_method.__signature__ = Signature(
280
+ parameters=[
281
+ Parameter("self", Parameter.POSITIONAL_OR_KEYWORD),
282
+ *[
283
+ Parameter(
284
+ name,
285
+ Parameter.KEYWORD_ONLY,
286
+ annotation=allowed_kwargs_params[name].annotation,
287
+ )
288
+ for name in allowed_kwargs_params.keys()
289
+ ],
290
+ ]
291
+ )
292
+ _call_method.__doc__ = f"""
293
+ Evaluate the metric on the given inputs and predictions.
294
+ Note: only keyword arguments are supported.
295
+
296
+ Args:
297
+ kwargs: additional arguments used to compute the metric.
298
+ Required arguments: {list(allowed_kwargs_params.keys())}
299
+
300
+ Returns:
301
+ evaluation result as MetricValue object.
302
+ """
303
+ call_method = _call_method
304
+
305
+ return type(
306
+ "CallableEvaluationMetric",
307
+ (EvaluationMetric,),
308
+ {"__call__": call_method},
309
+ )
310
+
311
+
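A rough usage sketch of the class generated above: a metric created through make_metric (the non-strict path) is itself callable, with keyword-only parameters mirroring its eval_fn. This reuses the hypothetical exact_match function sketched after the EvaluationMetric class; the strict-signature path is reserved for metrics built by make_genai_metric, whose eval_fn must accept predictions, metrics, and inputs in that order.

    import pandas as pd
    from mlflow.models import make_metric

    metric = make_metric(eval_fn=exact_match, greater_is_better=True)

    # __call__ is generated dynamically, so the metric object can be evaluated directly:
    value = metric(predictions=pd.Series(["a", "b"]), targets=pd.Series(["a", "c"]))
    print(value.aggregate_results)  # {'mean': 0.5}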
312
+ def _convert_val_to_pd_Series(val, name):
313
+ if val is not None and not isinstance(val, pd.Series):
314
+ if isinstance(val, str):
315
+ return pd.Series([val])
316
+ elif isinstance(val, list):
317
+ return pd.Series(val)
318
+ else:
319
+ raise TypeError(
320
+ f"Expected {name} to be a string, list, or Pandas Series, got {type(val)}"
321
+ )
322
+ return val
323
+
324
+
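For reference, the helper above simply normalizes scalar and list inputs to pandas Series:

    import pandas as pd

    _convert_val_to_pd_Series("hello", "predictions")        # str  -> pd.Series(["hello"])
    _convert_val_to_pd_Series(["a", "b"], "inputs")          # list -> pd.Series(["a", "b"])
    _convert_val_to_pd_Series(pd.Series([1, 2]), "targets")  # a Series is returned unchanged
    # Any other type (e.g. an int) raises TypeError.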
325
+ def make_metric(
326
+ *,
327
+ eval_fn,
328
+ greater_is_better,
329
+ name=None,
330
+ long_name=None,
331
+ version=None,
332
+ metric_details=None,
333
+ metric_metadata=None,
334
+ genai_metric_args=None,
335
+ ):
336
+ '''
337
+ A factory function to create an :py:class:`EvaluationMetric` object.
338
+
339
+ Args:
340
+ eval_fn: A function that computes the metric with the following signature:
341
+
342
+ .. code-block:: python
343
+
344
+ def eval_fn(
345
+ predictions: pandas.Series,
346
+ targets: pandas.Series,
347
+ metrics: Dict[str, MetricValue],
348
+ **kwargs,
349
+ ) -> Union[float, MetricValue]:
350
+ """
351
+ Args:
352
+ predictions: A pandas Series containing the predictions made by the model.
353
+ targets: (Optional) A pandas Series containing the corresponding labels
354
+ for the predictions made on that input.
355
+ metrics: (Optional) A dictionary containing the metrics calculated by the
356
+ default evaluator. The keys are the names of the metrics and the values
357
+ are the metric values. To access the MetricValue for the metrics
358
+ calculated by the system, make sure to specify the type hint for this
359
+ parameter as Dict[str, MetricValue]. Refer to the DefaultEvaluator
360
+ behavior section for what metrics will be returned based on the type of
361
+ model (i.e. classifier or regressor).
365
+ kwargs: Includes a list of args that are used to compute the metric. These
366
+ args could be information coming from input data, model outputs,
367
+ other metrics, or parameters specified in the `evaluator_config`
368
+ argument of the `mlflow.evaluate` API.
369
+
370
+ Returns: MetricValue with per-row scores, per-row justifications, and aggregate
371
+ results.
372
+ """
373
+ ...
374
+
375
+ greater_is_better: Whether a greater value of the metric is better.
376
+ name: The name of the metric. This argument must be specified if ``eval_fn`` is a lambda
377
+ function or the ``eval_fn.__name__`` attribute is not available.
378
+ long_name: (Optional) The long name of the metric. For example, ``"mean_squared_error"``
379
+ for ``"mse"``.
380
+ version: (Optional) The metric version. For example ``v1``.
381
+ metric_details: (Optional) A description of the metric and how it is calculated.
382
+ metric_metadata: (Optional) A dictionary containing metadata for the metric.
383
+ genai_metric_args: (Optional) A dictionary containing arguments specified by users
384
+ when calling make_genai_metric or make_genai_metric_from_prompt. Those args
385
+ are persisted so that we can deserialize the same metric object later.
386
+
387
+ .. seealso::
388
+
389
+ - :py:class:`mlflow.models.EvaluationMetric`
390
+ - :py:func:`mlflow.evaluate`
391
+ '''
392
+ return _make_metric(
393
+ eval_fn=eval_fn,
394
+ greater_is_better=greater_is_better,
395
+ name=name,
396
+ long_name=long_name,
397
+ version=version,
398
+ metric_details=metric_details,
399
+ metric_metadata=metric_metadata,
400
+ genai_metric_args=genai_metric_args,
401
+ require_strict_signature=False,
402
+ )
403
+
404
+
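A hedged end-to-end sketch of how a metric produced by make_metric is usually supplied to an evaluation run. The extra_metrics parameter name and the runs:/ URI reflect recent MLflow releases and are assumptions from outside this diff; eval_df and "label" are placeholders:

    import mlflow
    from mlflow.models import make_metric

    def squared_diff_plus_one(predictions, targets, **kwargs):
        # eval_fn may return a plain float; MLflow also accepts a MetricValue here.
        return float(((predictions - targets) ** 2 + 1).mean())

    custom = make_metric(eval_fn=squared_diff_plus_one, greater_is_better=False)

    result = mlflow.evaluate(
        model="runs:/<run_id>/model",   # placeholder logged model
        data=eval_df,                   # placeholder evaluation DataFrame
        targets="label",
        model_type="regressor",
        extra_metrics=[custom],         # assumed parameter name in recent MLflow versions
    )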
405
+ def _make_metric(
406
+ *,
407
+ eval_fn,
408
+ greater_is_better,
409
+ name=None,
410
+ long_name=None,
411
+ version=None,
412
+ metric_details=None,
413
+ metric_metadata=None,
414
+ genai_metric_args=None,
415
+ require_strict_signature=False,
416
+ ):
417
+ '''
418
+ A factory function to create an :py:class:`EvaluationMetric` object.
419
+
420
+ Args:
421
+ eval_fn: A function that computes the metric with the following signature:
422
+
423
+ .. code-block:: python
424
+
425
+ def eval_fn(
426
+ predictions: pandas.Series,
427
+ targets: pandas.Series,
428
+ metrics: Dict[str, MetricValue],
429
+ **kwargs,
430
+ ) -> Union[float, MetricValue]:
431
+ """
432
+ Args:
433
+ predictions: A pandas Series containing the predictions made by the model.
434
+ targets: (Optional) A pandas Series containing the corresponding labels
435
+ for the predictions made on that input.
436
+ metrics: (Optional) A dictionary containing the metrics calculated by the
437
+ default evaluator. The keys are the names of the metrics and the values
438
+ are the metric values. To access the MetricValue for the metrics
439
+ calculated by the system, make sure to specify the type hint for this
440
+ parameter as Dict[str, MetricValue]. Refer to the DefaultEvaluator
441
+ behavior section for what metrics will be returned based on the type of
442
+ model (i.e. classifier or regressor).
446
+ kwargs: Includes a list of args that are used to compute the metric. These
447
+ args could be information coming from input data, model outputs,
448
+ other metrics, or parameters specified in the `evaluator_config`
449
+ argument of the `mlflow.evaluate` API.
450
+
451
+ Returns: MetricValue with per-row scores, per-row justifications, and aggregate
452
+ results.
453
+ """
454
+ ...
455
+
456
+ greater_is_better: Whether a greater value of the metric is better.
457
+ name: The name of the metric. This argument must be specified if ``eval_fn`` is a lambda
458
+ function or the ``eval_fn.__name__`` attribute is not available.
459
+ long_name: (Optional) The long name of the metric. For example, ``"mean_squared_error"``
460
+ for ``"mse"``.
461
+ version: (Optional) The metric version. For example ``v1``.
462
+ metric_details: (Optional) A description of the metric and how it is calculated.
463
+ metric_metadata: (Optional) A dictionary containing metadata for the metric.
464
+ genai_metric_args: (Optional) A dictionary containing arguments specified by users
465
+ when calling make_genai_metric or make_genai_metric_from_prompt. Those args
466
+ are persisted so that we can deserialize the same metric object later.
467
+ require_strict_signature: (Optional) Whether the eval_fn needs to follow a strict signature.
468
+ If True, the eval_fn must follow the signature below:
469
+
470
+ .. code-block:: python
471
+
472
+ def eval_fn(
473
+ predictions: "pd.Series",
474
+ metrics: Dict[str, MetricValue],
475
+ inputs: "pd.Series",
476
+ *args,
477
+ ) -> MetricValue:
478
+ pass
479
+
480
+ When generating a metric from `make_genai_metric`, this should be set to True.
481
+ Defaults to False.
482
+
483
+ .. seealso::
484
+
485
+ - :py:class:`mlflow.models.EvaluationMetric`
486
+ - :py:func:`mlflow.evaluate`
487
+ '''
488
+ if name is None:
489
+ if isinstance(eval_fn, FunctionType) and eval_fn.__name__ == "<lambda>":
490
+ raise MlflowException(
491
+ "`name` must be specified if `eval_fn` is a lambda function.",
492
+ INVALID_PARAMETER_VALUE,
493
+ )
494
+ if not hasattr(eval_fn, "__name__"):
495
+ raise MlflowException(
496
+ "`name` must be specified if `eval_fn` does not have a `__name__` attribute.",
497
+ INVALID_PARAMETER_VALUE,
498
+ )
499
+ name = eval_fn.__name__
500
+
501
+ if "/" in name:
502
+ raise MlflowException(
503
+ f"Invalid metric name '{name}'. Metric names cannot include forward slashes ('/').",
504
+ INVALID_PARAMETER_VALUE,
505
+ )
506
+
507
+ if not name.isidentifier():
508
+ _logger.warning(
509
+ f"The metric name '{name}' provided is not a valid Python identifier, which will "
510
+ "prevent its use as a base metric for derived metrics. Please use a valid identifier "
511
+ "to enable creation of derived metrics that use the given metric."
512
+ )
513
+
514
+ if keyword.iskeyword(name):
515
+ _logger.warning(
516
+ f"The metric name '{name}' is a reserved Python keyword, which will "
517
+ "prevent its use as a base metric for derived metrics. Please use a valid identifier "
518
+ "to enable creation of derived metrics that use the given metric."
519
+ )
520
+
521
+ if name in ["predictions", "targets", "metrics"]:
522
+ _logger.warning(
523
+ f"The metric name '{name}' is used as a special parameter in MLflow metrics, which "
524
+ "will prevent its use as a base metric for derived metrics. Please use a different "
525
+ "name to enable creation of derived metrics that use the given metric."
526
+ )
527
+
528
+ return _generate_eval_metric_class(eval_fn, require_strict_signature=require_strict_signature)(
529
+ eval_fn=eval_fn,
530
+ name=name,
531
+ greater_is_better=greater_is_better,
532
+ long_name=long_name,
533
+ version=version,
534
+ metric_details=metric_details,
535
+ metric_metadata=metric_metadata,
536
+ genai_metric_args=genai_metric_args,
537
+ )
538
+
539
+
540
+ @developer_stable
541
+ class EvaluationArtifact(metaclass=ABCMeta):
542
+ """
543
+ A model evaluation artifact containing an artifact uri and content.
544
+ """
545
+
546
+ def __init__(self, uri, content=None):
547
+ self._uri = uri
548
+ self._content = content
549
+
550
+ @abstractmethod
551
+ def _load_content_from_file(self, local_artifact_path):
552
+ """
553
+ Abstract interface to load the content from local artifact file path,
554
+ and return the loaded content.
555
+ """
556
+
557
+ def _load(self, local_artifact_path=None):
558
+ """
559
+ If ``local_artifact_path`` is ``None``, download artifact from the artifact uri.
560
+ Otherwise, load artifact content from the specified path. Assign the loaded content to
561
+ ``self._content``, and return the loaded content.
562
+ """
563
+ if local_artifact_path is not None:
564
+ self._content = self._load_content_from_file(local_artifact_path)
565
+ else:
566
+ with TempDir() as temp_dir:
567
+ temp_dir_path = temp_dir.path()
568
+ _download_artifact_from_uri(self._uri, temp_dir_path)
569
+ local_artifact_file = temp_dir.path(os.listdir(temp_dir_path)[0])
570
+ self._content = self._load_content_from_file(local_artifact_file)
571
+ return self._content
572
+
573
+ @abstractmethod
574
+ def _save(self, output_artifact_path):
575
+ """Save artifact content into specified path."""
576
+
577
+ @property
578
+ def content(self):
579
+ """
580
+ The content of the artifact (representation varies)
581
+ """
582
+ if self._content is None:
583
+ self._load()
584
+ return self._content
585
+
586
+ @property
587
+ def uri(self) -> str:
588
+ """
589
+ The URI of the artifact
590
+ """
591
+ return self._uri
592
+
593
+ def __repr__(self):
594
+ return f"{self.__class__.__name__}(uri='{self.uri}')"
595
+
596
+
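To make the abstract interface concrete, a minimal hypothetical subclass that persists its content as JSON could look like the following; it only illustrates the two methods a subclass must implement and is not copied from the built-in artifact classes shipped in mlflow/models/evaluation/artifacts.py:

    import json

    class JsonDictArtifact(EvaluationArtifact):
        """Hypothetical artifact whose content is a JSON-serializable dict."""

        def _load_content_from_file(self, local_artifact_path):
            with open(local_artifact_path) as f:
                return json.load(f)

        def _save(self, output_artifact_path):
            with open(output_artifact_path, "w") as f:
                json.dump(self._content, f)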
597
+ class EvaluationResult:
598
+ """
599
+ Represents the model evaluation outputs of a `mlflow.evaluate()` API call, containing
600
+ both scalar metrics and output artifacts such as performance plots.
601
+ """
602
+
603
+ def __init__(self, metrics, artifacts, run_id=None):
604
+ self._metrics = metrics
605
+ self._artifacts = artifacts
606
+ self._run_id = (
607
+ run_id
608
+ if run_id is not None
609
+ else (mlflow.active_run().info.run_id if mlflow.active_run() is not None else None)
610
+ )
611
+
612
+ @classmethod
613
+ def load(cls, path):
614
+ """Load the evaluation results from the specified local filesystem path"""
615
+ with open(os.path.join(path, "metrics.json")) as fp:
616
+ metrics = json.load(fp)
617
+
618
+ with open(os.path.join(path, "artifacts_metadata.json")) as fp:
619
+ artifacts_metadata = json.load(fp)
620
+
621
+ artifacts = {}
622
+
623
+ artifacts_dir = os.path.join(path, "artifacts")
624
+
625
+ for artifact_name, meta in artifacts_metadata.items():
626
+ uri = meta["uri"]
627
+ ArtifactCls = _get_class_from_string(meta["class_name"])
628
+ artifact = ArtifactCls(uri=uri)
629
+ filename = pathlib.Path(urllib.parse.urlparse(uri).path).name
630
+ artifact._load(os.path.join(artifacts_dir, filename))
631
+ artifacts[artifact_name] = artifact
632
+
633
+ return EvaluationResult(metrics=metrics, artifacts=artifacts)
634
+
635
+ def save(self, path):
636
+ """Write the evaluation results to the specified local filesystem path"""
637
+ os.makedirs(path, exist_ok=True)
638
+ with open(os.path.join(path, "metrics.json"), "w") as fp:
639
+ json.dump(self.metrics, fp, cls=NumpyEncoder)
640
+
641
+ artifacts_metadata = {
642
+ artifact_name: {
643
+ "uri": artifact.uri,
644
+ "class_name": _get_fully_qualified_class_name(artifact),
645
+ }
646
+ for artifact_name, artifact in self.artifacts.items()
647
+ }
648
+ with open(os.path.join(path, "artifacts_metadata.json"), "w") as fp:
649
+ json.dump(artifacts_metadata, fp)
650
+
651
+ artifacts_dir = os.path.join(path, "artifacts")
652
+ os.makedirs(artifacts_dir, exist_ok=True)
653
+
654
+ for artifact in self.artifacts.values():
655
+ filename = pathlib.Path(urllib.parse.urlparse(artifact.uri).path).name
656
+ artifact._save(os.path.join(artifacts_dir, filename))
657
+
658
+ @property
659
+ def metrics(self) -> dict[str, Any]:
660
+ """
661
+ A dictionary mapping scalar metric names to scalar metric values
662
+ """
663
+ return self._metrics
664
+
665
+ @property
666
+ def artifacts(self) -> dict[str, "mlflow.models.EvaluationArtifact"]:
667
+ """
668
+ A dictionary mapping standardized artifact names (e.g. "roc_data") to
669
+ artifact content and location information
670
+ """
671
+ return self._artifacts
672
+
673
+ @property
674
+ def run_id(self) -> str:
675
+ """
676
+ The ID of the MLflow Run to which the evaluation results were logged.
677
+ """
678
+ return self._run_id
679
+
680
+ @property
681
+ def tables(self) -> dict[str, "pd.DataFrame"]:
682
+ """
683
+ A dictionary mapping standardized artifact names (e.g. "eval_results_table") to
684
+ corresponding table content as pandas DataFrame.
685
+ """
686
+ eval_tables = {}
687
+ if self._run_id is None:
688
+ _logger.warning("Cannot load eval_results_table because run_id is not specified.")
689
+ return eval_tables
690
+
691
+ for table_name, table_path in self._artifacts.items():
692
+ path = urllib.parse.urlparse(table_path.uri).path
693
+ table_file_name = os.path.basename(path)
+ try:
+ eval_tables[table_name] = mlflow.load_table(table_file_name, run_ids=[self._run_id])
+ except Exception:
+ pass # Swallow the exception since we assume it's not a table.
698
+
699
+ return eval_tables
700
+
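+ # Illustrative usage sketch (assumes `result` is an EvaluationResult returned by
+ # `mlflow.evaluate(...)` and that "/tmp/eval_results" is writable):
+ #
+ #     result.save("/tmp/eval_results")  # writes metrics.json, artifacts_metadata.json, artifacts/
+ #     restored = EvaluationResult.load("/tmp/eval_results")
+ #     assert restored.metrics == result.metrics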
701
+
702
+ @developer_stable
703
+ class ModelEvaluator(metaclass=ABCMeta):
704
+ @classmethod
705
+ @abstractmethod
706
+ def can_evaluate(cls, *, model_type, evaluator_config, **kwargs) -> bool:
707
+ """
708
+ Args:
709
+ model_type: A string describing the model type (e.g., "regressor", "classifier", …).
710
+ evaluator_config: A dictionary of additional configurations for
711
+ the evaluator.
712
+ kwargs: For forwards compatibility, a placeholder for additional arguments
713
+ that may be added to the evaluation interface in the future.
714
+
715
+ Returns:
716
+ True if the evaluator can evaluate the specified model on the
717
+ specified dataset. False otherwise.
718
+ """
719
+
720
+ @abstractmethod
721
+ def evaluate(
722
+ self,
723
+ *,
724
+ model_type,
725
+ dataset,
726
+ run_id,
727
+ evaluator_config,
728
+ model=None,
729
+ extra_metrics=None,
730
+ custom_artifacts=None,
731
+ predictions=None,
732
+ **kwargs,
733
+ ):
734
+ """
735
+ The abstract API to log metrics and artifacts, and return evaluation results.
736
+
737
+ Args:
738
+ model_type: A string describing the model type
739
+ (e.g., ``"regressor"``, ``"classifier"``, …).
740
+ dataset: An instance of `mlflow.models.evaluation.base._EvaluationDataset`
741
+ containing features and labels (optional) for model evaluation.
742
+ run_id: The ID of the MLflow Run to which to log results.
743
+ evaluator_config: A dictionary of additional configurations for
744
+ the evaluator.
745
+ model: A pyfunc model instance. If None, the model output is supposed to be found in
746
+ ``dataset.predictions_data``.
747
+ extra_metrics: A list of :py:class:`EvaluationMetric` objects.
748
+ custom_artifacts: A list of callable custom artifact functions.
749
+ predictions: The column name of the model output column that is used for evaluation.
750
+ This is only used when a model returns a pandas dataframe that contains
751
+ multiple columns.
752
+ kwargs: For forwards compatibility, a placeholder for additional arguments that
753
+ may be added to the evaluation interface in the future.
754
+
755
+ Returns:
756
+ A :py:class:`mlflow.models.EvaluationResult` instance containing
757
+ evaluation metrics and artifacts for the model.
758
+ """
759
+
760
+
761
+ def list_evaluators():
762
+ """
763
+ Return a list of the names of all available evaluators.
764
+ """
765
+ # import _model_evaluation_registry inside the function to avoid a circular import
766
+ from mlflow.models.evaluation.evaluator_registry import _model_evaluation_registry
767
+
768
+ return list(_model_evaluation_registry._registry.keys())
769
+
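+ # Illustrative usage sketch (the exact names returned depend on which evaluators are registered):
+ #
+ #     import mlflow
+ #
+ #     mlflow.models.list_evaluators()  # e.g. ["default", ...]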
770
+
771
+ @contextmanager
772
+ def _start_run_or_reuse_active_run():
773
+ """
774
+ A context manager that:
+ - yields the ID of the active run, if one exists;
+ - otherwise starts a new MLflow run and yields its ID.
778
+ """
779
+ active_run = mlflow.active_run()
780
+ if not active_run:
781
+ # Note `mlflow.start_run` throws if `run_id` is not found.
782
+ with mlflow.start_run() as run:
783
+ yield run.info.run_id
784
+ else:
785
+ yield active_run.info.run_id
786
+
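+ # Illustrative usage sketch of this internal helper:
+ #
+ #     with _start_run_or_reuse_active_run() as run_id:
+ #         # run_id is the active run's ID, or the ID of a freshly started run
+ #         mlflow.log_metric("accuracy", 0.9)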
787
+
788
+ # NB: We often pass around evaluator name, config, and its instance together. Ideally, the
789
+ # evaluator class should have name and config as class attributes, however, it was not
790
+ # designed that way. Adding them while keeping backward compatibility is not trivial.
791
+ # So, we use a dataclass to bundle them together.
792
+ @dataclass
793
+ class EvaluatorBundle:
794
+ name: str
795
+ evaluator: ModelEvaluator
796
+ config: dict[str, Any]
797
+
798
+
799
+ def _resolve_default_evaluator(model_type, default_config) -> list[EvaluatorBundle]:
800
+ """
801
+ Determine which built-in evaluators should be used for the given model type by default.
802
+
803
+ Previously, MLflow evaluate API only had a single "default" evaluator used for all models like
804
+ classifier, regressor, etc. We split it into multiple built-in evaluators for different model
805
+ types for maintainability, but in order to maintain backward compatibility, we need to map
806
+ the "default" provided by users to the correct built-in evaluators.
807
+
808
+ Args:
809
+ model_type: A string describing the model type (e.g., "regressor", "classifier", …).
810
+ default_config: A dictionary of configurations for the "default" evaluator. If any
811
+ non-default built-in evaluator is applicable, this config will be applied to them.
812
+ """
813
+ from mlflow.models.evaluation.evaluator_registry import _model_evaluation_registry
814
+
815
+ builtin_evaluators = []
816
+ for name in _model_evaluation_registry._registry:
817
+ evaluator = _model_evaluation_registry.get_evaluator(name)
818
+ if (
819
+ name != "default"
820
+ and _model_evaluation_registry.is_builtin(name)
821
+ and evaluator.can_evaluate(model_type=model_type, evaluator_config=default_config)
822
+ ):
823
+ builtin_evaluators.append(EvaluatorBundle(name, evaluator, default_config))
824
+
825
+ # We should use DefaultEvaluator only if there is no other built-in evaluator applicable.
826
+ if not builtin_evaluators:
827
+ default_evaluator = _model_evaluation_registry.get_evaluator("default")
828
+ builtin_evaluators = [EvaluatorBundle("default", default_evaluator, default_config)]
829
+
830
+ return builtin_evaluators
831
+
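+ # Illustrative behavior sketch (the resolved evaluator names depend on what is registered
+ # in the evaluator registry; "pos_label" is just an example config key from the docstring below):
+ #
+ #     bundles = _resolve_default_evaluator("classifier", {"pos_label": 1})
+ #     # For a classifier, "default" may resolve to dedicated built-in evaluators (e.g. a
+ #     # classifier evaluator and a shap evaluator), each receiving the same {"pos_label": 1}
+ #     # config; only if no such evaluator applies is the legacy "default" evaluator used.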
832
+
833
+ def resolve_evaluators_and_configs(
834
+ evaluators: Union[str, list[str], None],
835
+ evaluator_config: Union[dict[str, Any], None],
836
+ model_type: Optional[str] = None,
837
+ ) -> list[EvaluatorBundle]:
838
+ """
839
+ The `evaluators` and `evaluator_config` arguments of the `evaluate` API can be specified
840
+ in multiple ways. This function normalizes the arguments into a single format for easier
841
+ downstream processing.
842
+
843
+ Args:
844
+ evaluators: A string or a list of strings specifying the evaluators to use for model
845
+ evaluation. If None, all available evaluators will be used.
846
+ evaluator_config: A dictionary containing configuration items for the evaluators.
847
+ model_type: A string describing the model type (e.g., "regressor", "classifier", …).
848
+
849
+ Returns:
850
+ A list of EvaluatorBundle that contains name, evaluator, config for each evaluator.
851
+ """
852
+ from mlflow.models.evaluation.evaluator_registry import _model_evaluation_registry as rg
853
+
854
+ # NB: The `databricks-agents` package must be installed to use the 'databricks-agent' model
855
+ # type. Ideally this check should be done in the 'databricks-agent' evaluator implementation,
856
+ # but we need to do it here because the code won't reach the evaluator implementation if the
857
+ # package is not installed.
858
+ if model_type == _ModelType.DATABRICKS_AGENT:
859
+ try:
860
+ import databricks.agents # noqa: F401
861
+ except ImportError as e:
862
+ raise MlflowException(
863
+ message="Databricks Agents SDK must be installed to use the "
864
+ f"`{_ModelType.DATABRICKS_AGENT}` model type. Run `pip install databricks-agents` "
865
+ "to install the package and try again.",
866
+ error_code=INVALID_PARAMETER_VALUE,
867
+ ) from e
868
+
869
+ def check_nesting_config_dict(_evaluator_name_list, _evaluator_name_to_conf_map):
870
+ return isinstance(_evaluator_name_to_conf_map, dict) and all(
871
+ k in _evaluator_name_list and isinstance(v, dict)
872
+ for k, v in _evaluator_name_to_conf_map.items()
873
+ )
874
+
875
+ if evaluators is None:
876
+ # If no evaluators are specified, use all available evaluators.
877
+ evaluators = list(rg._registry.keys())
878
+
879
+ evaluator_config = evaluator_config or {}
880
+ if evaluator_config is not None and not any(
881
+ name in evaluator_config for name in evaluators
882
+ ):
883
+ # If an evaluator config is passed but none of the available evaluator names appear
+ # as keys in it, we assume the evaluator config is a flat dict that is applied
+ # globally to all evaluators.
886
+ evaluator_config = dict.fromkeys(evaluators, evaluator_config)
887
+
888
+ # Filter out evaluators that cannot evaluate the model type.
889
+ resolved = []
890
+ for name in evaluators:
891
+ evaluator = rg.get_evaluator(name)
892
+ config = evaluator_config.get(name, {})
893
+ if evaluator.can_evaluate(model_type=model_type, evaluator_config=config):
894
+ resolved.append(EvaluatorBundle(name=name, evaluator=evaluator, config=config))
895
+
896
+ # If any non-default built-in evaluator applies, skip the "default" evaluator.
897
+ default = next((ev for ev in resolved if ev.name == "default"), None)
898
+ non_default_builtins = [
899
+ ev for ev in resolved if ev.name != "default" and rg.is_builtin(ev.name)
900
+ ]
901
+ if default and non_default_builtins:
902
+ resolved.remove(default)
903
+ # Apply default config (passed like `evaluator_config={"default": config}`) to
904
+ # non-default built-in evaluators (e.g., ClassifierEvaluator) if they don't have
905
+ # explicitly specified configs. This is for backward compatibility where we only
906
+ # had a single "default" evaluator used for all models.
907
+ # For example, if the user passes this for a classifier model:
908
+ # evaluator_config = {"default": my_config}
909
+ # it should be equivalent to
910
+ # evaluator_config = {"classifier": my_config, "shap": my_config}
911
+ for ev in non_default_builtins:
912
+ ev.config = ev.config or default.config
913
+
914
+ return resolved
915
+
916
+ elif isinstance(evaluators, str):
917
+ # Single evaluator name specified
918
+ if not (evaluator_config is None or isinstance(evaluator_config, dict)):
919
+ raise MlflowException(
920
+ message="If `evaluators` argument is the name of an evaluator, evaluator_config"
921
+ " must be None or a dict containing config items for the evaluator.",
922
+ error_code=INVALID_PARAMETER_VALUE,
923
+ )
924
+
925
+ evaluator_config = evaluator_config or {}
926
+ if evaluators == "default":
927
+ # Previously we only had a single "default" evaluator used for all models.
928
+ # We need to map "default" to the new dedicated builtin evaluators.
929
+ return _resolve_default_evaluator(model_type, evaluator_config)
930
+ elif rg.is_registered(evaluators):
931
+ return [EvaluatorBundle(evaluators, rg.get_evaluator(evaluators), evaluator_config)]
932
+ else:
933
+ return []
934
+
935
+ elif isinstance(evaluators, list):
936
+ if evaluator_config is not None and not check_nesting_config_dict(
937
+ evaluators, evaluator_config
938
+ ):
939
+ raise MlflowException(
940
+ message="If `evaluators` argument is an evaluator name list, evaluator_config "
941
+ "must be a dict containing mapping from evaluator name to individual "
942
+ "evaluator config dict.",
943
+ error_code=INVALID_PARAMETER_VALUE,
944
+ )
945
+ evaluator_config = evaluator_config or {}
946
+
947
+ # Previously we only had a single "default" evaluator used for all models.
948
+ # We need to map "default" to the new dedicated builtin evaluators.
949
+ resolved = []
950
+ for name in evaluators:
951
+ config = evaluator_config.get(name, {})
952
+ if name == "default":
953
+ builtin_evaluators = _resolve_default_evaluator(model_type, config)
954
+ resolved.extend(builtin_evaluators)
955
+ else:
956
+ resolved.append(EvaluatorBundle(name, rg.get_evaluator(name), config))
957
+ return resolved
958
+ else:
959
+ raise MlflowException(
960
+ message="Invalid `evaluators` and `evaluator_config` arguments. "
961
+ "Please refer to the documentation for correct usage.",
962
+ error_code=INVALID_PARAMETER_VALUE,
963
+ )
964
+
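+ # Illustrative sketch of the argument forms this function normalizes into EvaluatorBundle
+ # objects (evaluator names and config keys are examples drawn from the comments above):
+ #
+ #     # A single evaluator name with a flat config dict:
+ #     resolve_evaluators_and_configs("default", {"pos_label": 1}, model_type="classifier")
+ #
+ #     # A list of evaluator names with a per-evaluator (nested) config dict:
+ #     resolve_evaluators_and_configs(
+ #         ["default", "shap"],
+ #         {"default": {"pos_label": 1}, "shap": {"explainability_nsamples": 100}},
+ #         model_type="classifier",
+ #     )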
965
+
966
+ def _model_validation_contains_model_comparison(validation_thresholds):
967
+ """
968
+ Helper function for determining if validation_thresholds contains
969
+ thresholds for model comparison: either min_relative_change or min_absolute_change
970
+ """
971
+ if not validation_thresholds:
972
+ return False
973
+ thresholds = validation_thresholds.values()
974
+ return any(
975
+ threshold.min_relative_change or threshold.min_absolute_change for threshold in thresholds
976
+ )
977
+
978
+
979
+ _last_failed_evaluator = None
980
+
981
+
982
+ def _get_last_failed_evaluator():
983
+ """
984
+ Return the name of the last evaluator that failed during a call to `evaluate`.
+ This can be used to check which evaluator failed when the `evaluate` API fails.
986
+ """
987
+ return _last_failed_evaluator
988
+
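+ # Illustrative debugging sketch (assumes `model_uri` and a pandas DataFrame `df` exist):
+ #
+ #     try:
+ #         mlflow.evaluate(model=model_uri, data=df, targets="label", model_type="classifier")
+ #     except Exception:
+ #         failed = _get_last_failed_evaluator()  # name of the evaluator that failed, or None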
989
+
990
+ # DO NOT CHANGE THE ORDER OF THE ARGUMENTS
991
+ # The order of the arguments need to be preserved. You can add new arguments at the end
992
+ # of the argument list, but do not change the order of the existing arguments.
993
+ def _evaluate(
994
+ *,
995
+ model,
996
+ model_type,
997
+ model_id,
998
+ dataset,
999
+ run_id,
1000
+ # The `evaluator_name_list` and `evaluator_name_to_conf_map` are not used by MLflow at all,
1001
+ # but we need to keep these for backward compatibility.
1002
+ evaluator_name_list,
1003
+ evaluator_name_to_conf_map,
1004
+ extra_metrics,
1005
+ custom_artifacts,
1006
+ predictions,
1007
+ evaluators,
1008
+ ):
1009
+ """
1010
+ The public API "evaluate" verifies the arguments first, and then passes normalized arguments
1011
+ to the _evaluate method.
1012
+ """
1013
+ global _last_failed_evaluator
1014
+ _last_failed_evaluator = None
1015
+
1016
+ client = MlflowClient()
1017
+
1018
+ model_uuid = getattr(model, "metadata", None)
1019
+
1020
+ if model_uuid is not None:
1021
+ model_uuid = model_uuid.model_uuid
1022
+ dataset._log_dataset_tag(client, run_id, model_uuid)
1023
+
1024
+ eval_results = []
1025
+ should_enable_tracing = model is not None # Do not enable tracing if a static dataset is provided
1026
+ for eval_ in evaluators:
1027
+ _logger.debug(f"Evaluating the model with the {eval_.name} evaluator.")
1028
+ _last_failed_evaluator = eval_.name
1029
+ if eval_.evaluator.can_evaluate(model_type=model_type, evaluator_config=eval_.config):
1030
+ with configure_autologging_for_evaluation(enable_tracing=should_enable_tracing):
1031
+ eval_result = eval_.evaluator.evaluate(
1032
+ model=model,
1033
+ model_type=model_type,
1034
+ model_id=model_id,
1035
+ dataset=dataset,
1036
+ run_id=run_id,
1037
+ evaluator_config=eval_.config,
1038
+ extra_metrics=extra_metrics,
1039
+ custom_artifacts=custom_artifacts,
1040
+ predictions=predictions,
1041
+ )
1042
+
1043
+ if eval_result is not None:
1044
+ eval_results.append(eval_result)
1045
+
1046
+ _last_failed_evaluator = None
1047
+
1048
+ if len(eval_results) == 0:
1049
+ raise MlflowException(
1050
+ message="The model could not be evaluated by any of the registered evaluators, please "
1051
+ "verify that the model type and other configs are set correctly.",
1052
+ error_code=INVALID_PARAMETER_VALUE,
1053
+ )
1054
+
1055
+ merged_eval_result = EvaluationResult({}, {}, None)
1056
+
1057
+ for eval_result in eval_results:
1058
+ merged_eval_result.metrics.update(eval_result.metrics)
1059
+ merged_eval_result.artifacts.update(eval_result.artifacts)
1060
+
1061
+ return merged_eval_result
1062
+
1063
+
1064
+ def _get_model_from_function(fn):
1065
+ from mlflow.pyfunc.model import _PythonModelPyfuncWrapper
1066
+
1067
+ class ModelFromFunction(mlflow.pyfunc.PythonModel):
1068
+ def predict(self, context, model_input: pd.DataFrame):
1069
+ return fn(model_input)
1070
+
1071
+ python_model = ModelFromFunction()
1072
+ return _PythonModelPyfuncWrapper(python_model, None, None)
1073
+
1074
+
1075
+ def _is_model_deployment_endpoint_uri(model: Any) -> bool:
1076
+ if not isinstance(model, str):
1077
+ return False
1078
+
1079
+ from mlflow.metrics.genai.model_utils import _parse_model_uri
1080
+
1081
+ try:
1082
+ schema, path = _parse_model_uri(model)
1083
+ return schema == "endpoints"
1084
+ except MlflowException:
1085
+ return False
1086
+
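+ # Illustrative behavior sketch ("endpoints:/my-chat" follows the endpoint URI form used in the
+ # evaluate() docstring below; the other inputs are hypothetical):
+ #
+ #     _is_model_deployment_endpoint_uri("endpoints:/my-chat")    # True
+ #     _is_model_deployment_endpoint_uri("runs:/<run_id>/model")  # False (not an endpoint URI)
+ #     _is_model_deployment_endpoint_uri(lambda x: x)             # False (not a string)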
1087
+
1088
+ def _get_model_from_deployment_endpoint_uri(
1089
+ endpoint_uri: str, params: Optional[dict[str, Any]] = None
1090
+ ):
1091
+ from mlflow.metrics.genai.model_utils import _parse_model_uri
1092
+ from mlflow.pyfunc.model import ModelFromDeploymentEndpoint, _PythonModelPyfuncWrapper
1093
+
1094
+ _, endpoint = _parse_model_uri(endpoint_uri)
1095
+ params = params or {}
1096
+
1097
+ python_model = ModelFromDeploymentEndpoint(endpoint, params)
1098
+ return _PythonModelPyfuncWrapper(python_model, None, None)
1099
+
1100
+
1101
+ def evaluate( # noqa: D417
1102
+ model=None,
1103
+ data=None,
1104
+ *,
1105
+ model_type=None,
1106
+ targets=None,
1107
+ predictions=None,
1108
+ dataset_path=None,
1109
+ feature_names=None,
1110
+ evaluators=None,
1111
+ evaluator_config=None,
1112
+ extra_metrics=None,
1113
+ custom_artifacts=None,
1114
+ env_manager="local",
1115
+ model_config=None,
1116
+ inference_params=None,
1117
+ model_id=None,
1118
+ _called_from_genai_evaluate=False,
1119
+ ):
1120
+ '''
1121
+ Evaluate the model performance on given data and selected metrics.
1122
+
1123
+ This function evaluates a PyFunc model or custom callable on the specified dataset using
1124
+ specified ``evaluators``, and logs resulting metrics & artifacts to MLflow tracking server.
1125
+ Users can also skip setting ``model`` and put the model outputs in ``data`` directly for
1126
+ evaluation. For detailed information, please read
1127
+ `the Model Evaluation documentation <../../model-evaluation/index.html>`_.
1128
+
1129
+ Default Evaluator behavior:
1130
+ - The default evaluator, which can be invoked with ``evaluators="default"`` or
1131
+ ``evaluators=None``, supports model types listed below. For each pre-defined model type, the
1132
+ default evaluator evaluates your model on a selected set of metrics and generates artifacts
1133
+ like plots. Please find more details below.
1134
+
1135
+ - For both the ``"regressor"`` and ``"classifier"`` model types, the default evaluator
1136
+ generates model summary plots and feature importance plots using
1137
+ `SHAP <https://shap.readthedocs.io/en/latest/index.html>`_.
1138
+
1139
+ - For regressor models, the default evaluator additionally logs:
1140
+ - **metrics**: example_count, mean_absolute_error, mean_squared_error,
1141
+ root_mean_squared_error, sum_on_target, mean_on_target, r2_score, max_error,
1142
+ mean_absolute_percentage_error.
1143
+
1144
+ - For binary classifiers, the default evaluator additionally logs:
1145
+ - **metrics**: true_negatives, false_positives, false_negatives, true_positives, recall,
1146
+ precision, f1_score, accuracy_score, example_count, log_loss, roc_auc,
1147
+ precision_recall_auc.
1148
+ - **artifacts**: lift curve plot, precision-recall plot, ROC plot.
1149
+
1150
+ - For multiclass classifiers, the default evaluator additionally logs:
1151
+ - **metrics**: accuracy_score, example_count, f1_score_micro, f1_score_macro, log_loss
1152
+ - **artifacts**: A CSV file for "per_class_metrics" (per-class metrics includes
1153
+ true_negatives/false_positives/false_negatives/true_positives/recall/precision/roc_auc,
1154
+ precision_recall_auc), precision-recall merged curves plot, ROC merged curves plot.
1155
+
1156
+ - For question-answering models, the default evaluator logs:
1157
+ - **metrics**: ``exact_match``, ``token_count``, `toxicity`_ (requires `evaluate`_,
+ `torch`_, `transformers`_), `flesch_kincaid_grade_level`_ (requires `textstat`_) and `ari_grade_level`_.
1159
+ - **artifacts**: A JSON file containing the inputs, outputs, targets (if the ``targets``
1160
+ argument is supplied), and per-row metrics of the model in tabular format.
1161
+
1162
+ .. _toxicity:
1163
+ https://huggingface.co/spaces/evaluate-measurement/toxicity
1164
+
1165
+ .. _torch:
1166
+ https://pytorch.org/get-started/locally/
1167
+
1168
+ .. _transformers:
1169
+ https://huggingface.co/docs/transformers/installation
1170
+
1171
+ .. _ari_grade_level:
1172
+ https://en.wikipedia.org/wiki/Automated_readability_index
1173
+
1174
+ .. _flesch_kincaid_grade_level:
1175
+ https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch%E2%80%93Kincaid_grade_level
1176
+
1177
+ .. _evaluate:
1178
+ https://pypi.org/project/evaluate
1179
+
1180
+ .. _textstat:
1181
+ https://pypi.org/project/textstat
1182
+
1183
+ - For text-summarization models, the default evaluator logs:
1184
+ - **metrics**: ``token_count``, `ROUGE`_ (requires `evaluate`_, `nltk`_, and
1185
+ `rouge_score`_ to be installed), `toxicity`_ (requires `evaluate`_, `torch`_,
1186
+ `transformers`_), `ari_grade_level`_ (requires `textstat`_),
1187
+ `flesch_kincaid_grade_level`_ (requires `textstat`_).
1188
+ - **artifacts**: A JSON file containing the inputs, outputs, targets (if the ``targets``
1189
+ argument is supplied), and per-row metrics of the model in the tabular format.
1190
+
1191
+ .. _ROUGE:
1192
+ https://huggingface.co/spaces/evaluate-metric/rouge
1193
+
1194
+ .. _toxicity:
1195
+ https://huggingface.co/spaces/evaluate-measurement/toxicity
1196
+
1197
+ .. _torch:
1198
+ https://pytorch.org/get-started/locally/
1199
+
1200
+ .. _transformers:
1201
+ https://huggingface.co/docs/transformers/installation
1202
+
1203
+ .. _ari_grade_level:
1204
+ https://en.wikipedia.org/wiki/Automated_readability_index
1205
+
1206
+ .. _flesch_kincaid_grade_level:
1207
+ https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch%E2%80%93Kincaid_grade_level
1208
+
1209
+ .. _evaluate:
1210
+ https://pypi.org/project/evaluate
1211
+
1212
+ .. _nltk:
1213
+ https://pypi.org/project/nltk
1214
+
1215
+ .. _rouge_score:
1216
+ https://pypi.org/project/rouge-score
1217
+
1218
+ .. _textstat:
1219
+ https://pypi.org/project/textstat
1220
+
1221
+ - For text models, the default evaluator logs:
1222
+ - **metrics**: ``token_count``, `toxicity`_ (requires `evaluate`_, `torch`_,
1223
+ `transformers`_), `ari_grade_level`_ (requires `textstat`_),
1224
+ `flesch_kincaid_grade_level`_ (requires `textstat`_).
1225
+ - **artifacts**: A JSON file containing the inputs, outputs, targets (if the ``targets``
1226
+ argument is supplied), and per-row metrics of the model in tabular format.
1227
+
1228
+ .. _evaluate:
1229
+ https://pypi.org/project/evaluate
1230
+
1231
+ .. _toxicity:
1232
+ https://huggingface.co/spaces/evaluate-measurement/toxicity
1233
+
1234
+ .. _torch:
1235
+ https://pytorch.org/get-started/locally/
1236
+
1237
+ .. _transformers:
1238
+ https://huggingface.co/docs/transformers/installation
1239
+
1240
+ .. _ari_grade_level:
1241
+ https://en.wikipedia.org/wiki/Automated_readability_index
1242
+
1243
+ .. _flesch_kincaid_grade_level:
1244
+ https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests#Flesch%E2%80%93Kincaid_grade_level
1245
+
1246
+ .. _textstat:
1247
+ https://pypi.org/project/textstat
1248
+
1249
+ - For retriever models, the default evaluator logs:
1250
+ - **metrics**: :mod:`precision_at_k(k) <mlflow.metrics.precision_at_k>`,
1251
+ :mod:`recall_at_k(k) <mlflow.metrics.recall_at_k>` and
1252
+ :mod:`ndcg_at_k(k) <mlflow.metrics.ndcg_at_k>` - all have a default value of
1253
+ ``retriever_k`` = 3.
1254
+ - **artifacts**: A JSON file containing the inputs, outputs, targets, and per-row metrics
1255
+ of the model in tabular format.
1256
+
1257
+ - For sklearn models, the default evaluator additionally logs the model's evaluation criterion
1258
+ (e.g. mean accuracy for a classifier) computed by the `model.score` method.
1259
+
1260
+ - The metrics/artifacts listed above are logged to the active MLflow run.
1261
+ If no active run exists, a new MLflow run is created for logging these metrics and
1262
+ artifacts.
1263
+
1264
+ - Additionally, information about the specified dataset - hash, name (if specified), path
1265
+ (if specified), and the UUID of the model that evaluated it - is logged to the
1266
+ ``mlflow.datasets`` tag.
1267
+
1268
+ - The available ``evaluator_config`` options for the default evaluator include:
1269
+ - **log_model_explainability**: A boolean value specifying whether or not to log model
1270
+ explainability insights, default value is True.
1271
+ - **log_explainer**: If True, log the explainer used to compute model explainability
1272
+ insights as a model. Default value is False.
1273
+ - **explainability_algorithm**: A string to specify the SHAP Explainer algorithm for model
1274
+ explainability. Supported algorithm includes: 'exact', 'permutation', 'partition',
1275
+ 'kernel'.
1276
+ If not set, ``shap.Explainer`` is used with the "auto" algorithm, which chooses the best
1277
+ Explainer based on the model.
1278
+ - **explainability_nsamples**: The number of sample rows to use for computing model
1279
+ explainability insights. Default value is 2000.
1280
+ - **explainability_kernel_link**: The kernel link function used by shap kernel explainer.
1281
+ Available values are "identity" and "logit". Default value is "identity".
1282
+ - **max_classes_for_multiclass_roc_pr**:
1283
+ For multiclass classification tasks, the maximum number of classes for which to log
1284
+ the per-class ROC curve and Precision-Recall curve. If the number of classes is
1285
+ larger than the configured maximum, these curves are not logged.
1286
+ - **metric_prefix**: An optional prefix to prepend to the name of each metric and artifact
1287
+ produced during evaluation.
1288
+ - **log_metrics_with_dataset_info**: A boolean value specifying whether or not to include
1289
+ information about the evaluation dataset in the name of each metric logged to MLflow
1290
+ Tracking during evaluation, default value is True.
1291
+ - **pos_label**: If specified, the positive label to use when computing classification
1292
+ metrics such as precision, recall, f1, etc. for binary classification models. For
1293
+ multiclass classification and regression models, this parameter will be ignored.
1294
+ - **average**: The averaging method to use when computing classification metrics such as
1295
+ precision, recall, f1, etc. for multiclass classification models
1296
+ (default: ``'weighted'``). For binary classification and regression models, this
1297
+ parameter will be ignored.
1298
+ - **sample_weights**: Weights for each sample to apply when computing model performance
1299
+ metrics.
1300
+ - **col_mapping**: A dictionary mapping column names in the input dataset or output
1301
+ predictions to column names used when invoking the evaluation functions.
1302
+ - **retriever_k**: A parameter used when ``model_type="retriever"`` as the number of
1303
+ top-ranked retrieved documents to use when computing the built-in metric
1304
+ :mod:`precision_at_k(k) <mlflow.metrics.precision_at_k>`,
1305
+ :mod:`recall_at_k(k) <mlflow.metrics.recall_at_k>` and
1306
+ :mod:`ndcg_at_k(k) <mlflow.metrics.ndcg_at_k>`. Default value is 3. For all other
1307
+ model types, this parameter will be ignored.
1308
+
1309
+ - Limitations of evaluation dataset:
1310
+ - For classification tasks, dataset labels are used to infer the total number of classes.
1311
+ - For binary classification tasks, the negative label value must be 0 or -1 or False, and
1312
+ the positive label value must be 1 or True.
1313
+
1314
+ - Limitations of metrics/artifacts computation:
1315
+ - For classification tasks, some metric and artifact computations require the model to
1316
+ output class probabilities. Currently, for scikit-learn models, the default evaluator
1317
+ calls the ``predict_proba`` method on the underlying model to obtain probabilities. For
1318
+ other model types, the default evaluator does not compute metrics/artifacts that require
1319
+ probability outputs.
1320
+
1321
+ - Limitations of default evaluator logging model explainability insights:
1322
+ - The ``shap.Explainer`` ``auto`` algorithm uses the ``Linear`` explainer for linear models
1323
+ and the ``Tree`` explainer for tree models. Because SHAP's ``Linear`` and ``Tree``
1324
+ explainers do not support multi-class classification, the default evaluator falls back to
1325
+ using the ``Exact`` or ``Permutation`` explainers for multi-class classification tasks.
1326
+ - Logging model explainability insights is not currently supported for PySpark models.
1327
+ - The evaluation dataset label values must be numeric or boolean, all feature values
1328
+ must be numeric, and each feature column must only contain scalar values.
1329
+
1330
+ - Limitations when environment restoration is enabled:
1331
+ - When environment restoration is enabled for the evaluated model (i.e. a non-local
1332
+ ``env_manager`` is specified), the model is loaded as a client that invokes an MLflow
1333
+ Model Scoring Server process in an independent Python environment with the model's
1334
+ training time dependencies installed. As such, methods like ``predict_proba`` (for
1335
+ probability outputs) or ``score`` (computes the evaluation criterion for sklearn models)
1336
+ of the model become inaccessible and the default evaluator does not compute metrics or
1337
+ artifacts that require those methods.
1338
+ - Because the model is an MLflow Model Server process, SHAP explanations are slower to
1339
+ compute. As such, model explainability is disabled when a non-local ``env_manager`` is
+ specified, unless the ``evaluator_config`` option **log_model_explainability** is
1341
+ explicitly set to ``True``.
1342
+
1343
+ Args:
1344
+ model: Optional. If specified, it should be one of the following:
1345
+
1346
+ - A pyfunc model instance
1347
+ - A URI referring to a pyfunc model
1348
+ - A URI referring to an MLflow Deployments endpoint e.g. ``"endpoints:/my-chat"``
1349
+ - A callable function: This function should be able to take in model input and
1350
+ return predictions. It should follow the signature of the
1351
+ :py:func:`predict <mlflow.pyfunc.PyFuncModel.predict>` method. Here's an example
1352
+ of a valid function:
1353
+
1354
+ .. code-block:: python
1355
+
1356
+ model = mlflow.pyfunc.load_model(model_uri)
1357
+
1358
+
1359
+ def fn(model_input):
1360
+ return model.predict(model_input)
1361
+
1362
+ If omitted, it indicates a static dataset will be used for evaluation instead of a
1363
+ model. In this case, the ``data`` argument must be a Pandas DataFrame or an mlflow
1364
+ PandasDataset that contains model outputs, and the ``predictions`` argument must be the
1365
+ name of the column in ``data`` that contains model outputs.
1366
+
1367
+ data: One of the
1368
+ following:
1369
+
1370
+ - A numpy array or list of evaluation features, excluding labels.
1371
+ - A Pandas DataFrame containing evaluation features, labels, and optionally model
1372
+ outputs. Model outputs are required to be provided when model is unspecified.
1373
+ If the ``feature_names`` argument is not specified, all columns except for the label
1374
+ column and model_output column are regarded as feature columns. Otherwise,
1375
+ only column names present in ``feature_names`` are regarded as feature columns.
1376
+ - A Spark DataFrame containing evaluation features and labels. If
1377
+ the ``feature_names`` argument is not specified, all columns except for the label
1378
+ column are regarded as feature columns. Otherwise, only column names present in
1379
+ ``feature_names`` are regarded as feature columns. Only the first 10000 rows in
1380
+ the Spark DataFrame will be used as evaluation data.
1381
+ - A :py:class:`mlflow.data.dataset.Dataset` instance containing evaluation
1382
+ features, labels, and optionally model outputs. Model outputs are only supported
1383
+ with a PandasDataset. Model outputs are required when model is unspecified, and
1384
+ should be specified via the ``predictions`` property of the PandasDataset.
1385
+
1386
+ model_type: (Optional) A string describing the model type. The default evaluator
1387
+ supports the following model types:
1388
+
1389
+ - ``'classifier'``
1390
+ - ``'regressor'``
1391
+ - ``'question-answering'``
1392
+ - ``'text-summarization'``
1393
+ - ``'text'``
1394
+ - ``'retriever'``
1395
+
1396
+ If no ``model_type`` is specified, then you must provide a list of
1397
+ metrics to compute via the ``extra_metrics`` param.
1398
+
1399
+ .. note::
1400
+ ``'question-answering'``, ``'text-summarization'``, ``'text'``, and
1401
+ ``'retriever'`` are experimental and may be changed or removed in a
1402
+ future release.
1403
+
1404
+ targets: If ``data`` is a numpy array or list, a numpy array or list of evaluation
1405
+ labels. If ``data`` is a DataFrame, the string name of a column from ``data``
1406
+ that contains evaluation labels. Required for classifier and regressor models,
1407
+ but optional for question-answering, text-summarization, and text models. If
1408
+ ``data`` is a :py:class:`mlflow.data.dataset.Dataset` that defines targets,
1409
+ then ``targets`` is optional.
1410
+
1411
+ predictions: Optional. The name of the column that contains model outputs.
1412
+
1413
+ - When ``model`` is specified and outputs multiple columns, ``predictions`` can be used
1414
+ to specify the name of the column that will be used to store model outputs for
1415
+ evaluation.
1416
+ - When ``model`` is not specified and ``data`` is a pandas dataframe,
1417
+ ``predictions`` can be used to specify the name of the column in ``data`` that
1418
+ contains model outputs.
1419
+
1420
+ .. code-block:: python
1421
+ :caption: Example usage of predictions
1422
+
1423
+ # Evaluate a model that outputs multiple columns
1424
+ data = pd.DataFrame({"question": ["foo"]})
1425
+
1426
+
1427
+ def model(inputs):
1428
+ return pd.DataFrame({"answer": ["bar"], "source": ["baz"]})
1429
+
1430
+
1431
+ results = evaluate(
1432
+ model=model,
1433
+ data=data,
1434
+ predictions="answer",
1435
+ # other arguments if needed
1436
+ )
1437
+
1438
+ # Evaluate a static dataset
1439
+ data = pd.DataFrame({"question": ["foo"], "answer": ["bar"], "source": ["baz"]})
1440
+ results = evaluate(
1441
+ data=data,
1442
+ predictions="answer",
1443
+ # other arguments if needed
1444
+ )
1445
+ dataset_path: (Optional) The path where the data is stored. Must not contain double
1446
+ quotes (``"``). If specified, the path is logged to the ``mlflow.datasets``
1447
+ tag for lineage tracking purposes.
1448
+
1449
+ feature_names: (Optional) A list. If the ``data`` argument is a numpy array or list,
1450
+ ``feature_names`` is a list of the feature names for each feature. If
1451
+ ``feature_names=None``, then the ``feature_names`` are generated using the
1452
+ format ``feature_{feature_index}``. If the ``data`` argument is a Pandas
1453
+ DataFrame or a Spark DataFrame, ``feature_names`` is a list of the names
1454
+ of the feature columns in the DataFrame. If ``feature_names=None``, then
1455
+ all columns except the label column and the predictions column are
1456
+ regarded as feature columns.
1457
+
1458
+ evaluators: The name of the evaluator to use for model evaluation, or a list of
1459
+ evaluator names. If unspecified, all evaluators capable of evaluating the
1460
+ specified model on the specified dataset are used. The default evaluator
1461
+ can be referred to by the name ``"default"``. To see all available
1462
+ evaluators, call :py:func:`mlflow.models.list_evaluators`.
1463
+
1464
+ evaluator_config: A dictionary of additional configurations to supply to the evaluator.
1465
+ If multiple evaluators are specified, each configuration should be
1466
+ supplied as a nested dictionary whose key is the evaluator name.
1467
+
1468
+ extra_metrics:
1469
+ (Optional) A list of :py:class:`EvaluationMetric <mlflow.models.EvaluationMetric>`
1470
+ objects. These metrics are computed in addition to the default metrics associated with
1471
+ pre-defined `model_type`, and setting `model_type=None` will only compute the metrics
1472
+ specified in `extra_metrics`. See the `mlflow.metrics` module for more information about
1473
+ the builtin metrics and how to define extra metrics.
1474
+
1475
+ .. code-block:: python
1476
+ :caption: Example usage of extra metrics
1477
+
1478
+ import mlflow
1479
+ import numpy as np
1480
+
1481
+
1482
+ def root_mean_squared_error(eval_df, _builtin_metrics):
1483
+ return np.sqrt((np.abs(eval_df["prediction"] - eval_df["target"]) ** 2).mean())
1484
+
1485
+
1486
+ rmse_metric = mlflow.models.make_metric(
1487
+ eval_fn=root_mean_squared_error,
1488
+ greater_is_better=False,
1489
+ )
1490
+ mlflow.evaluate(..., extra_metrics=[rmse_metric])
1491
+
1492
+ custom_artifacts:
1493
+ (Optional) A list of custom artifact functions with the following signature:
1494
+
1495
+ .. code-block:: python
1496
+
1497
+ def custom_artifact(
1498
+ eval_df: Union[pandas.DataFrame, pyspark.sql.DataFrame],
1499
+ builtin_metrics: Dict[str, float],
1500
+ artifacts_dir: str,
1501
+ ) -> Dict[str, Any]:
1502
+ """
1503
+ Args:
1504
+ eval_df:
1505
+ A Pandas or Spark DataFrame containing ``prediction`` and ``target``
1506
+ column. The ``prediction`` column contains the predictions made by the
1507
+ model. The ``target`` column contains the corresponding labels to the
1508
+ predictions made on that row.
1509
+ builtin_metrics:
1510
+ A dictionary containing the metrics calculated by the default evaluator.
1511
+ The keys are the names of the metrics and the values are the scalar
1512
+ values of the metrics. Refer to the DefaultEvaluator behavior section
1513
+ for what metrics will be returned based on the type of model (i.e.
1514
+ classifier or regressor).
1515
+ artifacts_dir:
1516
+ A temporary directory path that can be used by the custom artifacts
1517
+ function to temporarily store produced artifacts. The directory will be
1518
+ deleted after the artifacts are logged.
1519
+
1520
+ Returns:
1521
+ A dictionary that maps artifact names to artifact objects
1522
+ (e.g. a Matplotlib Figure) or to artifact paths within ``artifacts_dir``.
1523
+ """
1524
+ ...
1525
+
1526
+ Object types that artifacts can be represented as:
1527
+
1528
+ - A string uri representing the file path to the artifact. MLflow will infer the
1529
+ type of the artifact based on the file extension.
1530
+ - A string representation of a JSON object. This will be saved as a .json artifact.
1531
+ - Pandas DataFrame. This will be resolved as a CSV artifact.
1532
+ - Numpy array. This will be saved as a .npy artifact.
1533
+ - Matplotlib Figure. This will be saved as an image artifact. Note that
1534
+ ``matplotlib.pyplot.savefig`` is called behind the scene with default
1535
+ configurations.
1536
+ To customize, either save the figure with the desired configurations and return
1537
+ its file path or define customizations through environment variables in
1538
+ ``matplotlib.rcParams``.
1539
+ - Other objects will be attempted to be pickled with the default protocol.
1540
+
1541
+ .. code-block:: python
1542
+ :caption: Example usage of custom artifacts
1543
+
1544
+ import mlflow
1545
+ import matplotlib.pyplot as plt
1546
+
1547
+
1548
+ def scatter_plot(eval_df, builtin_metrics, artifacts_dir):
1549
+ plt.scatter(eval_df["prediction"], eval_df["target"])
1550
+ plt.xlabel("Targets")
1551
+ plt.ylabel("Predictions")
1552
+ plt.title("Targets vs. Predictions")
1553
+ plt.savefig(os.path.join(artifacts_dir, "example.png"))
1554
+ plt.close()
1555
+ return {"pred_target_scatter": os.path.join(artifacts_dir, "example.png")}
1556
+
1557
+
1558
+ def pred_sample(eval_df, _builtin_metrics, _artifacts_dir):
1559
+ return {"pred_sample": eval_df.head(10)}
1560
+
1561
+
1562
+ mlflow.evaluate(..., custom_artifacts=[scatter_plot, pred_sample])
1563
+
1564
+ env_manager: Specify an environment manager to load the candidate ``model`` in
1565
+ isolated Python environments and restore their
1566
+ dependencies. Default value is ``local``, and the following values are
1567
+ supported:
1568
+
1569
+ - ``virtualenv``: (Recommended) Use virtualenv to restore the python
1570
+ environment that was used to train the model.
1571
+ - ``conda``: Use Conda to restore the software environment that was used
1572
+ to train the model.
1573
+ - ``local``: Use the current Python environment for model inference, which
1574
+ may differ from the environment used to train the model and may lead to
1575
+ errors or invalid predictions.
1576
+
1577
+ model_config: the model configuration to use for loading the model with pyfunc. Inspect
1578
+ the model's pyfunc flavor to know which keys are supported for your
1579
+ specific model. If not indicated, the default model configuration
1580
+ from the model is used (if any).
1581
+
1582
+ inference_params: (Optional) A dictionary of inference parameters to be passed to the model
1583
+ when making predictions, such as ``{"max_tokens": 100}``. This is only used when
1584
+ the ``model`` is an MLflow Deployments endpoint URI e.g. ``"endpoints:/my-chat"``
1585
+
1586
+ model_id: (Optional) The ID of the MLflow LoggedModel or Model Version to which the
1587
+ evaluation results (e.g. metrics and traces) will be linked. If `model_id` is not
1588
+ specified but `model` is specified, the ID from `model` will be used.
1589
+
1590
+ _called_from_genai_evaluate: (Optional) Only used internally.
1591
+
1592
+ Returns:
1593
+ An :py:class:`mlflow.models.EvaluationResult` instance containing
1594
+ metrics of evaluating the model with the given dataset.
1595
+ '''
1596
+ from mlflow.models.evaluation.evaluator_registry import _model_evaluation_registry
1597
+ from mlflow.pyfunc import PyFuncModel, _load_model_or_server, _ServedPyFuncModel
1598
+ from mlflow.utils import env_manager as _EnvManager
1599
+
1600
+ # Inference params are currently only supported for passing a deployment endpoint as the model.
1601
+ # TODO: We should support inference_params for other model types
1602
+
1603
+ if inference_params is not None and not _is_model_deployment_endpoint_uri(model):
1604
+ raise MlflowException(
1605
+ message="The inference_params argument can only be specified when the model "
1606
+ "is an MLflow Deployments endpoint URI like `endpoints:/my-chat`",
1607
+ error_code=INVALID_PARAMETER_VALUE,
1608
+ )
1609
+
1610
+ if evaluator_config is not None:
1611
+ col_mapping = evaluator_config.get("col_mapping", {})
1612
+
1613
+ if isinstance(targets, str):
1614
+ targets = col_mapping.get(targets, targets)
1615
+
1616
+ if isinstance(predictions, str):
1617
+ predictions = col_mapping.get(predictions, predictions)
1618
+
1619
+ if data is None:
1620
+ raise MlflowException(
1621
+ message="The data argument cannot be None.",
1622
+ error_code=INVALID_PARAMETER_VALUE,
1623
+ )
1624
+
1625
+ _EnvManager.validate(env_manager)
1626
+
1627
+ # If Dataset is provided, the targets can only be specified by the Dataset,
1628
+ # not the targets parameter of the mlflow.evaluate() API.
1629
+ if isinstance(data, Dataset) and targets is not None:
1630
+ raise MlflowException(
1631
+ message="The top-level targets parameter should not be specified since a Dataset "
1632
+ "is used. Please only specify the targets column name in the Dataset. For example: "
1633
+ "`data = mlflow.data.from_pandas(df=X.assign(y=y), targets='y')`. "
1634
+ "Meanwhile, please specify `mlflow.evaluate(..., targets=None, ...)`.",
1635
+ error_code=INVALID_PARAMETER_VALUE,
1636
+ )
1637
+ # If Dataset is provided and model is None, then the predictions can only be specified by the
1638
+ # Dataset, not the predictions parameter of the mlflow.evaluate() API.
1639
+ if isinstance(data, Dataset) and model is None and predictions is not None:
1640
+ raise MlflowException(
1641
+ message="The top-level predictions parameter should not be specified since a Dataset "
1642
+ "is used. Please only specify the predictions column name in the Dataset. For example:"
1643
+ " `data = mlflow.data.from_pandas(df=X.assign(y=y), predictions='y')`"
1644
+ "Meanwhile, please specify `mlflow.evaluate(..., predictions=None, ...)`.",
1645
+ error_code=INVALID_PARAMETER_VALUE,
1646
+ )
1647
+ # If Dataset is provided and model is specified, then the data.predictions cannot be specified.
1648
+ if (
1649
+ isinstance(data, Dataset)
1650
+ and model is not None
1651
+ and getattr(data, "predictions", None) is not None
1652
+ ):
1653
+ raise MlflowException(
1654
+ message="The predictions parameter should not be specified in the Dataset since a "
1655
+ "model is specified. Please remove the predictions column from the Dataset.",
1656
+ error_code=INVALID_PARAMETER_VALUE,
1657
+ )
1658
+
1659
+ if model_type in [_ModelType.REGRESSOR, _ModelType.CLASSIFIER]:
1660
+ if isinstance(data, Dataset):
1661
+ if getattr(data, "targets", None) is not None:
1662
+ targets = data.targets
1663
+ else:
1664
+ raise MlflowException(
1665
+ message="The targets column name must be specified in the provided Dataset "
1666
+ f"for {model_type} models. For example: "
1667
+ "`data = mlflow.data.from_pandas(df=X.assign(y=y), targets='y')`",
1668
+ error_code=INVALID_PARAMETER_VALUE,
1669
+ )
1670
+ else:
1671
+ if targets is None:
1672
+ raise MlflowException(
1673
+ f"The targets argument must be specified for {model_type} models.",
1674
+ error_code=INVALID_PARAMETER_VALUE,
1675
+ )
1676
+ elif model_type is None:
1677
+ if not extra_metrics:
1678
+ raise MlflowException(
1679
+ message="The extra_metrics argument must be specified when model_type is None.",
1680
+ error_code=INVALID_PARAMETER_VALUE,
1681
+ )
1682
+
1683
+ specified_model_id = model_id
1684
+ model_id = None
1685
+ if isinstance(model, str):
1686
+ model_id = _parse_model_id_if_present(model)
1687
+ if _is_model_deployment_endpoint_uri(model):
1688
+ model = _get_model_from_deployment_endpoint_uri(model, inference_params)
1689
+ else:
1690
+ model = _load_model_or_server(model, env_manager, model_config)
1691
+ elif env_manager != _EnvManager.LOCAL:
1692
+ raise MlflowException(
1693
+ message="The model argument must be a string URI referring to an MLflow model when a "
1694
+ "non-local env_manager is specified.",
1695
+ error_code=INVALID_PARAMETER_VALUE,
1696
+ )
1697
+ elif isinstance(model, PyFuncModel):
1698
+ model_id = model.model_id
1699
+ if model_config:
1700
+ raise MlflowException(
1701
+ message="Indicating ``model_config`` when passing a ``PyFuncModel`` object as "
1702
+ "model argument is not allowed. If you need to change the model configuration "
1703
+ "for the evaluation model, use "
1704
+ "``mlflow.pyfunc.load_model(model_uri, model_config=<value>)`` and indicate "
1705
+ "the desired configuration there.",
1706
+ error_code=INVALID_PARAMETER_VALUE,
1707
+ )
1708
+ elif callable(model):
1709
+ model = _get_model_from_function(model)
1710
+ elif model is not None:
1711
+ raise MlflowException(
1712
+ message="The model argument must be a string URI referring to an MLflow model, "
1713
+ "an MLflow Deployments endpoint URI, an instance of `mlflow.pyfunc.PyFuncModel`, "
1714
+ "a function, or None.",
1715
+ error_code=INVALID_PARAMETER_VALUE,
1716
+ )
1717
+
1718
+ # If model_id is specified, verify it matches the derived model_id
1719
+ if specified_model_id is not None and model_id is not None and specified_model_id != model_id:
1720
+ raise MlflowException(
1721
+ message=(
1722
+ f"The specified value of the 'model_id' parameter '{specified_model_id}' "
1723
+ f"contradicts the model_id '{model_id}' associated with the model. Please ensure "
1724
+ f"they match or omit the 'model_id' parameter."
1725
+ ),
1726
+ error_code=INVALID_PARAMETER_VALUE,
1727
+ )
1728
+
1729
+ # Use specified model_id if provided, otherwise use derived model_id
1730
+ model_id = specified_model_id if specified_model_id is not None else model_id
1731
+ # If neither model_id nor model is specified, use the active model_id
1732
+ model_id = model_id or mlflow.get_active_model_id()
1733
+
1734
+ evaluators: list[EvaluatorBundle] = resolve_evaluators_and_configs(
1735
+ evaluators, evaluator_config, model_type
1736
+ )
1737
+
1738
+ # NB: MLflow does not use either of these two variables. However, we need to pass these to
1739
+ # _evaluate() function for backward compatibility.
1740
+ evaluator_name_list = [evaluator.name for evaluator in evaluators]
1741
+ evaluator_name_to_conf_map = {evaluator.name: evaluator.config for evaluator in evaluators}
1742
+
1743
+ with _start_run_or_reuse_active_run() as run_id:
1744
+ if not isinstance(data, Dataset):
1745
+ # Convert data to `mlflow.data.dataset.Dataset`.
1746
+ if model is None:
1747
+ data = convert_data_to_mlflow_dataset(
1748
+ data=data, targets=targets, predictions=predictions
1749
+ )
1750
+ else:
1751
+ data = convert_data_to_mlflow_dataset(data=data, targets=targets)
1752
+
1753
+ from mlflow.data.pyfunc_dataset_mixin import PyFuncConvertibleDatasetMixin
1754
+
1755
+ # model_id could be None
1756
+ with _set_active_model(model_id=model_id) if model_id else nullcontext():
1757
+ if isinstance(data, Dataset) and issubclass(
1758
+ data.__class__, PyFuncConvertibleDatasetMixin
1759
+ ):
1760
+ dataset = data.to_evaluation_dataset(dataset_path, feature_names)
1761
+
1762
+ # Use the metric_prefix configured for builtin evaluators as a dataset tag
1763
+ context = None
1764
+ for e in evaluators:
1765
+ if _model_evaluation_registry.is_builtin(e.name) and e.config.get(
1766
+ "metric_prefix"
1767
+ ):
1768
+ context = e.config.get("metric_prefix")
1769
+ break
1770
+
1771
+ client = MlflowClient()
1772
+ tags = [InputTag(key=MLFLOW_DATASET_CONTEXT, value=context)] if context else []
1773
+ dataset_input = DatasetInput(dataset=data._to_mlflow_entity(), tags=tags)
1774
+ client.log_inputs(
1775
+ run_id,
1776
+ [dataset_input],
1777
+ models=[LoggedModelInput(model_id)] if model_id else None,
1778
+ )
1779
+ else:
1780
+ dataset = EvaluationDataset(
1781
+ data,
1782
+ targets=targets,
1783
+ path=dataset_path,
1784
+ feature_names=feature_names,
1785
+ predictions=predictions,
1786
+ )
1787
+ predictions_expected_in_model_output = predictions if model is not None else None
1788
+
1789
+ try:
1790
+ evaluate_result = _evaluate(
1791
+ model=model,
1792
+ model_type=model_type,
1793
+ model_id=model_id,
1794
+ dataset=dataset,
1795
+ run_id=run_id,
1796
+ evaluator_name_list=evaluator_name_list,
1797
+ evaluator_name_to_conf_map=evaluator_name_to_conf_map,
1798
+ extra_metrics=extra_metrics,
1799
+ custom_artifacts=custom_artifacts,
1800
+ predictions=predictions_expected_in_model_output,
1801
+ evaluators=evaluators,
1802
+ )
1803
+ finally:
1804
+ if isinstance(model, _ServedPyFuncModel):
1805
+ os.kill(model.pid, signal.SIGTERM)
1806
+
1807
+ # If model_id is specified, log metrics to the eval run and the logged model
1808
+ if model_id is not None:
1809
+ mlflow.log_metrics(metrics=evaluate_result.metrics, dataset=data, model_id=model_id)
1810
+
1811
+ return evaluate_result
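+ # Illustrative end-to-end usage sketch (assumes a model already logged at `model_uri` and a
+ # pandas DataFrame `df` with a "label" column; both names are hypothetical):
+ #
+ #     result = mlflow.evaluate(
+ #         model=model_uri,
+ #         data=df,
+ #         targets="label",
+ #         model_type="classifier",
+ #     )
+ #     result.metrics  # e.g. includes "accuracy_score" for classifiers
+ #     result.tables.get("eval_results_table")  # per-row results, when logged by the evaluator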