genesis-flow 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (645):
  1. genesis_flow-1.0.0.dist-info/METADATA +822 -0
  2. genesis_flow-1.0.0.dist-info/RECORD +645 -0
  3. genesis_flow-1.0.0.dist-info/WHEEL +5 -0
  4. genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
  5. genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
  6. genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
  7. mlflow/__init__.py +367 -0
  8. mlflow/__main__.py +3 -0
  9. mlflow/ag2/__init__.py +56 -0
  10. mlflow/ag2/ag2_logger.py +294 -0
  11. mlflow/anthropic/__init__.py +40 -0
  12. mlflow/anthropic/autolog.py +129 -0
  13. mlflow/anthropic/chat.py +144 -0
  14. mlflow/artifacts/__init__.py +268 -0
  15. mlflow/autogen/__init__.py +144 -0
  16. mlflow/autogen/chat.py +142 -0
  17. mlflow/azure/__init__.py +26 -0
  18. mlflow/azure/auth_handler.py +257 -0
  19. mlflow/azure/client.py +319 -0
  20. mlflow/azure/config.py +120 -0
  21. mlflow/azure/connection_factory.py +340 -0
  22. mlflow/azure/exceptions.py +27 -0
  23. mlflow/azure/stores.py +327 -0
  24. mlflow/azure/utils.py +183 -0
  25. mlflow/bedrock/__init__.py +45 -0
  26. mlflow/bedrock/_autolog.py +202 -0
  27. mlflow/bedrock/chat.py +122 -0
  28. mlflow/bedrock/stream.py +160 -0
  29. mlflow/bedrock/utils.py +43 -0
  30. mlflow/cli.py +707 -0
  31. mlflow/client.py +12 -0
  32. mlflow/config/__init__.py +56 -0
  33. mlflow/crewai/__init__.py +79 -0
  34. mlflow/crewai/autolog.py +253 -0
  35. mlflow/crewai/chat.py +29 -0
  36. mlflow/data/__init__.py +75 -0
  37. mlflow/data/artifact_dataset_sources.py +170 -0
  38. mlflow/data/code_dataset_source.py +40 -0
  39. mlflow/data/dataset.py +123 -0
  40. mlflow/data/dataset_registry.py +168 -0
  41. mlflow/data/dataset_source.py +110 -0
  42. mlflow/data/dataset_source_registry.py +219 -0
  43. mlflow/data/delta_dataset_source.py +167 -0
  44. mlflow/data/digest_utils.py +108 -0
  45. mlflow/data/evaluation_dataset.py +562 -0
  46. mlflow/data/filesystem_dataset_source.py +81 -0
  47. mlflow/data/http_dataset_source.py +145 -0
  48. mlflow/data/huggingface_dataset.py +258 -0
  49. mlflow/data/huggingface_dataset_source.py +118 -0
  50. mlflow/data/meta_dataset.py +104 -0
  51. mlflow/data/numpy_dataset.py +223 -0
  52. mlflow/data/pandas_dataset.py +231 -0
  53. mlflow/data/polars_dataset.py +352 -0
  54. mlflow/data/pyfunc_dataset_mixin.py +31 -0
  55. mlflow/data/schema.py +76 -0
  56. mlflow/data/sources.py +1 -0
  57. mlflow/data/spark_dataset.py +406 -0
  58. mlflow/data/spark_dataset_source.py +74 -0
  59. mlflow/data/spark_delta_utils.py +118 -0
  60. mlflow/data/tensorflow_dataset.py +350 -0
  61. mlflow/data/uc_volume_dataset_source.py +81 -0
  62. mlflow/db.py +27 -0
  63. mlflow/dspy/__init__.py +17 -0
  64. mlflow/dspy/autolog.py +197 -0
  65. mlflow/dspy/callback.py +398 -0
  66. mlflow/dspy/constant.py +1 -0
  67. mlflow/dspy/load.py +93 -0
  68. mlflow/dspy/save.py +393 -0
  69. mlflow/dspy/util.py +109 -0
  70. mlflow/dspy/wrapper.py +226 -0
  71. mlflow/entities/__init__.py +104 -0
  72. mlflow/entities/_mlflow_object.py +52 -0
  73. mlflow/entities/assessment.py +545 -0
  74. mlflow/entities/assessment_error.py +80 -0
  75. mlflow/entities/assessment_source.py +141 -0
  76. mlflow/entities/dataset.py +92 -0
  77. mlflow/entities/dataset_input.py +51 -0
  78. mlflow/entities/dataset_summary.py +62 -0
  79. mlflow/entities/document.py +48 -0
  80. mlflow/entities/experiment.py +109 -0
  81. mlflow/entities/experiment_tag.py +35 -0
  82. mlflow/entities/file_info.py +45 -0
  83. mlflow/entities/input_tag.py +35 -0
  84. mlflow/entities/lifecycle_stage.py +35 -0
  85. mlflow/entities/logged_model.py +228 -0
  86. mlflow/entities/logged_model_input.py +26 -0
  87. mlflow/entities/logged_model_output.py +32 -0
  88. mlflow/entities/logged_model_parameter.py +46 -0
  89. mlflow/entities/logged_model_status.py +74 -0
  90. mlflow/entities/logged_model_tag.py +33 -0
  91. mlflow/entities/metric.py +200 -0
  92. mlflow/entities/model_registry/__init__.py +29 -0
  93. mlflow/entities/model_registry/_model_registry_entity.py +13 -0
  94. mlflow/entities/model_registry/model_version.py +243 -0
  95. mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
  96. mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
  97. mlflow/entities/model_registry/model_version_search.py +25 -0
  98. mlflow/entities/model_registry/model_version_stages.py +25 -0
  99. mlflow/entities/model_registry/model_version_status.py +35 -0
  100. mlflow/entities/model_registry/model_version_tag.py +35 -0
  101. mlflow/entities/model_registry/prompt.py +73 -0
  102. mlflow/entities/model_registry/prompt_version.py +244 -0
  103. mlflow/entities/model_registry/registered_model.py +175 -0
  104. mlflow/entities/model_registry/registered_model_alias.py +35 -0
  105. mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
  106. mlflow/entities/model_registry/registered_model_search.py +25 -0
  107. mlflow/entities/model_registry/registered_model_tag.py +35 -0
  108. mlflow/entities/multipart_upload.py +74 -0
  109. mlflow/entities/param.py +49 -0
  110. mlflow/entities/run.py +97 -0
  111. mlflow/entities/run_data.py +84 -0
  112. mlflow/entities/run_info.py +188 -0
  113. mlflow/entities/run_inputs.py +59 -0
  114. mlflow/entities/run_outputs.py +43 -0
  115. mlflow/entities/run_status.py +41 -0
  116. mlflow/entities/run_tag.py +36 -0
  117. mlflow/entities/source_type.py +31 -0
  118. mlflow/entities/span.py +774 -0
  119. mlflow/entities/span_event.py +96 -0
  120. mlflow/entities/span_status.py +102 -0
  121. mlflow/entities/trace.py +317 -0
  122. mlflow/entities/trace_data.py +71 -0
  123. mlflow/entities/trace_info.py +220 -0
  124. mlflow/entities/trace_info_v2.py +162 -0
  125. mlflow/entities/trace_location.py +173 -0
  126. mlflow/entities/trace_state.py +39 -0
  127. mlflow/entities/trace_status.py +68 -0
  128. mlflow/entities/view_type.py +51 -0
  129. mlflow/environment_variables.py +866 -0
  130. mlflow/evaluation/__init__.py +16 -0
  131. mlflow/evaluation/assessment.py +369 -0
  132. mlflow/evaluation/evaluation.py +411 -0
  133. mlflow/evaluation/evaluation_tag.py +61 -0
  134. mlflow/evaluation/fluent.py +48 -0
  135. mlflow/evaluation/utils.py +201 -0
  136. mlflow/exceptions.py +213 -0
  137. mlflow/experiments.py +140 -0
  138. mlflow/gemini/__init__.py +81 -0
  139. mlflow/gemini/autolog.py +186 -0
  140. mlflow/gemini/chat.py +261 -0
  141. mlflow/genai/__init__.py +71 -0
  142. mlflow/genai/datasets/__init__.py +67 -0
  143. mlflow/genai/datasets/evaluation_dataset.py +131 -0
  144. mlflow/genai/evaluation/__init__.py +3 -0
  145. mlflow/genai/evaluation/base.py +411 -0
  146. mlflow/genai/evaluation/constant.py +23 -0
  147. mlflow/genai/evaluation/utils.py +244 -0
  148. mlflow/genai/judges/__init__.py +21 -0
  149. mlflow/genai/judges/databricks.py +404 -0
  150. mlflow/genai/label_schemas/__init__.py +153 -0
  151. mlflow/genai/label_schemas/label_schemas.py +209 -0
  152. mlflow/genai/labeling/__init__.py +159 -0
  153. mlflow/genai/labeling/labeling.py +250 -0
  154. mlflow/genai/optimize/__init__.py +13 -0
  155. mlflow/genai/optimize/base.py +198 -0
  156. mlflow/genai/optimize/optimizers/__init__.py +4 -0
  157. mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
  158. mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
  159. mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
  160. mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
  161. mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
  162. mlflow/genai/optimize/types.py +75 -0
  163. mlflow/genai/optimize/util.py +30 -0
  164. mlflow/genai/prompts/__init__.py +206 -0
  165. mlflow/genai/scheduled_scorers.py +431 -0
  166. mlflow/genai/scorers/__init__.py +26 -0
  167. mlflow/genai/scorers/base.py +492 -0
  168. mlflow/genai/scorers/builtin_scorers.py +765 -0
  169. mlflow/genai/scorers/scorer_utils.py +138 -0
  170. mlflow/genai/scorers/validation.py +165 -0
  171. mlflow/genai/utils/data_validation.py +146 -0
  172. mlflow/genai/utils/enum_utils.py +23 -0
  173. mlflow/genai/utils/trace_utils.py +211 -0
  174. mlflow/groq/__init__.py +42 -0
  175. mlflow/groq/_groq_autolog.py +74 -0
  176. mlflow/johnsnowlabs/__init__.py +888 -0
  177. mlflow/langchain/__init__.py +24 -0
  178. mlflow/langchain/api_request_parallel_processor.py +330 -0
  179. mlflow/langchain/autolog.py +147 -0
  180. mlflow/langchain/chat_agent_langgraph.py +340 -0
  181. mlflow/langchain/constant.py +1 -0
  182. mlflow/langchain/constants.py +1 -0
  183. mlflow/langchain/databricks_dependencies.py +444 -0
  184. mlflow/langchain/langchain_tracer.py +597 -0
  185. mlflow/langchain/model.py +919 -0
  186. mlflow/langchain/output_parsers.py +142 -0
  187. mlflow/langchain/retriever_chain.py +153 -0
  188. mlflow/langchain/runnables.py +527 -0
  189. mlflow/langchain/utils/chat.py +402 -0
  190. mlflow/langchain/utils/logging.py +671 -0
  191. mlflow/langchain/utils/serialization.py +36 -0
  192. mlflow/legacy_databricks_cli/__init__.py +0 -0
  193. mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
  194. mlflow/legacy_databricks_cli/configure/provider.py +482 -0
  195. mlflow/litellm/__init__.py +175 -0
  196. mlflow/llama_index/__init__.py +22 -0
  197. mlflow/llama_index/autolog.py +55 -0
  198. mlflow/llama_index/chat.py +43 -0
  199. mlflow/llama_index/constant.py +1 -0
  200. mlflow/llama_index/model.py +577 -0
  201. mlflow/llama_index/pyfunc_wrapper.py +332 -0
  202. mlflow/llama_index/serialize_objects.py +188 -0
  203. mlflow/llama_index/tracer.py +561 -0
  204. mlflow/metrics/__init__.py +479 -0
  205. mlflow/metrics/base.py +39 -0
  206. mlflow/metrics/genai/__init__.py +25 -0
  207. mlflow/metrics/genai/base.py +101 -0
  208. mlflow/metrics/genai/genai_metric.py +771 -0
  209. mlflow/metrics/genai/metric_definitions.py +450 -0
  210. mlflow/metrics/genai/model_utils.py +371 -0
  211. mlflow/metrics/genai/prompt_template.py +68 -0
  212. mlflow/metrics/genai/prompts/__init__.py +0 -0
  213. mlflow/metrics/genai/prompts/v1.py +422 -0
  214. mlflow/metrics/genai/utils.py +6 -0
  215. mlflow/metrics/metric_definitions.py +619 -0
  216. mlflow/mismatch.py +34 -0
  217. mlflow/mistral/__init__.py +34 -0
  218. mlflow/mistral/autolog.py +71 -0
  219. mlflow/mistral/chat.py +135 -0
  220. mlflow/ml_package_versions.py +452 -0
  221. mlflow/models/__init__.py +97 -0
  222. mlflow/models/auth_policy.py +83 -0
  223. mlflow/models/cli.py +354 -0
  224. mlflow/models/container/__init__.py +294 -0
  225. mlflow/models/container/scoring_server/__init__.py +0 -0
  226. mlflow/models/container/scoring_server/nginx.conf +39 -0
  227. mlflow/models/dependencies_schemas.py +287 -0
  228. mlflow/models/display_utils.py +158 -0
  229. mlflow/models/docker_utils.py +211 -0
  230. mlflow/models/evaluation/__init__.py +23 -0
  231. mlflow/models/evaluation/_shap_patch.py +64 -0
  232. mlflow/models/evaluation/artifacts.py +194 -0
  233. mlflow/models/evaluation/base.py +1811 -0
  234. mlflow/models/evaluation/calibration_curve.py +109 -0
  235. mlflow/models/evaluation/default_evaluator.py +996 -0
  236. mlflow/models/evaluation/deprecated.py +23 -0
  237. mlflow/models/evaluation/evaluator_registry.py +80 -0
  238. mlflow/models/evaluation/evaluators/classifier.py +704 -0
  239. mlflow/models/evaluation/evaluators/default.py +233 -0
  240. mlflow/models/evaluation/evaluators/regressor.py +96 -0
  241. mlflow/models/evaluation/evaluators/shap.py +296 -0
  242. mlflow/models/evaluation/lift_curve.py +178 -0
  243. mlflow/models/evaluation/utils/metric.py +123 -0
  244. mlflow/models/evaluation/utils/trace.py +179 -0
  245. mlflow/models/evaluation/validation.py +434 -0
  246. mlflow/models/flavor_backend.py +93 -0
  247. mlflow/models/flavor_backend_registry.py +53 -0
  248. mlflow/models/model.py +1639 -0
  249. mlflow/models/model_config.py +150 -0
  250. mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
  251. mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
  252. mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
  253. mlflow/models/python_api.py +369 -0
  254. mlflow/models/rag_signatures.py +128 -0
  255. mlflow/models/resources.py +321 -0
  256. mlflow/models/signature.py +662 -0
  257. mlflow/models/utils.py +2054 -0
  258. mlflow/models/wheeled_model.py +280 -0
  259. mlflow/openai/__init__.py +57 -0
  260. mlflow/openai/_agent_tracer.py +364 -0
  261. mlflow/openai/api_request_parallel_processor.py +131 -0
  262. mlflow/openai/autolog.py +509 -0
  263. mlflow/openai/constant.py +1 -0
  264. mlflow/openai/model.py +824 -0
  265. mlflow/openai/utils/chat_schema.py +367 -0
  266. mlflow/optuna/__init__.py +3 -0
  267. mlflow/optuna/storage.py +646 -0
  268. mlflow/plugins/__init__.py +72 -0
  269. mlflow/plugins/base.py +358 -0
  270. mlflow/plugins/builtin/__init__.py +24 -0
  271. mlflow/plugins/builtin/pytorch_plugin.py +150 -0
  272. mlflow/plugins/builtin/sklearn_plugin.py +158 -0
  273. mlflow/plugins/builtin/transformers_plugin.py +187 -0
  274. mlflow/plugins/cli.py +321 -0
  275. mlflow/plugins/discovery.py +340 -0
  276. mlflow/plugins/manager.py +465 -0
  277. mlflow/plugins/registry.py +316 -0
  278. mlflow/plugins/templates/framework_plugin_template.py +329 -0
  279. mlflow/prompt/constants.py +20 -0
  280. mlflow/prompt/promptlab_model.py +197 -0
  281. mlflow/prompt/registry_utils.py +248 -0
  282. mlflow/promptflow/__init__.py +495 -0
  283. mlflow/protos/__init__.py +0 -0
  284. mlflow/protos/assessments_pb2.py +174 -0
  285. mlflow/protos/databricks_artifacts_pb2.py +489 -0
  286. mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
  287. mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
  288. mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
  289. mlflow/protos/databricks_pb2.py +267 -0
  290. mlflow/protos/databricks_trace_server_pb2.py +374 -0
  291. mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
  292. mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
  293. mlflow/protos/facet_feature_statistics_pb2.py +296 -0
  294. mlflow/protos/internal_pb2.py +77 -0
  295. mlflow/protos/mlflow_artifacts_pb2.py +336 -0
  296. mlflow/protos/model_registry_pb2.py +1073 -0
  297. mlflow/protos/scalapb/__init__.py +0 -0
  298. mlflow/protos/scalapb/scalapb_pb2.py +104 -0
  299. mlflow/protos/service_pb2.py +2600 -0
  300. mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
  301. mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
  302. mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
  303. mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
  304. mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
  305. mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
  306. mlflow/py.typed +0 -0
  307. mlflow/pydantic_ai/__init__.py +57 -0
  308. mlflow/pydantic_ai/autolog.py +173 -0
  309. mlflow/pyfunc/__init__.py +3844 -0
  310. mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
  311. mlflow/pyfunc/backend.py +523 -0
  312. mlflow/pyfunc/context.py +78 -0
  313. mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
  314. mlflow/pyfunc/loaders/__init__.py +7 -0
  315. mlflow/pyfunc/loaders/chat_agent.py +117 -0
  316. mlflow/pyfunc/loaders/chat_model.py +125 -0
  317. mlflow/pyfunc/loaders/code_model.py +31 -0
  318. mlflow/pyfunc/loaders/responses_agent.py +112 -0
  319. mlflow/pyfunc/mlserver.py +46 -0
  320. mlflow/pyfunc/model.py +1473 -0
  321. mlflow/pyfunc/scoring_server/__init__.py +604 -0
  322. mlflow/pyfunc/scoring_server/app.py +7 -0
  323. mlflow/pyfunc/scoring_server/client.py +146 -0
  324. mlflow/pyfunc/spark_model_cache.py +48 -0
  325. mlflow/pyfunc/stdin_server.py +44 -0
  326. mlflow/pyfunc/utils/__init__.py +3 -0
  327. mlflow/pyfunc/utils/data_validation.py +224 -0
  328. mlflow/pyfunc/utils/environment.py +22 -0
  329. mlflow/pyfunc/utils/input_converter.py +47 -0
  330. mlflow/pyfunc/utils/serving_data_parser.py +11 -0
  331. mlflow/pytorch/__init__.py +1171 -0
  332. mlflow/pytorch/_lightning_autolog.py +580 -0
  333. mlflow/pytorch/_pytorch_autolog.py +50 -0
  334. mlflow/pytorch/pickle_module.py +35 -0
  335. mlflow/rfunc/__init__.py +42 -0
  336. mlflow/rfunc/backend.py +134 -0
  337. mlflow/runs.py +89 -0
  338. mlflow/server/__init__.py +302 -0
  339. mlflow/server/auth/__init__.py +1224 -0
  340. mlflow/server/auth/__main__.py +4 -0
  341. mlflow/server/auth/basic_auth.ini +6 -0
  342. mlflow/server/auth/cli.py +11 -0
  343. mlflow/server/auth/client.py +537 -0
  344. mlflow/server/auth/config.py +34 -0
  345. mlflow/server/auth/db/__init__.py +0 -0
  346. mlflow/server/auth/db/cli.py +18 -0
  347. mlflow/server/auth/db/migrations/__init__.py +0 -0
  348. mlflow/server/auth/db/migrations/alembic.ini +110 -0
  349. mlflow/server/auth/db/migrations/env.py +76 -0
  350. mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
  351. mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
  352. mlflow/server/auth/db/models.py +67 -0
  353. mlflow/server/auth/db/utils.py +37 -0
  354. mlflow/server/auth/entities.py +165 -0
  355. mlflow/server/auth/logo.py +14 -0
  356. mlflow/server/auth/permissions.py +65 -0
  357. mlflow/server/auth/routes.py +18 -0
  358. mlflow/server/auth/sqlalchemy_store.py +263 -0
  359. mlflow/server/graphql/__init__.py +0 -0
  360. mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
  361. mlflow/server/graphql/graphql_custom_scalars.py +24 -0
  362. mlflow/server/graphql/graphql_errors.py +15 -0
  363. mlflow/server/graphql/graphql_no_batching.py +89 -0
  364. mlflow/server/graphql/graphql_schema_extensions.py +74 -0
  365. mlflow/server/handlers.py +3217 -0
  366. mlflow/server/prometheus_exporter.py +17 -0
  367. mlflow/server/validation.py +30 -0
  368. mlflow/shap/__init__.py +691 -0
  369. mlflow/sklearn/__init__.py +1994 -0
  370. mlflow/sklearn/utils.py +1041 -0
  371. mlflow/smolagents/__init__.py +66 -0
  372. mlflow/smolagents/autolog.py +139 -0
  373. mlflow/smolagents/chat.py +29 -0
  374. mlflow/store/__init__.py +10 -0
  375. mlflow/store/_unity_catalog/__init__.py +1 -0
  376. mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
  377. mlflow/store/_unity_catalog/lineage/constants.py +2 -0
  378. mlflow/store/_unity_catalog/registry/__init__.py +6 -0
  379. mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
  380. mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
  381. mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
  382. mlflow/store/_unity_catalog/registry/utils.py +121 -0
  383. mlflow/store/artifact/__init__.py +0 -0
  384. mlflow/store/artifact/artifact_repo.py +472 -0
  385. mlflow/store/artifact/artifact_repository_registry.py +154 -0
  386. mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
  387. mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
  388. mlflow/store/artifact/cli.py +141 -0
  389. mlflow/store/artifact/cloud_artifact_repo.py +332 -0
  390. mlflow/store/artifact/databricks_artifact_repo.py +729 -0
  391. mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
  392. mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
  393. mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
  394. mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
  395. mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
  396. mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
  397. mlflow/store/artifact/ftp_artifact_repo.py +132 -0
  398. mlflow/store/artifact/gcs_artifact_repo.py +296 -0
  399. mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
  400. mlflow/store/artifact/http_artifact_repo.py +218 -0
  401. mlflow/store/artifact/local_artifact_repo.py +142 -0
  402. mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
  403. mlflow/store/artifact/models_artifact_repo.py +259 -0
  404. mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
  405. mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
  406. mlflow/store/artifact/r2_artifact_repo.py +70 -0
  407. mlflow/store/artifact/runs_artifact_repo.py +265 -0
  408. mlflow/store/artifact/s3_artifact_repo.py +330 -0
  409. mlflow/store/artifact/sftp_artifact_repo.py +141 -0
  410. mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
  411. mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
  412. mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
  413. mlflow/store/artifact/utils/__init__.py +0 -0
  414. mlflow/store/artifact/utils/models.py +148 -0
  415. mlflow/store/db/__init__.py +0 -0
  416. mlflow/store/db/base_sql_model.py +3 -0
  417. mlflow/store/db/db_types.py +10 -0
  418. mlflow/store/db/utils.py +314 -0
  419. mlflow/store/db_migrations/__init__.py +0 -0
  420. mlflow/store/db_migrations/alembic.ini +74 -0
  421. mlflow/store/db_migrations/env.py +84 -0
  422. mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
  423. mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
  424. mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
  425. mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
  426. mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
  427. mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
  428. mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
  429. mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
  430. mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
  431. mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
  432. mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
  433. mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
  434. mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
  435. mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
  436. mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
  437. mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
  438. mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
  439. mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
  440. mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
  441. mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
  442. mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
  443. mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
  444. mlflow/store/db_migrations/versions/__init__.py +0 -0
  445. mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
  446. mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
  447. mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
  448. mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
  449. mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
  450. mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
  451. mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
  452. mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
  453. mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
  454. mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
  455. mlflow/store/entities/__init__.py +3 -0
  456. mlflow/store/entities/paged_list.py +18 -0
  457. mlflow/store/model_registry/__init__.py +10 -0
  458. mlflow/store/model_registry/abstract_store.py +1081 -0
  459. mlflow/store/model_registry/base_rest_store.py +44 -0
  460. mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
  461. mlflow/store/model_registry/dbmodels/__init__.py +0 -0
  462. mlflow/store/model_registry/dbmodels/models.py +206 -0
  463. mlflow/store/model_registry/file_store.py +1091 -0
  464. mlflow/store/model_registry/rest_store.py +481 -0
  465. mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
  466. mlflow/store/tracking/__init__.py +23 -0
  467. mlflow/store/tracking/abstract_store.py +816 -0
  468. mlflow/store/tracking/dbmodels/__init__.py +0 -0
  469. mlflow/store/tracking/dbmodels/initial_models.py +243 -0
  470. mlflow/store/tracking/dbmodels/models.py +1073 -0
  471. mlflow/store/tracking/file_store.py +2438 -0
  472. mlflow/store/tracking/postgres_managed_identity.py +146 -0
  473. mlflow/store/tracking/rest_store.py +1131 -0
  474. mlflow/store/tracking/sqlalchemy_store.py +2785 -0
  475. mlflow/system_metrics/__init__.py +61 -0
  476. mlflow/system_metrics/metrics/__init__.py +0 -0
  477. mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
  478. mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
  479. mlflow/system_metrics/metrics/disk_monitor.py +21 -0
  480. mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
  481. mlflow/system_metrics/metrics/network_monitor.py +34 -0
  482. mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
  483. mlflow/system_metrics/system_metrics_monitor.py +198 -0
  484. mlflow/tracing/__init__.py +16 -0
  485. mlflow/tracing/assessment.py +356 -0
  486. mlflow/tracing/client.py +531 -0
  487. mlflow/tracing/config.py +125 -0
  488. mlflow/tracing/constant.py +105 -0
  489. mlflow/tracing/destination.py +81 -0
  490. mlflow/tracing/display/__init__.py +40 -0
  491. mlflow/tracing/display/display_handler.py +196 -0
  492. mlflow/tracing/export/async_export_queue.py +186 -0
  493. mlflow/tracing/export/inference_table.py +138 -0
  494. mlflow/tracing/export/mlflow_v3.py +137 -0
  495. mlflow/tracing/export/utils.py +70 -0
  496. mlflow/tracing/fluent.py +1417 -0
  497. mlflow/tracing/processor/base_mlflow.py +199 -0
  498. mlflow/tracing/processor/inference_table.py +175 -0
  499. mlflow/tracing/processor/mlflow_v3.py +47 -0
  500. mlflow/tracing/processor/otel.py +73 -0
  501. mlflow/tracing/provider.py +487 -0
  502. mlflow/tracing/trace_manager.py +200 -0
  503. mlflow/tracing/utils/__init__.py +616 -0
  504. mlflow/tracing/utils/artifact_utils.py +28 -0
  505. mlflow/tracing/utils/copy.py +55 -0
  506. mlflow/tracing/utils/environment.py +55 -0
  507. mlflow/tracing/utils/exception.py +21 -0
  508. mlflow/tracing/utils/once.py +35 -0
  509. mlflow/tracing/utils/otlp.py +63 -0
  510. mlflow/tracing/utils/processor.py +54 -0
  511. mlflow/tracing/utils/search.py +292 -0
  512. mlflow/tracing/utils/timeout.py +250 -0
  513. mlflow/tracing/utils/token.py +19 -0
  514. mlflow/tracing/utils/truncation.py +124 -0
  515. mlflow/tracing/utils/warning.py +76 -0
  516. mlflow/tracking/__init__.py +39 -0
  517. mlflow/tracking/_model_registry/__init__.py +1 -0
  518. mlflow/tracking/_model_registry/client.py +764 -0
  519. mlflow/tracking/_model_registry/fluent.py +853 -0
  520. mlflow/tracking/_model_registry/registry.py +67 -0
  521. mlflow/tracking/_model_registry/utils.py +251 -0
  522. mlflow/tracking/_tracking_service/__init__.py +0 -0
  523. mlflow/tracking/_tracking_service/client.py +883 -0
  524. mlflow/tracking/_tracking_service/registry.py +56 -0
  525. mlflow/tracking/_tracking_service/utils.py +275 -0
  526. mlflow/tracking/artifact_utils.py +179 -0
  527. mlflow/tracking/client.py +5900 -0
  528. mlflow/tracking/context/__init__.py +0 -0
  529. mlflow/tracking/context/abstract_context.py +35 -0
  530. mlflow/tracking/context/databricks_cluster_context.py +15 -0
  531. mlflow/tracking/context/databricks_command_context.py +15 -0
  532. mlflow/tracking/context/databricks_job_context.py +49 -0
  533. mlflow/tracking/context/databricks_notebook_context.py +41 -0
  534. mlflow/tracking/context/databricks_repo_context.py +43 -0
  535. mlflow/tracking/context/default_context.py +51 -0
  536. mlflow/tracking/context/git_context.py +32 -0
  537. mlflow/tracking/context/registry.py +98 -0
  538. mlflow/tracking/context/system_environment_context.py +15 -0
  539. mlflow/tracking/default_experiment/__init__.py +1 -0
  540. mlflow/tracking/default_experiment/abstract_context.py +43 -0
  541. mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
  542. mlflow/tracking/default_experiment/registry.py +75 -0
  543. mlflow/tracking/fluent.py +3595 -0
  544. mlflow/tracking/metric_value_conversion_utils.py +93 -0
  545. mlflow/tracking/multimedia.py +206 -0
  546. mlflow/tracking/registry.py +86 -0
  547. mlflow/tracking/request_auth/__init__.py +0 -0
  548. mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
  549. mlflow/tracking/request_auth/registry.py +60 -0
  550. mlflow/tracking/request_header/__init__.py +0 -0
  551. mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
  552. mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
  553. mlflow/tracking/request_header/default_request_header_provider.py +17 -0
  554. mlflow/tracking/request_header/registry.py +79 -0
  555. mlflow/transformers/__init__.py +2982 -0
  556. mlflow/transformers/flavor_config.py +258 -0
  557. mlflow/transformers/hub_utils.py +83 -0
  558. mlflow/transformers/llm_inference_utils.py +468 -0
  559. mlflow/transformers/model_io.py +301 -0
  560. mlflow/transformers/peft.py +51 -0
  561. mlflow/transformers/signature.py +183 -0
  562. mlflow/transformers/torch_utils.py +55 -0
  563. mlflow/types/__init__.py +21 -0
  564. mlflow/types/agent.py +270 -0
  565. mlflow/types/chat.py +240 -0
  566. mlflow/types/llm.py +935 -0
  567. mlflow/types/responses.py +139 -0
  568. mlflow/types/responses_helpers.py +416 -0
  569. mlflow/types/schema.py +1505 -0
  570. mlflow/types/type_hints.py +647 -0
  571. mlflow/types/utils.py +753 -0
  572. mlflow/utils/__init__.py +283 -0
  573. mlflow/utils/_capture_modules.py +256 -0
  574. mlflow/utils/_capture_transformers_modules.py +75 -0
  575. mlflow/utils/_spark_utils.py +201 -0
  576. mlflow/utils/_unity_catalog_oss_utils.py +97 -0
  577. mlflow/utils/_unity_catalog_utils.py +479 -0
  578. mlflow/utils/annotations.py +218 -0
  579. mlflow/utils/arguments_utils.py +16 -0
  580. mlflow/utils/async_logging/__init__.py +1 -0
  581. mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
  582. mlflow/utils/async_logging/async_logging_queue.py +366 -0
  583. mlflow/utils/async_logging/run_artifact.py +38 -0
  584. mlflow/utils/async_logging/run_batch.py +58 -0
  585. mlflow/utils/async_logging/run_operations.py +49 -0
  586. mlflow/utils/autologging_utils/__init__.py +737 -0
  587. mlflow/utils/autologging_utils/client.py +432 -0
  588. mlflow/utils/autologging_utils/config.py +33 -0
  589. mlflow/utils/autologging_utils/events.py +294 -0
  590. mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
  591. mlflow/utils/autologging_utils/metrics_queue.py +71 -0
  592. mlflow/utils/autologging_utils/safety.py +1104 -0
  593. mlflow/utils/autologging_utils/versioning.py +95 -0
  594. mlflow/utils/checkpoint_utils.py +206 -0
  595. mlflow/utils/class_utils.py +6 -0
  596. mlflow/utils/cli_args.py +257 -0
  597. mlflow/utils/conda.py +354 -0
  598. mlflow/utils/credentials.py +231 -0
  599. mlflow/utils/data_utils.py +17 -0
  600. mlflow/utils/databricks_utils.py +1436 -0
  601. mlflow/utils/docstring_utils.py +477 -0
  602. mlflow/utils/doctor.py +133 -0
  603. mlflow/utils/download_cloud_file_chunk.py +43 -0
  604. mlflow/utils/env_manager.py +16 -0
  605. mlflow/utils/env_pack.py +131 -0
  606. mlflow/utils/environment.py +1009 -0
  607. mlflow/utils/exception_utils.py +14 -0
  608. mlflow/utils/file_utils.py +978 -0
  609. mlflow/utils/git_utils.py +77 -0
  610. mlflow/utils/gorilla.py +797 -0
  611. mlflow/utils/import_hooks/__init__.py +363 -0
  612. mlflow/utils/lazy_load.py +51 -0
  613. mlflow/utils/logging_utils.py +168 -0
  614. mlflow/utils/mime_type_utils.py +58 -0
  615. mlflow/utils/mlflow_tags.py +103 -0
  616. mlflow/utils/model_utils.py +486 -0
  617. mlflow/utils/name_utils.py +346 -0
  618. mlflow/utils/nfs_on_spark.py +62 -0
  619. mlflow/utils/openai_utils.py +164 -0
  620. mlflow/utils/os.py +12 -0
  621. mlflow/utils/oss_registry_utils.py +29 -0
  622. mlflow/utils/plugins.py +17 -0
  623. mlflow/utils/process.py +182 -0
  624. mlflow/utils/promptlab_utils.py +146 -0
  625. mlflow/utils/proto_json_utils.py +743 -0
  626. mlflow/utils/pydantic_utils.py +54 -0
  627. mlflow/utils/request_utils.py +279 -0
  628. mlflow/utils/requirements_utils.py +704 -0
  629. mlflow/utils/rest_utils.py +673 -0
  630. mlflow/utils/search_logged_model_utils.py +127 -0
  631. mlflow/utils/search_utils.py +2111 -0
  632. mlflow/utils/secure_loading.py +221 -0
  633. mlflow/utils/security_validation.py +384 -0
  634. mlflow/utils/server_cli_utils.py +61 -0
  635. mlflow/utils/spark_utils.py +15 -0
  636. mlflow/utils/string_utils.py +138 -0
  637. mlflow/utils/thread_utils.py +63 -0
  638. mlflow/utils/time.py +54 -0
  639. mlflow/utils/timeout.py +42 -0
  640. mlflow/utils/uri.py +572 -0
  641. mlflow/utils/validation.py +662 -0
  642. mlflow/utils/virtualenv.py +458 -0
  643. mlflow/utils/warnings_utils.py +25 -0
  644. mlflow/utils/yaml_utils.py +179 -0
  645. mlflow/version.py +24 -0
@@ -0,0 +1,61 @@
1
+ """System metrics logging module."""
2
+
3
+ from mlflow.environment_variables import (
4
+ MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING,
5
+ MLFLOW_SYSTEM_METRICS_NODE_ID,
6
+ MLFLOW_SYSTEM_METRICS_SAMPLES_BEFORE_LOGGING,
7
+ MLFLOW_SYSTEM_METRICS_SAMPLING_INTERVAL,
8
+ )
9
+
10
+
11
def disable_system_metrics_logging():
    """Turn system metrics logging off at the global level.

    Only the global switch is set to ``False``; an individual run can still opt in to system
    metrics logging with `mlflow.start_run(log_system_metrics=True)`.
    """
    MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING.set(False)
18
+
19
+
20
def enable_system_metrics_logging():
    """Turn system metrics logging on at the global level.

    Only the global switch is set to ``True``; an individual run can still opt out of system
    metrics logging with `mlflow.start_run(log_system_metrics=False)`.
    """
    MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING.set(True)
27
+
28
+
29
def set_system_metrics_sampling_interval(interval):
    """Configure how often system metrics are sampled.

    System metrics are collected once every `interval` seconds. By default `interval=10`.
    Passing ``None`` removes the configured value instead of storing one.
    """
    if interval is not None:
        MLFLOW_SYSTEM_METRICS_SAMPLING_INTERVAL.set(interval)
        return
    MLFLOW_SYSTEM_METRICS_SAMPLING_INTERVAL.unset()
38
+
39
+
40
def set_system_metrics_samples_before_logging(samples):
    """Configure how many samples are gathered before system metrics are logged.

    Each time `samples` samples have been collected, the system metrics are logged to mlflow.
    By default `samples=1`. Passing ``None`` removes the configured value instead of storing one.
    """
    if samples is not None:
        MLFLOW_SYSTEM_METRICS_SAMPLES_BEFORE_LOGGING.set(samples)
        return
    MLFLOW_SYSTEM_METRICS_SAMPLES_BEFORE_LOGGING.unset()
50
+
51
+
52
def set_system_metrics_node_id(node_id):
    """Configure the node id attached to system metrics.

    `node_id` identifies the machine on which the metrics are collected, which is useful in a
    multi-node (distributed training) setup. Passing ``None`` removes the configured value
    instead of storing one.
    """
    if node_id is not None:
        MLFLOW_SYSTEM_METRICS_NODE_ID.set(node_id)
        return
    MLFLOW_SYSTEM_METRICS_NODE_ID.unset()
File without changes
@@ -0,0 +1,32 @@
1
+ """Base class of system metrics monitor."""
2
+
3
+ import abc
4
+ from collections import defaultdict
5
+
6
+
7
class BaseMetricsMonitor(abc.ABC):
    """Abstract parent of the system metrics monitors.

    Samples live in ``self._metrics``, a ``defaultdict(list)``, so looking up a metric name that
    has not been seen yet yields a fresh empty list.
    """

    def __init__(self):
        self._metrics = defaultdict(list)

    @property
    def metrics(self):
        """The sample store itself (the live object, not a copy)."""
        return self._metrics

    def clear_metrics(self):
        """Remove every entry from the sample store."""
        self._metrics.clear()

    @abc.abstractmethod
    def collect_metrics(self):
        """Collect metrics.

        Subclasses implement this to take a sample and store it in `self._metrics`.
        """

    @abc.abstractmethod
    def aggregate_metrics(self):
        """Aggregate metrics.

        Subclasses implement this to aggregate the stored samples and return them in a dict.
        """
@@ -0,0 +1,23 @@
1
+ """Class for monitoring CPU stats."""
2
+
3
+ import psutil
4
+
5
+ from mlflow.system_metrics.metrics.base_metrics_monitor import BaseMetricsMonitor
6
+
7
+
8
class CPUMonitor(BaseMetricsMonitor):
    """Monitor that samples CPU utilization and system memory usage through `psutil`."""

    def collect_metrics(self):
        """Take one CPU/memory sample and append it to the stored series."""
        cpu_now = psutil.cpu_percent()
        self._metrics["cpu_utilization_percentage"].append(cpu_now)

        vm = psutil.virtual_memory()
        # Memory in use, reported both in megabytes (bytes / 1e6) and as a share of the total.
        self._metrics["system_memory_usage_megabytes"].append(vm.used / 1e6)
        self._metrics["system_memory_usage_percentage"].append(vm.used / vm.total * 100)

    def aggregate_metrics(self):
        """Return the mean of every stored series, rounded to one decimal place."""
        averaged = {}
        for name, samples in self._metrics.items():
            averaged[name] = round(sum(samples) / len(samples), 1)
        return averaged
@@ -0,0 +1,21 @@
1
+ """Class for monitoring disk stats."""
2
+
3
+ import os
4
+
5
+ import psutil
6
+
7
+ from mlflow.system_metrics.metrics.base_metrics_monitor import BaseMetricsMonitor
8
+
9
+
10
class DiskMonitor(BaseMetricsMonitor):
    """Monitor that samples disk usage of the path `os.sep` through `psutil`."""

    def collect_metrics(self):
        """Take one disk usage sample and append it to the stored series."""
        usage = psutil.disk_usage(os.sep)
        # Byte counts are converted to megabytes by dividing by 1e6.
        readings = (
            ("disk_usage_percentage", usage.percent),
            ("disk_usage_megabytes", usage.used / 1e6),
            ("disk_available_megabytes", usage.free / 1e6),
        )
        for name, value in readings:
            self._metrics[name].append(value)

    def aggregate_metrics(self):
        """Return the mean of every stored series, rounded to one decimal place."""
        averaged = {}
        for name, samples in self._metrics.items():
            averaged[name] = round(sum(samples) / len(samples), 1)
        return averaged
@@ -0,0 +1,71 @@
1
+ """Class for monitoring GPU stats."""
2
+
3
+ import logging
4
+ import sys
5
+
6
+ from mlflow.system_metrics.metrics.base_metrics_monitor import BaseMetricsMonitor
7
+
8
+ _logger = logging.getLogger(__name__)
9
+
10
+ try:
11
+ import pynvml
12
+ except ImportError:
13
+ # If `pynvml` is not installed, a warning will be logged at monitor instantiation.
14
+ # We don't log a warning here to avoid spamming warning at every import.
15
+ pass
16
+
17
+
18
class GPUMonitor(BaseMetricsMonitor):
    """Class for monitoring GPU stats through `pynvml`.

    Raises:
        ImportError: if `pynvml` has not been imported (i.e. it is not installed).
        RuntimeError: if NVML cannot be initialized; the original `pynvml.NVMLError` is attached
            as the cause.
    """

    def __init__(self):
        if "pynvml" not in sys.modules:
            # Only instantiate if `pynvml` is installed.
            raise ImportError(
                "`pynvml` is not installed, to log GPU metrics please run `pip install pynvml` "
                "to install it."
            )
        try:
            # `nvmlInit()` will fail if no GPU is found.
            pynvml.nvmlInit()
        except pynvml.NVMLError as e:
            # Chain explicitly so the NVML error is reported as the direct cause.
            raise RuntimeError(
                f"Failed to initialize NVML, skip logging GPU metrics: {e}"
            ) from e

        super().__init__()
        self.num_gpus = pynvml.nvmlDeviceGetCount()
        self.gpu_handles = [pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(self.num_gpus)]

    def collect_metrics(self):
        """Sample memory, utilization and power for every GPU handle.

        Each group of readings is wrapped in its own `try` so that an NVML error in one group
        only logs a warning and does not prevent the other groups from being collected.
        """
        for i, handle in enumerate(self.gpu_handles):
            try:
                memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
                self._metrics[f"gpu_{i}_memory_usage_percentage"].append(
                    round(memory.used / memory.total * 100, 1)
                )
                self._metrics[f"gpu_{i}_memory_usage_megabytes"].append(memory.used / 1e6)
            except pynvml.NVMLError as e:
                _logger.warning(f"Encountered error {e} when trying to collect GPU memory metrics.")

            try:
                device_utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
                self._metrics[f"gpu_{i}_utilization_percentage"].append(device_utilization.gpu)
            except pynvml.NVMLError as e:
                _logger.warning(
                    f"Encountered error {e} when trying to collect GPU utilization metrics."
                )

            try:
                power_milliwatts = pynvml.nvmlDeviceGetPowerUsage(handle)
                power_capacity_milliwatts = pynvml.nvmlDeviceGetEnforcedPowerLimit(handle)
                # Milliwatts -> watts for the absolute reading; the ratio is unit-free.
                self._metrics[f"gpu_{i}_power_usage_watts"].append(power_milliwatts / 1000)
                self._metrics[f"gpu_{i}_power_usage_percentage"].append(
                    (power_milliwatts / power_capacity_milliwatts) * 100
                )
            except pynvml.NVMLError as e:
                _logger.warning(
                    f"Encountered error {e} when trying to collect GPU power usage metrics."
                )

    def aggregate_metrics(self):
        """Return the mean of every stored series, rounded to one decimal place."""
        return {k: round(sum(v) / len(v), 1) for k, v in self._metrics.items()}
@@ -0,0 +1,34 @@
1
+ """Class for monitoring network stats."""
2
+
3
+ import psutil
4
+
5
+ from mlflow.system_metrics.metrics.base_metrics_monitor import BaseMetricsMonitor
6
+
7
+
8
class NetworkMonitor(BaseMetricsMonitor):
    """Monitor that reports network traffic relative to when the monitor was created."""

    def __init__(self):
        super().__init__()
        self._set_initial_metrics()

    def _set_initial_metrics(self):
        """Record the baseline counters that later samples are measured against.

        `psutil.net_io_counters()` counts traffic since system boot, so the values seen at
        start-up are kept and subtracted later to make the reported usage start from 0.
        """
        baseline = psutil.net_io_counters()
        self._initial_receive_megabytes = baseline.bytes_recv / 1e6
        self._initial_transmit_megabytes = baseline.bytes_sent / 1e6

    def collect_metrics(self):
        """Store the traffic received/sent since the baseline, in megabytes.

        Each key holds a single value that is overwritten on every call rather than a list of
        samples.
        """
        counters = psutil.net_io_counters()
        received_mb = counters.bytes_recv / 1e6 - self._initial_receive_megabytes
        sent_mb = counters.bytes_sent / 1e6 - self._initial_transmit_megabytes
        self._metrics["network_receive_megabytes"] = received_mb
        self._metrics["network_transmit_megabytes"] = sent_mb

    def aggregate_metrics(self):
        """Return the stored values as a plain dict; network metrics are not averaged."""
        return {name: value for name, value in self._metrics.items()}
@@ -0,0 +1,123 @@
1
+ """Class for monitoring GPU stats on HIP devices.
2
+ Inspired by GPUMonitor, but with the pynvml method
3
+ names replaced by pyrsmi method names
4
+ """
5
+
6
+ import contextlib
7
+ import io
8
+ import logging
9
+ import sys
10
+
11
+ from mlflow.system_metrics.metrics.base_metrics_monitor import BaseMetricsMonitor
12
+
13
+ _logger = logging.getLogger(__name__)
14
+
15
+ is_rocml_available = False
16
+ try:
17
+ from pyrsmi import rocml
18
+
19
+ is_rocml_available = True
20
+ except ImportError:
21
+ # If `pyrsmi` is not installed, a warning will be logged at monitor instantiation.
22
+ # We don't log a warning here to avoid spamming warning at every import.
23
+ pass
24
+
25
+
26
class ROCMMonitor(BaseMetricsMonitor):
    """Class for monitoring AMD GPU stats.

    This class is modified from, and inspired by, the original GPUMonitor class written by
    MLflow. It uses `pyrsmi`, an official ROCm Python package that tracks and monitors AMD
    GPUs, and has been tested on AMD MI250x 128GB GPUs.

    For more information see:
    https://github.com/ROCm/pyrsmi

    PyPI package:
    https://pypi.org/project/pyrsmi/

    Raises:
        ImportError: if `pyrsmi` has not been imported (i.e. it is not installed).
        RuntimeError: if RSMI cannot be initialized, or if the very first device cannot report
            its average power (so there is no earlier device to map it to).
    """

    def __init__(self):
        # Set first, before anything can raise, so `__del__` can tell whether this instance
        # actually initialized RSMI.
        self._rsmi_initialized = False

        if "pyrsmi" not in sys.modules:
            # Only instantiate if `pyrsmi` is installed.
            raise ImportError(
                "`pyrsmi` is not installed, to log GPU metrics please run `pip install pyrsmi` "
                "to install it."
            )

        try:
            rocml.smi_initialize()
        except RuntimeError as e:
            # Chain explicitly so the underlying failure is reported as the direct cause.
            raise RuntimeError("Failed to initialize RSMI, skip logging GPU metrics") from e
        self._rsmi_initialized = True

        super().__init__()

        # Check if GPU is virtual. If so, collect power information from physical GPU
        self.physical_idx = []
        for i in range(rocml.smi_get_device_count()):
            try:
                self.raise_error(rocml.smi_get_device_average_power, i)
                # physical GPU if no error is raised
                self.physical_idx.append(i)
            except SystemError as e:
                # virtual if error is raised
                # all virtual GPUs must share physical GPU with previous virtual/physical GPU
                if not self.physical_idx:
                    # Explicit check instead of `assert`, which is stripped under `python -O`.
                    raise RuntimeError(
                        "The first GPU reported an error when reading its average power, so "
                        "there is no previous GPU to take power information from."
                    ) from e
                self.physical_idx.append(self.physical_idx[-1])

    @staticmethod
    def raise_error(func, *args, **kwargs):
        """Raise error if message containing 'error' is printed out to stdout or stderr.

        Args:
            func: callable to invoke with `*args` and `**kwargs`; its return value is discarded.

        Raises:
            SystemError: carrying the captured text, when the text written to `sys.stdout` or
                `sys.stderr` during the call contains "error" (case-insensitive).
        """
        stdout = io.StringIO()
        stderr = io.StringIO()

        with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
            func(*args, **kwargs)

        out = stdout.getvalue()
        err = stderr.getvalue()

        # Check if there is an error message in either stdout or stderr
        if "error" in out.lower():
            raise SystemError(out)
        if "error" in err.lower():
            raise SystemError(err)

    def collect_metrics(self):
        """Sample memory, utilization and power for every device and append to the series."""
        self.num_gpus = rocml.smi_get_device_count()

        for i in range(self.num_gpus):
            memory_used = rocml.smi_get_device_memory_used(i)
            memory_total = rocml.smi_get_device_memory_total(i)
            self._metrics[f"gpu_{i}_memory_usage_percentage"].append(
                round(memory_used / memory_total * 100, 1)
            )
            # The key is named "gigabytes" and the value is divided by 1e9 accordingly.
            self._metrics[f"gpu_{i}_memory_usage_gigabytes"].append(memory_used / 1e9)

            device_utilization = rocml.smi_get_device_utilization(i)
            self._metrics[f"gpu_{i}_utilization_percentage"].append(device_utilization)

            # Power is read from the device recorded in `physical_idx` during `__init__`.
            power_watts = rocml.smi_get_device_average_power(self.physical_idx[i])
            power_capacity_watts = 500  # hard coded for now, should get this from rocm-smi
            self._metrics[f"gpu_{i}_power_usage_watts"].append(power_watts)
            self._metrics[f"gpu_{i}_power_usage_percentage"].append(
                (power_watts / power_capacity_watts) * 100
            )

            # TODO:
            # memory_busy (and other useful metrics) are available in pyrsmi>1.1.0.
            # We are currently on pyrsmi==1.0.1, so these are not available
            # memory_busy = rocml.smi_get_device_memory_busy(i)
            # self._metrics[f"gpu_{i}_memory_busy_time_percent"].append(memory_busy)

    def aggregate_metrics(self):
        """Return the mean of every stored series, rounded to one decimal place."""
        return {k: round(sum(v) / len(v), 1) for k, v in self._metrics.items()}

    def __del__(self):
        # `__del__` also runs for instances whose `__init__` raised, so only shut RSMI down when
        # this instance got past `smi_initialize()`.
        if is_rocml_available and getattr(self, "_rsmi_initialized", False):
            rocml.smi_shutdown()
@@ -0,0 +1,198 @@
1
+ """Class for monitoring system stats."""
2
+
3
+ import logging
4
+ import threading
5
+ from typing import Optional
6
+
7
+ from mlflow.environment_variables import (
8
+ MLFLOW_SYSTEM_METRICS_NODE_ID,
9
+ MLFLOW_SYSTEM_METRICS_SAMPLES_BEFORE_LOGGING,
10
+ MLFLOW_SYSTEM_METRICS_SAMPLING_INTERVAL,
11
+ )
12
+ from mlflow.exceptions import MlflowException
13
+ from mlflow.system_metrics.metrics.base_metrics_monitor import BaseMetricsMonitor
14
+ from mlflow.system_metrics.metrics.cpu_monitor import CPUMonitor
15
+ from mlflow.system_metrics.metrics.disk_monitor import DiskMonitor
16
+ from mlflow.system_metrics.metrics.gpu_monitor import GPUMonitor
17
+ from mlflow.system_metrics.metrics.network_monitor import NetworkMonitor
18
+ from mlflow.system_metrics.metrics.rocm_monitor import ROCMMonitor
19
+
20
+ _logger = logging.getLogger(__name__)
21
+
22
+
23
class SystemMetricsMonitor:
    """Class for monitoring system stats.

    This class is used for pulling system metrics and logging them to MLflow. Calling `start()` will
    spawn a thread that logs system metrics periodically. Calling `finish()` will stop the thread.
    Logging is done on a different frequency from pulling metrics, so that the metrics are
    aggregated over the period. Users can change the logging frequency by setting
    `MLFLOW_SYSTEM_METRICS_SAMPLING_INTERVAL` and `MLFLOW_SYSTEM_METRICS_SAMPLES_BEFORE_LOGGING`
    environment variables, e.g., run `export MLFLOW_SYSTEM_METRICS_SAMPLING_INTERVAL=10` in terminal
    will set the sampling interval to 10 seconds.

    System metrics are logged with a prefix "system/", e.g., "system/cpu_utilization_percentage".

    Args:
        run_id: string, the MLflow run ID.
        sampling_interval: float, default to 10. The interval (in seconds) at which to pull system
            metrics. Will be overridden by `MLFLOW_SYSTEM_METRICS_SAMPLING_INTERVAL` environment
            variable.
        samples_before_logging: int, default to 1. The number of samples to aggregate before
            logging. Will be overridden by `MLFLOW_SYSTEM_METRICS_SAMPLES_BEFORE_LOGGING`
            environment variable.
        resume_logging: bool, default to False. If True, we will resume the system metrics logging
            from the `run_id`, and the first step to log will be the last step of `run_id` + 1, if
            False, system metrics logging will start from step 0.
        node_id: string, default to None. The node ID of the machine where the metrics are
            collected. Will be overridden by `MLFLOW_SYSTEM_METRICS_NODE_ID`
            environment variable. This is useful in multi-node training to distinguish the metrics
            from different nodes. For example, if you set node_id to "node_0", the system metrics
            getting logged will be of format "system/node_0/cpu_utilization_percentage".
    """

    def __init__(
        self,
        run_id,
        sampling_interval=10,
        samples_before_logging=1,
        resume_logging=False,
        node_id=None,
    ):
        from mlflow.utils.autologging_utils import BatchMetricsLogger

        # Instantiate default monitors.
        self.monitors = [CPUMonitor(), DiskMonitor(), NetworkMonitor()]

        if gpu_monitor := self._initialize_gpu_monitor():
            self.monitors.append(gpu_monitor)

        # A truthy environment value takes precedence over the constructor argument.
        self.sampling_interval = MLFLOW_SYSTEM_METRICS_SAMPLING_INTERVAL.get() or sampling_interval
        self.samples_before_logging = (
            MLFLOW_SYSTEM_METRICS_SAMPLES_BEFORE_LOGGING.get() or samples_before_logging
        )

        self._run_id = run_id
        self.mlflow_logger = BatchMetricsLogger(self._run_id)
        self._shutdown_event = threading.Event()
        self._process = None
        self._metrics_prefix = "system/"
        self.node_id = MLFLOW_SYSTEM_METRICS_NODE_ID.get() or node_id
        self._logging_step = self._get_next_logging_step(run_id) if resume_logging else 0

    def _get_next_logging_step(self, run_id):
        """Return the step at which resumed logging should continue for `run_id`.

        Returns 0 when the run cannot be fetched, when it has no metric starting with the
        system prefix, or when that metric has no history; otherwise the highest recorded step
        of the first such metric plus one.
        """
        from mlflow.tracking.client import MlflowClient

        client = MlflowClient()
        try:
            run = client.get_run(run_id)
        except MlflowException:
            return 0
        system_metric_name = next(
            (name for name in run.data.metrics if name.startswith(self._metrics_prefix)),
            None,
        )
        if system_metric_name is None:
            return 0
        metric_history = client.get_metric_history(run_id, system_metric_name)
        if not metric_history:
            # Nothing recorded for this metric: start from the beginning instead of failing.
            return 0
        # Use the highest step rather than the last list element, so the result does not depend
        # on the order in which the history is returned.
        return max(metric.step for metric in metric_history) + 1

    def start(self):
        """Start monitoring system metrics."""
        try:
            self._process = threading.Thread(
                target=self.monitor,
                daemon=True,
                name="SystemMetricsMonitor",
            )
            self._process.start()
            _logger.info("Started monitoring system metrics.")
        except Exception as e:
            _logger.warning(f"Failed to start monitoring system metrics: {e}")
            self._process = None

    def monitor(self):
        """Main monitoring loop, which consistently collect and log system metrics."""
        from mlflow.tracking.fluent import get_run

        while not self._shutdown_event.is_set():
            for _ in range(self.samples_before_logging):
                self.collect_metrics()
                # `Event.wait` returns True once the shutdown event is set. Stop right away in
                # that case instead of taking the remaining samples and querying the run, since
                # nothing is published after shutdown anyway.
                if self._shutdown_event.wait(self.sampling_interval):
                    return
            try:
                # Get the MLflow run to check if the run is not RUNNING.
                run = get_run(self._run_id)
            except Exception as e:
                _logger.warning(f"Failed to get mlflow run: {e}.")
                return
            if run.info.status != "RUNNING" or self._shutdown_event.is_set():
                # If the mlflow run is terminated or receives the shutdown signal, stop
                # monitoring.
                return
            metrics = self.aggregate_metrics()
            try:
                self.publish_metrics(metrics)
            except Exception as e:
                _logger.warning(
                    f"Failed to log system metrics: {e}, this is expected if the experiment/run is "
                    "already terminated."
                )
                return

    def collect_metrics(self):
        """Collect system metrics.

        Asks every monitor to take a sample and returns the merged contents of their sample
        stores.
        """
        metrics = {}
        for monitor in self.monitors:
            monitor.collect_metrics()
            metrics.update(monitor.metrics)
        return metrics

    def aggregate_metrics(self):
        """Aggregate collected metrics.

        Returns the merged result of every monitor's `aggregate_metrics()`.
        """
        metrics = {}
        for monitor in self.monitors:
            metrics.update(monitor.aggregate_metrics())
        return metrics

    def publish_metrics(self, metrics):
        """Log collected metrics to MLflow.

        Records `metrics` at the current logging step, advances the step, and clears every
        monitor's stored samples.
        """
        # Add prefix "system/" to the metrics name for grouping. If `self.node_id` is not None, also
        # add it to the metrics name.
        prefix = self._metrics_prefix + (self.node_id + "/" if self.node_id else "")
        metrics = {prefix + k: v for k, v in metrics.items()}

        self.mlflow_logger.record_metrics(metrics, self._logging_step)
        self._logging_step += 1
        for monitor in self.monitors:
            monitor.clear_metrics()

    def finish(self):
        """Stop monitoring system metrics."""
        if self._process is None:
            return
        _logger.info("Stopping system metrics monitoring...")
        self._shutdown_event.set()
        try:
            self._process.join()
            self.mlflow_logger.flush()
            _logger.info("Successfully terminated system metrics monitoring!")
        except Exception as e:
            _logger.error(f"Error terminating system metrics monitoring process: {e}.")
        self._process = None

    def _initialize_gpu_monitor(self) -> "Optional[BaseMetricsMonitor]":
        """Return a GPU monitor if one can be created, trying NVIDIA first, else None."""
        # NVIDIA GPU
        try:
            return GPUMonitor()
        except Exception:
            _logger.debug("Failed to initialize GPU monitor for NVIDIA GPU.", exc_info=True)

        # Falling back to pyrocml (AMD/HIP GPU)
        try:
            return ROCMMonitor()
        except Exception:
            _logger.debug("Failed to initialize GPU monitor for AMD/HIP GPU.", exc_info=True)

        _logger.info("Skip logging GPU metrics. Set logger level to DEBUG for more details.")
        return None
@@ -0,0 +1,16 @@
1
+ from mlflow.tracing.config import configure
2
+ from mlflow.tracing.display import disable_notebook_display, enable_notebook_display
3
+ from mlflow.tracing.provider import disable, enable, reset, set_destination
4
+ from mlflow.tracing.utils import set_span_chat_messages, set_span_chat_tools
5
+
6
+ __all__ = [
7
+ "configure",
8
+ "disable",
9
+ "enable",
10
+ "disable_notebook_display",
11
+ "enable_notebook_display",
12
+ "set_span_chat_messages",
13
+ "set_span_chat_tools",
14
+ "set_destination",
15
+ "reset",
16
+ ]