genesis-flow 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (645)
  1. genesis_flow-1.0.0.dist-info/METADATA +822 -0
  2. genesis_flow-1.0.0.dist-info/RECORD +645 -0
  3. genesis_flow-1.0.0.dist-info/WHEEL +5 -0
  4. genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
  5. genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
  6. genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
  7. mlflow/__init__.py +367 -0
  8. mlflow/__main__.py +3 -0
  9. mlflow/ag2/__init__.py +56 -0
  10. mlflow/ag2/ag2_logger.py +294 -0
  11. mlflow/anthropic/__init__.py +40 -0
  12. mlflow/anthropic/autolog.py +129 -0
  13. mlflow/anthropic/chat.py +144 -0
  14. mlflow/artifacts/__init__.py +268 -0
  15. mlflow/autogen/__init__.py +144 -0
  16. mlflow/autogen/chat.py +142 -0
  17. mlflow/azure/__init__.py +26 -0
  18. mlflow/azure/auth_handler.py +257 -0
  19. mlflow/azure/client.py +319 -0
  20. mlflow/azure/config.py +120 -0
  21. mlflow/azure/connection_factory.py +340 -0
  22. mlflow/azure/exceptions.py +27 -0
  23. mlflow/azure/stores.py +327 -0
  24. mlflow/azure/utils.py +183 -0
  25. mlflow/bedrock/__init__.py +45 -0
  26. mlflow/bedrock/_autolog.py +202 -0
  27. mlflow/bedrock/chat.py +122 -0
  28. mlflow/bedrock/stream.py +160 -0
  29. mlflow/bedrock/utils.py +43 -0
  30. mlflow/cli.py +707 -0
  31. mlflow/client.py +12 -0
  32. mlflow/config/__init__.py +56 -0
  33. mlflow/crewai/__init__.py +79 -0
  34. mlflow/crewai/autolog.py +253 -0
  35. mlflow/crewai/chat.py +29 -0
  36. mlflow/data/__init__.py +75 -0
  37. mlflow/data/artifact_dataset_sources.py +170 -0
  38. mlflow/data/code_dataset_source.py +40 -0
  39. mlflow/data/dataset.py +123 -0
  40. mlflow/data/dataset_registry.py +168 -0
  41. mlflow/data/dataset_source.py +110 -0
  42. mlflow/data/dataset_source_registry.py +219 -0
  43. mlflow/data/delta_dataset_source.py +167 -0
  44. mlflow/data/digest_utils.py +108 -0
  45. mlflow/data/evaluation_dataset.py +562 -0
  46. mlflow/data/filesystem_dataset_source.py +81 -0
  47. mlflow/data/http_dataset_source.py +145 -0
  48. mlflow/data/huggingface_dataset.py +258 -0
  49. mlflow/data/huggingface_dataset_source.py +118 -0
  50. mlflow/data/meta_dataset.py +104 -0
  51. mlflow/data/numpy_dataset.py +223 -0
  52. mlflow/data/pandas_dataset.py +231 -0
  53. mlflow/data/polars_dataset.py +352 -0
  54. mlflow/data/pyfunc_dataset_mixin.py +31 -0
  55. mlflow/data/schema.py +76 -0
  56. mlflow/data/sources.py +1 -0
  57. mlflow/data/spark_dataset.py +406 -0
  58. mlflow/data/spark_dataset_source.py +74 -0
  59. mlflow/data/spark_delta_utils.py +118 -0
  60. mlflow/data/tensorflow_dataset.py +350 -0
  61. mlflow/data/uc_volume_dataset_source.py +81 -0
  62. mlflow/db.py +27 -0
  63. mlflow/dspy/__init__.py +17 -0
  64. mlflow/dspy/autolog.py +197 -0
  65. mlflow/dspy/callback.py +398 -0
  66. mlflow/dspy/constant.py +1 -0
  67. mlflow/dspy/load.py +93 -0
  68. mlflow/dspy/save.py +393 -0
  69. mlflow/dspy/util.py +109 -0
  70. mlflow/dspy/wrapper.py +226 -0
  71. mlflow/entities/__init__.py +104 -0
  72. mlflow/entities/_mlflow_object.py +52 -0
  73. mlflow/entities/assessment.py +545 -0
  74. mlflow/entities/assessment_error.py +80 -0
  75. mlflow/entities/assessment_source.py +141 -0
  76. mlflow/entities/dataset.py +92 -0
  77. mlflow/entities/dataset_input.py +51 -0
  78. mlflow/entities/dataset_summary.py +62 -0
  79. mlflow/entities/document.py +48 -0
  80. mlflow/entities/experiment.py +109 -0
  81. mlflow/entities/experiment_tag.py +35 -0
  82. mlflow/entities/file_info.py +45 -0
  83. mlflow/entities/input_tag.py +35 -0
  84. mlflow/entities/lifecycle_stage.py +35 -0
  85. mlflow/entities/logged_model.py +228 -0
  86. mlflow/entities/logged_model_input.py +26 -0
  87. mlflow/entities/logged_model_output.py +32 -0
  88. mlflow/entities/logged_model_parameter.py +46 -0
  89. mlflow/entities/logged_model_status.py +74 -0
  90. mlflow/entities/logged_model_tag.py +33 -0
  91. mlflow/entities/metric.py +200 -0
  92. mlflow/entities/model_registry/__init__.py +29 -0
  93. mlflow/entities/model_registry/_model_registry_entity.py +13 -0
  94. mlflow/entities/model_registry/model_version.py +243 -0
  95. mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
  96. mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
  97. mlflow/entities/model_registry/model_version_search.py +25 -0
  98. mlflow/entities/model_registry/model_version_stages.py +25 -0
  99. mlflow/entities/model_registry/model_version_status.py +35 -0
  100. mlflow/entities/model_registry/model_version_tag.py +35 -0
  101. mlflow/entities/model_registry/prompt.py +73 -0
  102. mlflow/entities/model_registry/prompt_version.py +244 -0
  103. mlflow/entities/model_registry/registered_model.py +175 -0
  104. mlflow/entities/model_registry/registered_model_alias.py +35 -0
  105. mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
  106. mlflow/entities/model_registry/registered_model_search.py +25 -0
  107. mlflow/entities/model_registry/registered_model_tag.py +35 -0
  108. mlflow/entities/multipart_upload.py +74 -0
  109. mlflow/entities/param.py +49 -0
  110. mlflow/entities/run.py +97 -0
  111. mlflow/entities/run_data.py +84 -0
  112. mlflow/entities/run_info.py +188 -0
  113. mlflow/entities/run_inputs.py +59 -0
  114. mlflow/entities/run_outputs.py +43 -0
  115. mlflow/entities/run_status.py +41 -0
  116. mlflow/entities/run_tag.py +36 -0
  117. mlflow/entities/source_type.py +31 -0
  118. mlflow/entities/span.py +774 -0
  119. mlflow/entities/span_event.py +96 -0
  120. mlflow/entities/span_status.py +102 -0
  121. mlflow/entities/trace.py +317 -0
  122. mlflow/entities/trace_data.py +71 -0
  123. mlflow/entities/trace_info.py +220 -0
  124. mlflow/entities/trace_info_v2.py +162 -0
  125. mlflow/entities/trace_location.py +173 -0
  126. mlflow/entities/trace_state.py +39 -0
  127. mlflow/entities/trace_status.py +68 -0
  128. mlflow/entities/view_type.py +51 -0
  129. mlflow/environment_variables.py +866 -0
  130. mlflow/evaluation/__init__.py +16 -0
  131. mlflow/evaluation/assessment.py +369 -0
  132. mlflow/evaluation/evaluation.py +411 -0
  133. mlflow/evaluation/evaluation_tag.py +61 -0
  134. mlflow/evaluation/fluent.py +48 -0
  135. mlflow/evaluation/utils.py +201 -0
  136. mlflow/exceptions.py +213 -0
  137. mlflow/experiments.py +140 -0
  138. mlflow/gemini/__init__.py +81 -0
  139. mlflow/gemini/autolog.py +186 -0
  140. mlflow/gemini/chat.py +261 -0
  141. mlflow/genai/__init__.py +71 -0
  142. mlflow/genai/datasets/__init__.py +67 -0
  143. mlflow/genai/datasets/evaluation_dataset.py +131 -0
  144. mlflow/genai/evaluation/__init__.py +3 -0
  145. mlflow/genai/evaluation/base.py +411 -0
  146. mlflow/genai/evaluation/constant.py +23 -0
  147. mlflow/genai/evaluation/utils.py +244 -0
  148. mlflow/genai/judges/__init__.py +21 -0
  149. mlflow/genai/judges/databricks.py +404 -0
  150. mlflow/genai/label_schemas/__init__.py +153 -0
  151. mlflow/genai/label_schemas/label_schemas.py +209 -0
  152. mlflow/genai/labeling/__init__.py +159 -0
  153. mlflow/genai/labeling/labeling.py +250 -0
  154. mlflow/genai/optimize/__init__.py +13 -0
  155. mlflow/genai/optimize/base.py +198 -0
  156. mlflow/genai/optimize/optimizers/__init__.py +4 -0
  157. mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
  158. mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
  159. mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
  160. mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
  161. mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
  162. mlflow/genai/optimize/types.py +75 -0
  163. mlflow/genai/optimize/util.py +30 -0
  164. mlflow/genai/prompts/__init__.py +206 -0
  165. mlflow/genai/scheduled_scorers.py +431 -0
  166. mlflow/genai/scorers/__init__.py +26 -0
  167. mlflow/genai/scorers/base.py +492 -0
  168. mlflow/genai/scorers/builtin_scorers.py +765 -0
  169. mlflow/genai/scorers/scorer_utils.py +138 -0
  170. mlflow/genai/scorers/validation.py +165 -0
  171. mlflow/genai/utils/data_validation.py +146 -0
  172. mlflow/genai/utils/enum_utils.py +23 -0
  173. mlflow/genai/utils/trace_utils.py +211 -0
  174. mlflow/groq/__init__.py +42 -0
  175. mlflow/groq/_groq_autolog.py +74 -0
  176. mlflow/johnsnowlabs/__init__.py +888 -0
  177. mlflow/langchain/__init__.py +24 -0
  178. mlflow/langchain/api_request_parallel_processor.py +330 -0
  179. mlflow/langchain/autolog.py +147 -0
  180. mlflow/langchain/chat_agent_langgraph.py +340 -0
  181. mlflow/langchain/constant.py +1 -0
  182. mlflow/langchain/constants.py +1 -0
  183. mlflow/langchain/databricks_dependencies.py +444 -0
  184. mlflow/langchain/langchain_tracer.py +597 -0
  185. mlflow/langchain/model.py +919 -0
  186. mlflow/langchain/output_parsers.py +142 -0
  187. mlflow/langchain/retriever_chain.py +153 -0
  188. mlflow/langchain/runnables.py +527 -0
  189. mlflow/langchain/utils/chat.py +402 -0
  190. mlflow/langchain/utils/logging.py +671 -0
  191. mlflow/langchain/utils/serialization.py +36 -0
  192. mlflow/legacy_databricks_cli/__init__.py +0 -0
  193. mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
  194. mlflow/legacy_databricks_cli/configure/provider.py +482 -0
  195. mlflow/litellm/__init__.py +175 -0
  196. mlflow/llama_index/__init__.py +22 -0
  197. mlflow/llama_index/autolog.py +55 -0
  198. mlflow/llama_index/chat.py +43 -0
  199. mlflow/llama_index/constant.py +1 -0
  200. mlflow/llama_index/model.py +577 -0
  201. mlflow/llama_index/pyfunc_wrapper.py +332 -0
  202. mlflow/llama_index/serialize_objects.py +188 -0
  203. mlflow/llama_index/tracer.py +561 -0
  204. mlflow/metrics/__init__.py +479 -0
  205. mlflow/metrics/base.py +39 -0
  206. mlflow/metrics/genai/__init__.py +25 -0
  207. mlflow/metrics/genai/base.py +101 -0
  208. mlflow/metrics/genai/genai_metric.py +771 -0
  209. mlflow/metrics/genai/metric_definitions.py +450 -0
  210. mlflow/metrics/genai/model_utils.py +371 -0
  211. mlflow/metrics/genai/prompt_template.py +68 -0
  212. mlflow/metrics/genai/prompts/__init__.py +0 -0
  213. mlflow/metrics/genai/prompts/v1.py +422 -0
  214. mlflow/metrics/genai/utils.py +6 -0
  215. mlflow/metrics/metric_definitions.py +619 -0
  216. mlflow/mismatch.py +34 -0
  217. mlflow/mistral/__init__.py +34 -0
  218. mlflow/mistral/autolog.py +71 -0
  219. mlflow/mistral/chat.py +135 -0
  220. mlflow/ml_package_versions.py +452 -0
  221. mlflow/models/__init__.py +97 -0
  222. mlflow/models/auth_policy.py +83 -0
  223. mlflow/models/cli.py +354 -0
  224. mlflow/models/container/__init__.py +294 -0
  225. mlflow/models/container/scoring_server/__init__.py +0 -0
  226. mlflow/models/container/scoring_server/nginx.conf +39 -0
  227. mlflow/models/dependencies_schemas.py +287 -0
  228. mlflow/models/display_utils.py +158 -0
  229. mlflow/models/docker_utils.py +211 -0
  230. mlflow/models/evaluation/__init__.py +23 -0
  231. mlflow/models/evaluation/_shap_patch.py +64 -0
  232. mlflow/models/evaluation/artifacts.py +194 -0
  233. mlflow/models/evaluation/base.py +1811 -0
  234. mlflow/models/evaluation/calibration_curve.py +109 -0
  235. mlflow/models/evaluation/default_evaluator.py +996 -0
  236. mlflow/models/evaluation/deprecated.py +23 -0
  237. mlflow/models/evaluation/evaluator_registry.py +80 -0
  238. mlflow/models/evaluation/evaluators/classifier.py +704 -0
  239. mlflow/models/evaluation/evaluators/default.py +233 -0
  240. mlflow/models/evaluation/evaluators/regressor.py +96 -0
  241. mlflow/models/evaluation/evaluators/shap.py +296 -0
  242. mlflow/models/evaluation/lift_curve.py +178 -0
  243. mlflow/models/evaluation/utils/metric.py +123 -0
  244. mlflow/models/evaluation/utils/trace.py +179 -0
  245. mlflow/models/evaluation/validation.py +434 -0
  246. mlflow/models/flavor_backend.py +93 -0
  247. mlflow/models/flavor_backend_registry.py +53 -0
  248. mlflow/models/model.py +1639 -0
  249. mlflow/models/model_config.py +150 -0
  250. mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
  251. mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
  252. mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
  253. mlflow/models/python_api.py +369 -0
  254. mlflow/models/rag_signatures.py +128 -0
  255. mlflow/models/resources.py +321 -0
  256. mlflow/models/signature.py +662 -0
  257. mlflow/models/utils.py +2054 -0
  258. mlflow/models/wheeled_model.py +280 -0
  259. mlflow/openai/__init__.py +57 -0
  260. mlflow/openai/_agent_tracer.py +364 -0
  261. mlflow/openai/api_request_parallel_processor.py +131 -0
  262. mlflow/openai/autolog.py +509 -0
  263. mlflow/openai/constant.py +1 -0
  264. mlflow/openai/model.py +824 -0
  265. mlflow/openai/utils/chat_schema.py +367 -0
  266. mlflow/optuna/__init__.py +3 -0
  267. mlflow/optuna/storage.py +646 -0
  268. mlflow/plugins/__init__.py +72 -0
  269. mlflow/plugins/base.py +358 -0
  270. mlflow/plugins/builtin/__init__.py +24 -0
  271. mlflow/plugins/builtin/pytorch_plugin.py +150 -0
  272. mlflow/plugins/builtin/sklearn_plugin.py +158 -0
  273. mlflow/plugins/builtin/transformers_plugin.py +187 -0
  274. mlflow/plugins/cli.py +321 -0
  275. mlflow/plugins/discovery.py +340 -0
  276. mlflow/plugins/manager.py +465 -0
  277. mlflow/plugins/registry.py +316 -0
  278. mlflow/plugins/templates/framework_plugin_template.py +329 -0
  279. mlflow/prompt/constants.py +20 -0
  280. mlflow/prompt/promptlab_model.py +197 -0
  281. mlflow/prompt/registry_utils.py +248 -0
  282. mlflow/promptflow/__init__.py +495 -0
  283. mlflow/protos/__init__.py +0 -0
  284. mlflow/protos/assessments_pb2.py +174 -0
  285. mlflow/protos/databricks_artifacts_pb2.py +489 -0
  286. mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
  287. mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
  288. mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
  289. mlflow/protos/databricks_pb2.py +267 -0
  290. mlflow/protos/databricks_trace_server_pb2.py +374 -0
  291. mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
  292. mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
  293. mlflow/protos/facet_feature_statistics_pb2.py +296 -0
  294. mlflow/protos/internal_pb2.py +77 -0
  295. mlflow/protos/mlflow_artifacts_pb2.py +336 -0
  296. mlflow/protos/model_registry_pb2.py +1073 -0
  297. mlflow/protos/scalapb/__init__.py +0 -0
  298. mlflow/protos/scalapb/scalapb_pb2.py +104 -0
  299. mlflow/protos/service_pb2.py +2600 -0
  300. mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
  301. mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
  302. mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
  303. mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
  304. mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
  305. mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
  306. mlflow/py.typed +0 -0
  307. mlflow/pydantic_ai/__init__.py +57 -0
  308. mlflow/pydantic_ai/autolog.py +173 -0
  309. mlflow/pyfunc/__init__.py +3844 -0
  310. mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
  311. mlflow/pyfunc/backend.py +523 -0
  312. mlflow/pyfunc/context.py +78 -0
  313. mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
  314. mlflow/pyfunc/loaders/__init__.py +7 -0
  315. mlflow/pyfunc/loaders/chat_agent.py +117 -0
  316. mlflow/pyfunc/loaders/chat_model.py +125 -0
  317. mlflow/pyfunc/loaders/code_model.py +31 -0
  318. mlflow/pyfunc/loaders/responses_agent.py +112 -0
  319. mlflow/pyfunc/mlserver.py +46 -0
  320. mlflow/pyfunc/model.py +1473 -0
  321. mlflow/pyfunc/scoring_server/__init__.py +604 -0
  322. mlflow/pyfunc/scoring_server/app.py +7 -0
  323. mlflow/pyfunc/scoring_server/client.py +146 -0
  324. mlflow/pyfunc/spark_model_cache.py +48 -0
  325. mlflow/pyfunc/stdin_server.py +44 -0
  326. mlflow/pyfunc/utils/__init__.py +3 -0
  327. mlflow/pyfunc/utils/data_validation.py +224 -0
  328. mlflow/pyfunc/utils/environment.py +22 -0
  329. mlflow/pyfunc/utils/input_converter.py +47 -0
  330. mlflow/pyfunc/utils/serving_data_parser.py +11 -0
  331. mlflow/pytorch/__init__.py +1171 -0
  332. mlflow/pytorch/_lightning_autolog.py +580 -0
  333. mlflow/pytorch/_pytorch_autolog.py +50 -0
  334. mlflow/pytorch/pickle_module.py +35 -0
  335. mlflow/rfunc/__init__.py +42 -0
  336. mlflow/rfunc/backend.py +134 -0
  337. mlflow/runs.py +89 -0
  338. mlflow/server/__init__.py +302 -0
  339. mlflow/server/auth/__init__.py +1224 -0
  340. mlflow/server/auth/__main__.py +4 -0
  341. mlflow/server/auth/basic_auth.ini +6 -0
  342. mlflow/server/auth/cli.py +11 -0
  343. mlflow/server/auth/client.py +537 -0
  344. mlflow/server/auth/config.py +34 -0
  345. mlflow/server/auth/db/__init__.py +0 -0
  346. mlflow/server/auth/db/cli.py +18 -0
  347. mlflow/server/auth/db/migrations/__init__.py +0 -0
  348. mlflow/server/auth/db/migrations/alembic.ini +110 -0
  349. mlflow/server/auth/db/migrations/env.py +76 -0
  350. mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
  351. mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
  352. mlflow/server/auth/db/models.py +67 -0
  353. mlflow/server/auth/db/utils.py +37 -0
  354. mlflow/server/auth/entities.py +165 -0
  355. mlflow/server/auth/logo.py +14 -0
  356. mlflow/server/auth/permissions.py +65 -0
  357. mlflow/server/auth/routes.py +18 -0
  358. mlflow/server/auth/sqlalchemy_store.py +263 -0
  359. mlflow/server/graphql/__init__.py +0 -0
  360. mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
  361. mlflow/server/graphql/graphql_custom_scalars.py +24 -0
  362. mlflow/server/graphql/graphql_errors.py +15 -0
  363. mlflow/server/graphql/graphql_no_batching.py +89 -0
  364. mlflow/server/graphql/graphql_schema_extensions.py +74 -0
  365. mlflow/server/handlers.py +3217 -0
  366. mlflow/server/prometheus_exporter.py +17 -0
  367. mlflow/server/validation.py +30 -0
  368. mlflow/shap/__init__.py +691 -0
  369. mlflow/sklearn/__init__.py +1994 -0
  370. mlflow/sklearn/utils.py +1041 -0
  371. mlflow/smolagents/__init__.py +66 -0
  372. mlflow/smolagents/autolog.py +139 -0
  373. mlflow/smolagents/chat.py +29 -0
  374. mlflow/store/__init__.py +10 -0
  375. mlflow/store/_unity_catalog/__init__.py +1 -0
  376. mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
  377. mlflow/store/_unity_catalog/lineage/constants.py +2 -0
  378. mlflow/store/_unity_catalog/registry/__init__.py +6 -0
  379. mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
  380. mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
  381. mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
  382. mlflow/store/_unity_catalog/registry/utils.py +121 -0
  383. mlflow/store/artifact/__init__.py +0 -0
  384. mlflow/store/artifact/artifact_repo.py +472 -0
  385. mlflow/store/artifact/artifact_repository_registry.py +154 -0
  386. mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
  387. mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
  388. mlflow/store/artifact/cli.py +141 -0
  389. mlflow/store/artifact/cloud_artifact_repo.py +332 -0
  390. mlflow/store/artifact/databricks_artifact_repo.py +729 -0
  391. mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
  392. mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
  393. mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
  394. mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
  395. mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
  396. mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
  397. mlflow/store/artifact/ftp_artifact_repo.py +132 -0
  398. mlflow/store/artifact/gcs_artifact_repo.py +296 -0
  399. mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
  400. mlflow/store/artifact/http_artifact_repo.py +218 -0
  401. mlflow/store/artifact/local_artifact_repo.py +142 -0
  402. mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
  403. mlflow/store/artifact/models_artifact_repo.py +259 -0
  404. mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
  405. mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
  406. mlflow/store/artifact/r2_artifact_repo.py +70 -0
  407. mlflow/store/artifact/runs_artifact_repo.py +265 -0
  408. mlflow/store/artifact/s3_artifact_repo.py +330 -0
  409. mlflow/store/artifact/sftp_artifact_repo.py +141 -0
  410. mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
  411. mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
  412. mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
  413. mlflow/store/artifact/utils/__init__.py +0 -0
  414. mlflow/store/artifact/utils/models.py +148 -0
  415. mlflow/store/db/__init__.py +0 -0
  416. mlflow/store/db/base_sql_model.py +3 -0
  417. mlflow/store/db/db_types.py +10 -0
  418. mlflow/store/db/utils.py +314 -0
  419. mlflow/store/db_migrations/__init__.py +0 -0
  420. mlflow/store/db_migrations/alembic.ini +74 -0
  421. mlflow/store/db_migrations/env.py +84 -0
  422. mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
  423. mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
  424. mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
  425. mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
  426. mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
  427. mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
  428. mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
  429. mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
  430. mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
  431. mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
  432. mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
  433. mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
  434. mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
  435. mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
  436. mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
  437. mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
  438. mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
  439. mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
  440. mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
  441. mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
  442. mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
  443. mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
  444. mlflow/store/db_migrations/versions/__init__.py +0 -0
  445. mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
  446. mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
  447. mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
  448. mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
  449. mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
  450. mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
  451. mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
  452. mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
  453. mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
  454. mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
  455. mlflow/store/entities/__init__.py +3 -0
  456. mlflow/store/entities/paged_list.py +18 -0
  457. mlflow/store/model_registry/__init__.py +10 -0
  458. mlflow/store/model_registry/abstract_store.py +1081 -0
  459. mlflow/store/model_registry/base_rest_store.py +44 -0
  460. mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
  461. mlflow/store/model_registry/dbmodels/__init__.py +0 -0
  462. mlflow/store/model_registry/dbmodels/models.py +206 -0
  463. mlflow/store/model_registry/file_store.py +1091 -0
  464. mlflow/store/model_registry/rest_store.py +481 -0
  465. mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
  466. mlflow/store/tracking/__init__.py +23 -0
  467. mlflow/store/tracking/abstract_store.py +816 -0
  468. mlflow/store/tracking/dbmodels/__init__.py +0 -0
  469. mlflow/store/tracking/dbmodels/initial_models.py +243 -0
  470. mlflow/store/tracking/dbmodels/models.py +1073 -0
  471. mlflow/store/tracking/file_store.py +2438 -0
  472. mlflow/store/tracking/postgres_managed_identity.py +146 -0
  473. mlflow/store/tracking/rest_store.py +1131 -0
  474. mlflow/store/tracking/sqlalchemy_store.py +2785 -0
  475. mlflow/system_metrics/__init__.py +61 -0
  476. mlflow/system_metrics/metrics/__init__.py +0 -0
  477. mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
  478. mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
  479. mlflow/system_metrics/metrics/disk_monitor.py +21 -0
  480. mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
  481. mlflow/system_metrics/metrics/network_monitor.py +34 -0
  482. mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
  483. mlflow/system_metrics/system_metrics_monitor.py +198 -0
  484. mlflow/tracing/__init__.py +16 -0
  485. mlflow/tracing/assessment.py +356 -0
  486. mlflow/tracing/client.py +531 -0
  487. mlflow/tracing/config.py +125 -0
  488. mlflow/tracing/constant.py +105 -0
  489. mlflow/tracing/destination.py +81 -0
  490. mlflow/tracing/display/__init__.py +40 -0
  491. mlflow/tracing/display/display_handler.py +196 -0
  492. mlflow/tracing/export/async_export_queue.py +186 -0
  493. mlflow/tracing/export/inference_table.py +138 -0
  494. mlflow/tracing/export/mlflow_v3.py +137 -0
  495. mlflow/tracing/export/utils.py +70 -0
  496. mlflow/tracing/fluent.py +1417 -0
  497. mlflow/tracing/processor/base_mlflow.py +199 -0
  498. mlflow/tracing/processor/inference_table.py +175 -0
  499. mlflow/tracing/processor/mlflow_v3.py +47 -0
  500. mlflow/tracing/processor/otel.py +73 -0
  501. mlflow/tracing/provider.py +487 -0
  502. mlflow/tracing/trace_manager.py +200 -0
  503. mlflow/tracing/utils/__init__.py +616 -0
  504. mlflow/tracing/utils/artifact_utils.py +28 -0
  505. mlflow/tracing/utils/copy.py +55 -0
  506. mlflow/tracing/utils/environment.py +55 -0
  507. mlflow/tracing/utils/exception.py +21 -0
  508. mlflow/tracing/utils/once.py +35 -0
  509. mlflow/tracing/utils/otlp.py +63 -0
  510. mlflow/tracing/utils/processor.py +54 -0
  511. mlflow/tracing/utils/search.py +292 -0
  512. mlflow/tracing/utils/timeout.py +250 -0
  513. mlflow/tracing/utils/token.py +19 -0
  514. mlflow/tracing/utils/truncation.py +124 -0
  515. mlflow/tracing/utils/warning.py +76 -0
  516. mlflow/tracking/__init__.py +39 -0
  517. mlflow/tracking/_model_registry/__init__.py +1 -0
  518. mlflow/tracking/_model_registry/client.py +764 -0
  519. mlflow/tracking/_model_registry/fluent.py +853 -0
  520. mlflow/tracking/_model_registry/registry.py +67 -0
  521. mlflow/tracking/_model_registry/utils.py +251 -0
  522. mlflow/tracking/_tracking_service/__init__.py +0 -0
  523. mlflow/tracking/_tracking_service/client.py +883 -0
  524. mlflow/tracking/_tracking_service/registry.py +56 -0
  525. mlflow/tracking/_tracking_service/utils.py +275 -0
  526. mlflow/tracking/artifact_utils.py +179 -0
  527. mlflow/tracking/client.py +5900 -0
  528. mlflow/tracking/context/__init__.py +0 -0
  529. mlflow/tracking/context/abstract_context.py +35 -0
  530. mlflow/tracking/context/databricks_cluster_context.py +15 -0
  531. mlflow/tracking/context/databricks_command_context.py +15 -0
  532. mlflow/tracking/context/databricks_job_context.py +49 -0
  533. mlflow/tracking/context/databricks_notebook_context.py +41 -0
  534. mlflow/tracking/context/databricks_repo_context.py +43 -0
  535. mlflow/tracking/context/default_context.py +51 -0
  536. mlflow/tracking/context/git_context.py +32 -0
  537. mlflow/tracking/context/registry.py +98 -0
  538. mlflow/tracking/context/system_environment_context.py +15 -0
  539. mlflow/tracking/default_experiment/__init__.py +1 -0
  540. mlflow/tracking/default_experiment/abstract_context.py +43 -0
  541. mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
  542. mlflow/tracking/default_experiment/registry.py +75 -0
  543. mlflow/tracking/fluent.py +3595 -0
  544. mlflow/tracking/metric_value_conversion_utils.py +93 -0
  545. mlflow/tracking/multimedia.py +206 -0
  546. mlflow/tracking/registry.py +86 -0
  547. mlflow/tracking/request_auth/__init__.py +0 -0
  548. mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
  549. mlflow/tracking/request_auth/registry.py +60 -0
  550. mlflow/tracking/request_header/__init__.py +0 -0
  551. mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
  552. mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
  553. mlflow/tracking/request_header/default_request_header_provider.py +17 -0
  554. mlflow/tracking/request_header/registry.py +79 -0
  555. mlflow/transformers/__init__.py +2982 -0
  556. mlflow/transformers/flavor_config.py +258 -0
  557. mlflow/transformers/hub_utils.py +83 -0
  558. mlflow/transformers/llm_inference_utils.py +468 -0
  559. mlflow/transformers/model_io.py +301 -0
  560. mlflow/transformers/peft.py +51 -0
  561. mlflow/transformers/signature.py +183 -0
  562. mlflow/transformers/torch_utils.py +55 -0
  563. mlflow/types/__init__.py +21 -0
  564. mlflow/types/agent.py +270 -0
  565. mlflow/types/chat.py +240 -0
  566. mlflow/types/llm.py +935 -0
  567. mlflow/types/responses.py +139 -0
  568. mlflow/types/responses_helpers.py +416 -0
  569. mlflow/types/schema.py +1505 -0
  570. mlflow/types/type_hints.py +647 -0
  571. mlflow/types/utils.py +753 -0
  572. mlflow/utils/__init__.py +283 -0
  573. mlflow/utils/_capture_modules.py +256 -0
  574. mlflow/utils/_capture_transformers_modules.py +75 -0
  575. mlflow/utils/_spark_utils.py +201 -0
  576. mlflow/utils/_unity_catalog_oss_utils.py +97 -0
  577. mlflow/utils/_unity_catalog_utils.py +479 -0
  578. mlflow/utils/annotations.py +218 -0
  579. mlflow/utils/arguments_utils.py +16 -0
  580. mlflow/utils/async_logging/__init__.py +1 -0
  581. mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
  582. mlflow/utils/async_logging/async_logging_queue.py +366 -0
  583. mlflow/utils/async_logging/run_artifact.py +38 -0
  584. mlflow/utils/async_logging/run_batch.py +58 -0
  585. mlflow/utils/async_logging/run_operations.py +49 -0
  586. mlflow/utils/autologging_utils/__init__.py +737 -0
  587. mlflow/utils/autologging_utils/client.py +432 -0
  588. mlflow/utils/autologging_utils/config.py +33 -0
  589. mlflow/utils/autologging_utils/events.py +294 -0
  590. mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
  591. mlflow/utils/autologging_utils/metrics_queue.py +71 -0
  592. mlflow/utils/autologging_utils/safety.py +1104 -0
  593. mlflow/utils/autologging_utils/versioning.py +95 -0
  594. mlflow/utils/checkpoint_utils.py +206 -0
  595. mlflow/utils/class_utils.py +6 -0
  596. mlflow/utils/cli_args.py +257 -0
  597. mlflow/utils/conda.py +354 -0
  598. mlflow/utils/credentials.py +231 -0
  599. mlflow/utils/data_utils.py +17 -0
  600. mlflow/utils/databricks_utils.py +1436 -0
  601. mlflow/utils/docstring_utils.py +477 -0
  602. mlflow/utils/doctor.py +133 -0
  603. mlflow/utils/download_cloud_file_chunk.py +43 -0
  604. mlflow/utils/env_manager.py +16 -0
  605. mlflow/utils/env_pack.py +131 -0
  606. mlflow/utils/environment.py +1009 -0
  607. mlflow/utils/exception_utils.py +14 -0
  608. mlflow/utils/file_utils.py +978 -0
  609. mlflow/utils/git_utils.py +77 -0
  610. mlflow/utils/gorilla.py +797 -0
  611. mlflow/utils/import_hooks/__init__.py +363 -0
  612. mlflow/utils/lazy_load.py +51 -0
  613. mlflow/utils/logging_utils.py +168 -0
  614. mlflow/utils/mime_type_utils.py +58 -0
  615. mlflow/utils/mlflow_tags.py +103 -0
  616. mlflow/utils/model_utils.py +486 -0
  617. mlflow/utils/name_utils.py +346 -0
  618. mlflow/utils/nfs_on_spark.py +62 -0
  619. mlflow/utils/openai_utils.py +164 -0
  620. mlflow/utils/os.py +12 -0
  621. mlflow/utils/oss_registry_utils.py +29 -0
  622. mlflow/utils/plugins.py +17 -0
  623. mlflow/utils/process.py +182 -0
  624. mlflow/utils/promptlab_utils.py +146 -0
  625. mlflow/utils/proto_json_utils.py +743 -0
  626. mlflow/utils/pydantic_utils.py +54 -0
  627. mlflow/utils/request_utils.py +279 -0
  628. mlflow/utils/requirements_utils.py +704 -0
  629. mlflow/utils/rest_utils.py +673 -0
  630. mlflow/utils/search_logged_model_utils.py +127 -0
  631. mlflow/utils/search_utils.py +2111 -0
  632. mlflow/utils/secure_loading.py +221 -0
  633. mlflow/utils/security_validation.py +384 -0
  634. mlflow/utils/server_cli_utils.py +61 -0
  635. mlflow/utils/spark_utils.py +15 -0
  636. mlflow/utils/string_utils.py +138 -0
  637. mlflow/utils/thread_utils.py +63 -0
  638. mlflow/utils/time.py +54 -0
  639. mlflow/utils/timeout.py +42 -0
  640. mlflow/utils/uri.py +572 -0
  641. mlflow/utils/validation.py +662 -0
  642. mlflow/utils/virtualenv.py +458 -0
  643. mlflow/utils/warnings_utils.py +25 -0
  644. mlflow/utils/yaml_utils.py +179 -0
  645. mlflow/version.py +24 -0
@@ -0,0 +1,562 @@
1
+ import hashlib
2
+ import json
3
+ import logging
4
+ import math
5
+ import struct
6
+ import sys
7
+
8
+ from packaging.version import Version
9
+
10
+ import mlflow
11
+ from mlflow.entities import RunTag
12
+ from mlflow.exceptions import MlflowException
13
+ from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
14
+ from mlflow.utils.string_utils import generate_feature_name_if_not_string
15
+
16
+ try:
17
+ # `numpy` and `pandas` are not required for `mlflow-skinny`.
18
+ import numpy as np
19
+ import pandas as pd
20
+ except ImportError:
21
+ pass
22
+
23
+ _logger = logging.getLogger(__name__)
24
+
25
+
26
+ def _hash_uint64_ndarray_as_bytes(array):
27
+ assert len(array.shape) == 1
28
+ # see struct pack format string https://docs.python.org/3/library/struct.html#format-strings
29
+ return struct.pack(f">{array.size}Q", *array)
30
+
31
+
32
+ def _is_empty_list_or_array(data):
33
+ if isinstance(data, list):
34
+ return len(data) == 0
35
+ elif isinstance(data, np.ndarray):
36
+ return data.size == 0
37
+ return False
38
+
39
+
40
def _is_array_has_dict(nd_array):
    """
    Check whether an array or (possibly nested) list contains dictionary elements.

    Args:
        nd_array: A numpy array or a list. Nested lists/arrays are followed through
            their first element.

    Returns:
        True if the first (innermost) element is a dict, False otherwise. Empty
        inputs yield False.
    """
    if _is_empty_list_or_array(nd_array):
        return False

    # It is less likely the array or list contains heterogeneous elements, so just checking the
    # first element to avoid performance overhead.
    if isinstance(nd_array, np.ndarray):
        elm = nd_array.item(0)
    else:
        # The recursion below may pass a plain list, which has no `.item()` method.
        elm = nd_array[0]

    if isinstance(elm, (list, np.ndarray)):
        return _is_array_has_dict(elm)
    return isinstance(elm, dict)
53
+
54
+
55
+ def _hash_array_of_dict_as_bytes(data):
56
+ # NB: If an array or list contains dictionary element, it can't be hashed with
57
+ # pandas.util.hash_array. Hence we need to manually hash the elements here. This is
58
+ # particularly for the LLM use case where the input can be a list of dictionary
59
+ # (chat/completion payloads), so doesn't handle more complex case like nested lists.
60
+ result = b""
61
+ for elm in data:
62
+ if isinstance(elm, (list, np.ndarray)):
63
+ result += _hash_array_of_dict_as_bytes(elm)
64
+ elif isinstance(elm, dict):
65
+ result += _hash_dict_as_bytes(elm)
66
+ else:
67
+ result += _hash_data_as_bytes(elm)
68
+ return result
69
+
70
+
71
def _hash_ndarray_as_bytes(nd_array):
    """
    Hash a numpy array (or anything ``np.array`` accepts) into bytes.

    Arrays containing dictionaries are hashed element by element. Every other array
    is flattened in C order and hashed with ``pandas.util.hash_array``; the array
    shape is appended so that arrays with equal content but different shapes produce
    different bytes.
    """
    if not isinstance(nd_array, np.ndarray):
        nd_array = np.array(nd_array)

    if _is_array_has_dict(nd_array):
        return _hash_array_of_dict_as_bytes(nd_array)

    content_bytes = _hash_uint64_ndarray_as_bytes(
        pd.util.hash_array(nd_array.flatten(order="C"))
    )
    shape_bytes = _hash_uint64_ndarray_as_bytes(np.array(nd_array.shape, dtype="uint64"))
    return content_bytes + shape_bytes
81
+
82
+
83
+ def _hash_data_as_bytes(data):
84
+ try:
85
+ if isinstance(data, (list, np.ndarray)):
86
+ return _hash_ndarray_as_bytes(data)
87
+ if isinstance(data, dict):
88
+ return _hash_dict_as_bytes(data)
89
+ if np.isscalar(data):
90
+ return _hash_uint64_ndarray_as_bytes(pd.util.hash_array(np.array([data])))
91
+ finally:
92
+ return b"" # Skip unsupported types by returning an empty byte string
93
+
94
+
95
def _hash_dict_as_bytes(data_dict):
    """
    Hash a dictionary into bytes: first its keys, then its values.

    The values are hashed in one shot as an array when possible. If that raises,
    each value is hashed individually instead.
    """
    keys_bytes = _hash_ndarray_as_bytes(list(data_dict.keys()))
    try:
        return keys_bytes + _hash_ndarray_as_bytes(list(data_dict.values()))
    except Exception:
        # If the values containing non-hashable objects, we will hash the values recursively.
        per_value_bytes = b"".join(_hash_data_as_bytes(value) for value in data_dict.values())
        return keys_bytes + per_value_bytes
104
+
105
+
106
+ def _hash_array_like_obj_as_bytes(data):
107
+ """
108
+ Helper method to convert pandas dataframe/numpy array/list into bytes for
109
+ MD5 calculation purpose.
110
+ """
111
+ if isinstance(data, pd.DataFrame):
112
+ # add checking `'pyspark' in sys.modules` to avoid importing pyspark when user
113
+ # run code not related to pyspark.
114
+ if "pyspark" in sys.modules:
115
+ from pyspark.ml.linalg import Vector as spark_vector_type
116
+ else:
117
+ spark_vector_type = None
118
+
119
+ def _hash_array_like_element_as_bytes(v):
120
+ if spark_vector_type is not None:
121
+ if isinstance(v, spark_vector_type):
122
+ return _hash_ndarray_as_bytes(v.toArray())
123
+ if isinstance(v, (dict, list, np.ndarray)):
124
+ return _hash_data_as_bytes(v)
125
+
126
+ try:
127
+ # Attempt to hash the value, if it fails, return an empty byte string
128
+ pd.util.hash_array(np.array([v]))
129
+ return v
130
+ except TypeError:
131
+ return b"" # Skip unhashable types by returning an empty byte string
132
+
133
+ if Version(pd.__version__) >= Version("2.1.0"):
134
+ data = data.map(_hash_array_like_element_as_bytes)
135
+ else:
136
+ data = data.applymap(_hash_array_like_element_as_bytes)
137
+ return _hash_uint64_ndarray_as_bytes(pd.util.hash_pandas_object(data))
138
+ elif isinstance(data, np.ndarray) and len(data) > 0 and isinstance(data[0], list):
139
+ # convert numpy array of lists into numpy array of the string representation of the lists
140
+ # because lists are not hashable
141
+ hashable = np.array(str(val) for val in data)
142
+ return _hash_ndarray_as_bytes(hashable)
143
+ elif isinstance(data, np.ndarray) and len(data) > 0 and isinstance(data[0], np.ndarray):
144
+ # convert numpy array of numpy arrays into 2d numpy arrays
145
+ # because numpy array of numpy arrays are not hashable
146
+ hashable = np.array(data.tolist())
147
+ return _hash_ndarray_as_bytes(hashable)
148
+ elif isinstance(data, np.ndarray):
149
+ return _hash_ndarray_as_bytes(data)
150
+ elif isinstance(data, list):
151
+ return _hash_ndarray_as_bytes(np.array(data))
152
+ else:
153
+ raise ValueError("Unsupported data type.")
154
+
155
+
156
def _gen_md5_for_arraylike_obj(md5_gen, data):
    """
    Feed an array-like object into ``md5_gen``. The MD5 is calculated over:
     - array length
     - first NUM_SAMPLE_ROWS_FOR_HASH rows content
     - last NUM_SAMPLE_ROWS_FOR_HASH rows content

    When the object has fewer than ``2 * NUM_SAMPLE_ROWS_FOR_HASH`` rows, its whole
    content is hashed instead of the head and tail samples.

    Args:
        md5_gen: A hash object exposing ``update(bytes)``; it is mutated in place.
        data: A pandas DataFrame, numpy array or list.
    """
    sample_size = EvaluationDataset.NUM_SAMPLE_ROWS_FOR_HASH
    row_count = len(data)

    md5_gen.update(_hash_uint64_ndarray_as_bytes(np.array([row_count], dtype="uint64")))

    if row_count < sample_size * 2:
        md5_gen.update(_hash_array_like_obj_as_bytes(data))
        return

    # Rows of a pandas DataFrame have to be sliced positionally through `iloc`.
    rows = data.iloc if isinstance(data, pd.DataFrame) else data
    head_rows = rows[:sample_size]
    tail_rows = rows[-sample_size:]
    md5_gen.update(_hash_array_like_obj_as_bytes(head_rows))
    md5_gen.update(_hash_array_like_obj_as_bytes(tail_rows))
177
+
178
+
179
def convert_data_to_mlflow_dataset(data, targets=None, predictions=None):
    """Convert input data to mlflow dataset.

    Args:
        data: A list, numpy array, pandas DataFrame or (if pyspark is loaded) Spark
            DataFrame.
        targets: Labels for list/numpy data, or the name of the targets column for
            DataFrame data.
        predictions: Name of the predictions column. Only supported for DataFrame data.

    Returns:
        The MLflow dataset created from ``data``, or ``data`` itself when its type
        cannot be converted.

    Raises:
        MlflowException: If ``predictions`` is given and ``data`` is not a DataFrame.
    """
    supported_dataframe_types = [pd.DataFrame]
    if "pyspark" in sys.modules:
        from mlflow.utils.spark_utils import get_spark_dataframe_type

        spark_df_type = get_spark_dataframe_type()
        supported_dataframe_types.append(spark_df_type)

    if predictions is not None:
        _validate_dataset_type_supports_predictions(
            data=data, supported_predictions_dataset_types=supported_dataframe_types
        )

    if isinstance(data, list):
        # If the list is flat, we assume each element is an independent sample.
        # An empty list has no first element to inspect, so it is passed through as is.
        if data and not isinstance(data[0], (list, np.ndarray)):
            data = [[elm] for elm in data]

        # The truth value of a multi-element numpy array is ambiguous and raises a
        # ValueError, so arrays are checked by size. Any other value keeps the
        # original truthiness rule (None or empty targets mean "no targets").
        if isinstance(targets, np.ndarray):
            has_targets = targets.size > 0
        else:
            has_targets = bool(targets)

        return mlflow.data.from_numpy(
            np.array(data), targets=np.array(targets) if has_targets else None
        )
    elif isinstance(data, np.ndarray):
        return mlflow.data.from_numpy(data, targets=targets)
    elif isinstance(data, pd.DataFrame):
        return mlflow.data.from_pandas(df=data, targets=targets, predictions=predictions)
    elif "pyspark" in sys.modules and isinstance(data, spark_df_type):
        return mlflow.data.from_spark(df=data, targets=targets, predictions=predictions)
    else:
        # Cannot convert to mlflow dataset, return original data.
        _logger.info(
            "Cannot convert input data to `evaluate()` to an mlflow dataset, input must be a list, "
            f"a numpy array, a panda Dataframe or a spark Dataframe, but received {type(data)}."
        )
        return data
214
+
215
+
216
+ def _validate_dataset_type_supports_predictions(data, supported_predictions_dataset_types):
217
+ """
218
+ Validate that the dataset type supports a user-specified "predictions" column.
219
+ """
220
+ if not any(isinstance(data, sdt) for sdt in supported_predictions_dataset_types):
221
+ raise MlflowException(
222
+ message=(
223
+ "If predictions is specified, data must be one of the following types, or an"
224
+ " MLflow Dataset that represents one of the following types:"
225
+ f" {supported_predictions_dataset_types}."
226
+ ),
227
+ error_code=INVALID_PARAMETER_VALUE,
228
+ )
229
+
230
+
231
class EvaluationDataset:
    """
    An input dataset for model evaluation. This is intended for use with the
    :py:func:`mlflow.models.evaluate()`
    API.
    """

    # Number of rows sampled from the head and from the tail of the data when
    # computing the dataset hash.
    NUM_SAMPLE_ROWS_FOR_HASH = 5
    # Maximum number of rows taken from a Spark DataFrame before converting it to pandas.
    SPARK_DATAFRAME_LIMIT = 10000

    def __init__(
        self,
        data,
        *,
        targets=None,
        name=None,
        path=None,
        feature_names=None,
        predictions=None,
        digest=None,
    ):
        """
        The values of the constructor arguments comes from the `evaluate` call.

        Args:
            data: A 2-dimensional numpy array, a list of feature rows, a pandas DataFrame
                or (if pyspark is loaded) a Spark DataFrame.
            targets: A numpy array or list of labels when ``data`` is an array or list;
                the name of the labels column when ``data`` is a DataFrame.
            name: Optional dataset name. Must not contain a double quote.
            path: Optional dataset path. Must not contain a double quote.
            feature_names: Optional unique feature names. For DataFrame data they select
                the feature columns.
            predictions: Optional name of the predictions column. Only supported for
                DataFrame data.
            digest: Optional digest, stored and exposed unchanged.

        Raises:
            MlflowException: If any argument fails the validations described above.
        """
        if name is not None and '"' in name:
            raise MlflowException(
                message=f'Dataset name cannot include a double quote (") but got {name}',
                error_code=INVALID_PARAMETER_VALUE,
            )
        if path is not None and '"' in path:
            raise MlflowException(
                message=f'Dataset path cannot include a double quote (") but got {path}',
                error_code=INVALID_PARAMETER_VALUE,
            )

        self._user_specified_name = name
        self._path = path
        self._hash = None
        self._supported_dataframe_types = (pd.DataFrame,)
        self._spark_df_type = None
        self._labels_data = None
        self._targets_name = None
        self._has_targets = False
        self._predictions_data = None
        self._predictions_name = None
        self._has_predictions = predictions is not None
        self._digest = digest

        try:
            # add checking `'pyspark' in sys.modules` to avoid importing pyspark when user
            # run code not related to pyspark.
            if "pyspark" in sys.modules:
                from mlflow.utils.spark_utils import get_spark_dataframe_type

                spark_df_type = get_spark_dataframe_type()
                self._supported_dataframe_types = (pd.DataFrame, spark_df_type)
                self._spark_df_type = spark_df_type
        except ImportError:
            pass

        if feature_names is not None and len(set(feature_names)) < len(list(feature_names)):
            raise MlflowException(
                message="`feature_names` argument must be a list containing unique feature names.",
                error_code=INVALID_PARAMETER_VALUE,
            )

        if self._has_predictions:
            _validate_dataset_type_supports_predictions(
                data=data,
                supported_predictions_dataset_types=self._supported_dataframe_types,
            )

        has_targets = targets is not None
        if has_targets:
            self._has_targets = True
        if isinstance(data, (np.ndarray, list)):
            if has_targets and not isinstance(targets, (np.ndarray, list)):
                raise MlflowException(
                    message="If data is a numpy array or list of evaluation features, "
                    "`targets` argument must be a numpy array or list of evaluation labels.",
                    error_code=INVALID_PARAMETER_VALUE,
                )

            shape_message = (
                "If the `data` argument is a numpy array, it must be a 2-dimensional "
                "array, with the second dimension representing the number of features. If the "
                "`data` argument is a list, each of its elements must be a feature array of "
                "the numpy array or list, and all elements must have the same length."
            )

            if isinstance(data, list):
                try:
                    data = np.array(data)
                except ValueError as e:
                    raise MlflowException(
                        message=shape_message, error_code=INVALID_PARAMETER_VALUE
                    ) from e

            if len(data.shape) != 2:
                raise MlflowException(
                    message=shape_message,
                    error_code=INVALID_PARAMETER_VALUE,
                )

            self._features_data = data
            if has_targets:
                self._labels_data = (
                    targets if isinstance(targets, np.ndarray) else np.array(targets)
                )

                if len(self._features_data) != len(self._labels_data):
                    raise MlflowException(
                        message="The input features example rows must be the same length "
                        "with labels array.",
                        error_code=INVALID_PARAMETER_VALUE,
                    )

            num_features = data.shape[1]

            if feature_names is not None:
                feature_names = list(feature_names)
                if num_features != len(feature_names):
                    raise MlflowException(
                        message="feature name list must be the same length with feature data.",
                        error_code=INVALID_PARAMETER_VALUE,
                    )
                self._feature_names = feature_names
            else:
                # Generated names are zero-padded to a common width, e.g. feature_01 ... feature_12.
                self._feature_names = [
                    f"feature_{str(i + 1).zfill(math.ceil(math.log10(num_features + 1)))}"
                    for i in range(num_features)
                ]
        elif isinstance(data, self._supported_dataframe_types):
            if has_targets and not isinstance(targets, str):
                raise MlflowException(
                    message="If data is a Pandas DataFrame or Spark DataFrame, `targets` argument "
                    "must be the name of the column which contains evaluation labels in the `data` "
                    "dataframe.",
                    error_code=INVALID_PARAMETER_VALUE,
                )
            if self._spark_df_type and isinstance(data, self._spark_df_type):
                if data.count() > EvaluationDataset.SPARK_DATAFRAME_LIMIT:
                    _logger.warning(
                        "Specified Spark DataFrame is too large for model evaluation. Only "
                        f"the first {EvaluationDataset.SPARK_DATAFRAME_LIMIT} rows will be used. "
                        "If you want evaluate on the whole spark dataframe, please manually call "
                        "`spark_dataframe.toPandas()`."
                    )
                data = data.limit(EvaluationDataset.SPARK_DATAFRAME_LIMIT).toPandas()

            if has_targets:
                self._labels_data = data[targets].to_numpy()
                self._targets_name = targets

            if self._has_predictions:
                self._predictions_data = data[predictions].to_numpy()
                self._predictions_name = predictions

            if feature_names is not None:
                self._features_data = data[list(feature_names)]
                self._feature_names = feature_names
            else:
                # Without explicit feature names, every column that is neither the targets
                # nor the predictions column is a feature.
                features_data = data

                if has_targets:
                    features_data = features_data.drop(targets, axis=1, inplace=False)

                if self._has_predictions:
                    features_data = features_data.drop(predictions, axis=1, inplace=False)

                self._features_data = features_data
                self._feature_names = [
                    generate_feature_name_if_not_string(c) for c in self._features_data.columns
                ]
        else:
            raise MlflowException(
                message="The data argument must be a numpy array, a list or a Pandas DataFrame, or "
                "spark DataFrame if pyspark package installed.",
                error_code=INVALID_PARAMETER_VALUE,
            )

        # generate dataset hash
        md5_gen = hashlib.md5(usedforsecurity=False)
        _gen_md5_for_arraylike_obj(md5_gen, self._features_data)
        if self._labels_data is not None:
            _gen_md5_for_arraylike_obj(md5_gen, self._labels_data)
        if self._predictions_data is not None:
            _gen_md5_for_arraylike_obj(md5_gen, self._predictions_data)
        md5_gen.update(",".join(list(map(str, self._feature_names))).encode("UTF-8"))

        self._hash = md5_gen.hexdigest()

    @property
    def feature_names(self):
        """
        return the feature names, either user specified or generated.
        """
        return self._feature_names

    @property
    def features_data(self):
        """
        return features data as a numpy array or a pandas DataFrame.
        """
        return self._features_data

    @property
    def labels_data(self):
        """
        return labels data as a numpy array
        """
        return self._labels_data

    @property
    def has_targets(self):
        """
        Returns True if the dataset has targets, False otherwise.
        """
        return self._has_targets

    @property
    def targets_name(self):
        """
        return targets name
        """
        return self._targets_name

    @property
    def predictions_data(self):
        """
        return predictions data as a numpy array
        """
        return self._predictions_data

    @property
    def has_predictions(self):
        """
        Returns True if the dataset has predictions, False otherwise.
        """
        return self._has_predictions

    @property
    def predictions_name(self):
        """
        return predictions name
        """
        return self._predictions_name

    @property
    def name(self):
        """
        Dataset name, which is specified dataset name or the dataset hash if user don't specify
        name.
        """
        return self._user_specified_name if self._user_specified_name is not None else self.hash

    @property
    def path(self):
        """
        Dataset path
        """
        return self._path

    @property
    def hash(self):
        """
        Dataset hash, includes hash on the row count, the first NUM_SAMPLE_ROWS_FOR_HASH rows
        and the last NUM_SAMPLE_ROWS_FOR_HASH rows of the features, labels and predictions,
        plus the feature names.
        """
        return self._hash

    @property
    def _metadata(self):
        """
        Return dataset metadata containing name, hash, and optional path.
        """
        metadata = {
            "name": self.name,
            "hash": self.hash,
        }
        if self.path is not None:
            metadata["path"] = self.path
        return metadata

    @property
    def digest(self):
        """
        Return the digest of the dataset.
        """
        return self._digest

    def _log_dataset_tag(self, client, run_id, model_uuid):
        """
        Log dataset metadata as a tag "mlflow.datasets", if the tag already exists, it will
        append current dataset metadata into existing tag content.

        Args:
            client: Client used to read the run (``get_run``) and write the tag
                (``log_batch``).
            run_id: ID of the run the tag is logged to.
            model_uuid: Identifier of the evaluated model, stored under the "model" key.
        """
        existing_dataset_metadata_str = client.get_run(run_id).data.tags.get(
            "mlflow.datasets", "[]"
        )
        dataset_metadata_list = json.loads(existing_dataset_metadata_str)

        for metadata in dataset_metadata_list:
            # `.get` tolerates existing entries that lack one of the keys instead of
            # failing the whole logging call with a KeyError.
            if (
                metadata.get("hash") == self.hash
                and metadata.get("name") == self.name
                and metadata.get("model") == model_uuid
            ):
                break
        else:
            # No entry for this dataset/model pair yet.
            dataset_metadata_list.append({**self._metadata, "model": model_uuid})

        dataset_metadata_str = json.dumps(dataset_metadata_list, separators=(",", ":"))
        client.log_batch(
            run_id,
            tags=[RunTag("mlflow.datasets", dataset_metadata_str)],
        )

    def __hash__(self):
        return hash(self.hash)

    def __eq__(self, other):
        if not isinstance(other, EvaluationDataset):
            return False

        if isinstance(self._features_data, np.ndarray):
            is_features_data_equal = np.array_equal(self._features_data, other._features_data)
        else:
            is_features_data_equal = self._features_data.equals(other._features_data)

        # The predictions are part of the dataset hash, so they have to take part in the
        # equality check as well; otherwise two datasets could compare equal while having
        # different `__hash__` values.
        return (
            is_features_data_equal
            and np.array_equal(self._labels_data, other._labels_data)
            and np.array_equal(self._predictions_data, other._predictions_data)
            and self.name == other.name
            and self.path == other.path
            and self._feature_names == other._feature_names
        )
@@ -0,0 +1,81 @@
1
+ from abc import abstractmethod
2
+ from typing import Any
3
+
4
+ from mlflow.data.dataset_source import DatasetSource
5
+
6
+
7
class FileSystemDatasetSource(DatasetSource):
    """
    Represents the source of a dataset stored on a filesystem, e.g. a local UNIX filesystem,
    blob storage services like S3, etc.

    This class only declares the interface: every member below is abstract and has to
    be implemented by concrete filesystem sources.
    """

    @property
    @abstractmethod
    def uri(self):
        """The URI referring to the dataset source filesystem location.

        Returns:
            The URI referring to the dataset source filesystem location,
            e.g. "s3://mybucket/path/to/mydataset", "/tmp/path/to/my/dataset" etc.

        """

    @staticmethod
    @abstractmethod
    def _get_source_type() -> str:
        """
        Returns:
            A string describing the filesystem containing the dataset, e.g. "local", "s3", ...
        """

    @abstractmethod
    def load(self, dst_path=None) -> str:
        """Downloads the dataset source to the local filesystem.

        Args:
            dst_path: Path of the local filesystem destination directory to which to download the
                dataset source. If the directory does not exist, it is created. If
                unspecified, the dataset source is downloaded to a new uniquely-named
                directory on the local filesystem, unless the dataset source already
                exists on the local filesystem, in which case its local path is returned
                directly.

        Returns:
            The path to the downloaded dataset source on the local filesystem.

        """

    @staticmethod
    @abstractmethod
    def _can_resolve(raw_source: Any) -> bool:
        """
        Args:
            raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data".

        Returns:
            True if this DatasetSource can resolve the raw source, False otherwise.
        """

    @classmethod
    @abstractmethod
    def _resolve(cls, raw_source: Any) -> "FileSystemDatasetSource":
        """
        Args:
            raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data".

        Returns:
            A FileSystemDatasetSource built from the raw source.
        """

    @abstractmethod
    def to_dict(self) -> dict[Any, Any]:
        """
        Returns:
            A JSON-compatible dictionary representation of the FileSystemDatasetSource.
        """

    @classmethod
    @abstractmethod
    def from_dict(cls, source_dict: dict[Any, Any]) -> "FileSystemDatasetSource":
        """
        Args:
            source_dict: A dictionary representation of the FileSystemDatasetSource.

        Returns:
            A FileSystemDatasetSource built from the dictionary representation.
        """