genesis-flow 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (645)
  1. genesis_flow-1.0.0.dist-info/METADATA +822 -0
  2. genesis_flow-1.0.0.dist-info/RECORD +645 -0
  3. genesis_flow-1.0.0.dist-info/WHEEL +5 -0
  4. genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
  5. genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
  6. genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
  7. mlflow/__init__.py +367 -0
  8. mlflow/__main__.py +3 -0
  9. mlflow/ag2/__init__.py +56 -0
  10. mlflow/ag2/ag2_logger.py +294 -0
  11. mlflow/anthropic/__init__.py +40 -0
  12. mlflow/anthropic/autolog.py +129 -0
  13. mlflow/anthropic/chat.py +144 -0
  14. mlflow/artifacts/__init__.py +268 -0
  15. mlflow/autogen/__init__.py +144 -0
  16. mlflow/autogen/chat.py +142 -0
  17. mlflow/azure/__init__.py +26 -0
  18. mlflow/azure/auth_handler.py +257 -0
  19. mlflow/azure/client.py +319 -0
  20. mlflow/azure/config.py +120 -0
  21. mlflow/azure/connection_factory.py +340 -0
  22. mlflow/azure/exceptions.py +27 -0
  23. mlflow/azure/stores.py +327 -0
  24. mlflow/azure/utils.py +183 -0
  25. mlflow/bedrock/__init__.py +45 -0
  26. mlflow/bedrock/_autolog.py +202 -0
  27. mlflow/bedrock/chat.py +122 -0
  28. mlflow/bedrock/stream.py +160 -0
  29. mlflow/bedrock/utils.py +43 -0
  30. mlflow/cli.py +707 -0
  31. mlflow/client.py +12 -0
  32. mlflow/config/__init__.py +56 -0
  33. mlflow/crewai/__init__.py +79 -0
  34. mlflow/crewai/autolog.py +253 -0
  35. mlflow/crewai/chat.py +29 -0
  36. mlflow/data/__init__.py +75 -0
  37. mlflow/data/artifact_dataset_sources.py +170 -0
  38. mlflow/data/code_dataset_source.py +40 -0
  39. mlflow/data/dataset.py +123 -0
  40. mlflow/data/dataset_registry.py +168 -0
  41. mlflow/data/dataset_source.py +110 -0
  42. mlflow/data/dataset_source_registry.py +219 -0
  43. mlflow/data/delta_dataset_source.py +167 -0
  44. mlflow/data/digest_utils.py +108 -0
  45. mlflow/data/evaluation_dataset.py +562 -0
  46. mlflow/data/filesystem_dataset_source.py +81 -0
  47. mlflow/data/http_dataset_source.py +145 -0
  48. mlflow/data/huggingface_dataset.py +258 -0
  49. mlflow/data/huggingface_dataset_source.py +118 -0
  50. mlflow/data/meta_dataset.py +104 -0
  51. mlflow/data/numpy_dataset.py +223 -0
  52. mlflow/data/pandas_dataset.py +231 -0
  53. mlflow/data/polars_dataset.py +352 -0
  54. mlflow/data/pyfunc_dataset_mixin.py +31 -0
  55. mlflow/data/schema.py +76 -0
  56. mlflow/data/sources.py +1 -0
  57. mlflow/data/spark_dataset.py +406 -0
  58. mlflow/data/spark_dataset_source.py +74 -0
  59. mlflow/data/spark_delta_utils.py +118 -0
  60. mlflow/data/tensorflow_dataset.py +350 -0
  61. mlflow/data/uc_volume_dataset_source.py +81 -0
  62. mlflow/db.py +27 -0
  63. mlflow/dspy/__init__.py +17 -0
  64. mlflow/dspy/autolog.py +197 -0
  65. mlflow/dspy/callback.py +398 -0
  66. mlflow/dspy/constant.py +1 -0
  67. mlflow/dspy/load.py +93 -0
  68. mlflow/dspy/save.py +393 -0
  69. mlflow/dspy/util.py +109 -0
  70. mlflow/dspy/wrapper.py +226 -0
  71. mlflow/entities/__init__.py +104 -0
  72. mlflow/entities/_mlflow_object.py +52 -0
  73. mlflow/entities/assessment.py +545 -0
  74. mlflow/entities/assessment_error.py +80 -0
  75. mlflow/entities/assessment_source.py +141 -0
  76. mlflow/entities/dataset.py +92 -0
  77. mlflow/entities/dataset_input.py +51 -0
  78. mlflow/entities/dataset_summary.py +62 -0
  79. mlflow/entities/document.py +48 -0
  80. mlflow/entities/experiment.py +109 -0
  81. mlflow/entities/experiment_tag.py +35 -0
  82. mlflow/entities/file_info.py +45 -0
  83. mlflow/entities/input_tag.py +35 -0
  84. mlflow/entities/lifecycle_stage.py +35 -0
  85. mlflow/entities/logged_model.py +228 -0
  86. mlflow/entities/logged_model_input.py +26 -0
  87. mlflow/entities/logged_model_output.py +32 -0
  88. mlflow/entities/logged_model_parameter.py +46 -0
  89. mlflow/entities/logged_model_status.py +74 -0
  90. mlflow/entities/logged_model_tag.py +33 -0
  91. mlflow/entities/metric.py +200 -0
  92. mlflow/entities/model_registry/__init__.py +29 -0
  93. mlflow/entities/model_registry/_model_registry_entity.py +13 -0
  94. mlflow/entities/model_registry/model_version.py +243 -0
  95. mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
  96. mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
  97. mlflow/entities/model_registry/model_version_search.py +25 -0
  98. mlflow/entities/model_registry/model_version_stages.py +25 -0
  99. mlflow/entities/model_registry/model_version_status.py +35 -0
  100. mlflow/entities/model_registry/model_version_tag.py +35 -0
  101. mlflow/entities/model_registry/prompt.py +73 -0
  102. mlflow/entities/model_registry/prompt_version.py +244 -0
  103. mlflow/entities/model_registry/registered_model.py +175 -0
  104. mlflow/entities/model_registry/registered_model_alias.py +35 -0
  105. mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
  106. mlflow/entities/model_registry/registered_model_search.py +25 -0
  107. mlflow/entities/model_registry/registered_model_tag.py +35 -0
  108. mlflow/entities/multipart_upload.py +74 -0
  109. mlflow/entities/param.py +49 -0
  110. mlflow/entities/run.py +97 -0
  111. mlflow/entities/run_data.py +84 -0
  112. mlflow/entities/run_info.py +188 -0
  113. mlflow/entities/run_inputs.py +59 -0
  114. mlflow/entities/run_outputs.py +43 -0
  115. mlflow/entities/run_status.py +41 -0
  116. mlflow/entities/run_tag.py +36 -0
  117. mlflow/entities/source_type.py +31 -0
  118. mlflow/entities/span.py +774 -0
  119. mlflow/entities/span_event.py +96 -0
  120. mlflow/entities/span_status.py +102 -0
  121. mlflow/entities/trace.py +317 -0
  122. mlflow/entities/trace_data.py +71 -0
  123. mlflow/entities/trace_info.py +220 -0
  124. mlflow/entities/trace_info_v2.py +162 -0
  125. mlflow/entities/trace_location.py +173 -0
  126. mlflow/entities/trace_state.py +39 -0
  127. mlflow/entities/trace_status.py +68 -0
  128. mlflow/entities/view_type.py +51 -0
  129. mlflow/environment_variables.py +866 -0
  130. mlflow/evaluation/__init__.py +16 -0
  131. mlflow/evaluation/assessment.py +369 -0
  132. mlflow/evaluation/evaluation.py +411 -0
  133. mlflow/evaluation/evaluation_tag.py +61 -0
  134. mlflow/evaluation/fluent.py +48 -0
  135. mlflow/evaluation/utils.py +201 -0
  136. mlflow/exceptions.py +213 -0
  137. mlflow/experiments.py +140 -0
  138. mlflow/gemini/__init__.py +81 -0
  139. mlflow/gemini/autolog.py +186 -0
  140. mlflow/gemini/chat.py +261 -0
  141. mlflow/genai/__init__.py +71 -0
  142. mlflow/genai/datasets/__init__.py +67 -0
  143. mlflow/genai/datasets/evaluation_dataset.py +131 -0
  144. mlflow/genai/evaluation/__init__.py +3 -0
  145. mlflow/genai/evaluation/base.py +411 -0
  146. mlflow/genai/evaluation/constant.py +23 -0
  147. mlflow/genai/evaluation/utils.py +244 -0
  148. mlflow/genai/judges/__init__.py +21 -0
  149. mlflow/genai/judges/databricks.py +404 -0
  150. mlflow/genai/label_schemas/__init__.py +153 -0
  151. mlflow/genai/label_schemas/label_schemas.py +209 -0
  152. mlflow/genai/labeling/__init__.py +159 -0
  153. mlflow/genai/labeling/labeling.py +250 -0
  154. mlflow/genai/optimize/__init__.py +13 -0
  155. mlflow/genai/optimize/base.py +198 -0
  156. mlflow/genai/optimize/optimizers/__init__.py +4 -0
  157. mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
  158. mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
  159. mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
  160. mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
  161. mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
  162. mlflow/genai/optimize/types.py +75 -0
  163. mlflow/genai/optimize/util.py +30 -0
  164. mlflow/genai/prompts/__init__.py +206 -0
  165. mlflow/genai/scheduled_scorers.py +431 -0
  166. mlflow/genai/scorers/__init__.py +26 -0
  167. mlflow/genai/scorers/base.py +492 -0
  168. mlflow/genai/scorers/builtin_scorers.py +765 -0
  169. mlflow/genai/scorers/scorer_utils.py +138 -0
  170. mlflow/genai/scorers/validation.py +165 -0
  171. mlflow/genai/utils/data_validation.py +146 -0
  172. mlflow/genai/utils/enum_utils.py +23 -0
  173. mlflow/genai/utils/trace_utils.py +211 -0
  174. mlflow/groq/__init__.py +42 -0
  175. mlflow/groq/_groq_autolog.py +74 -0
  176. mlflow/johnsnowlabs/__init__.py +888 -0
  177. mlflow/langchain/__init__.py +24 -0
  178. mlflow/langchain/api_request_parallel_processor.py +330 -0
  179. mlflow/langchain/autolog.py +147 -0
  180. mlflow/langchain/chat_agent_langgraph.py +340 -0
  181. mlflow/langchain/constant.py +1 -0
  182. mlflow/langchain/constants.py +1 -0
  183. mlflow/langchain/databricks_dependencies.py +444 -0
  184. mlflow/langchain/langchain_tracer.py +597 -0
  185. mlflow/langchain/model.py +919 -0
  186. mlflow/langchain/output_parsers.py +142 -0
  187. mlflow/langchain/retriever_chain.py +153 -0
  188. mlflow/langchain/runnables.py +527 -0
  189. mlflow/langchain/utils/chat.py +402 -0
  190. mlflow/langchain/utils/logging.py +671 -0
  191. mlflow/langchain/utils/serialization.py +36 -0
  192. mlflow/legacy_databricks_cli/__init__.py +0 -0
  193. mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
  194. mlflow/legacy_databricks_cli/configure/provider.py +482 -0
  195. mlflow/litellm/__init__.py +175 -0
  196. mlflow/llama_index/__init__.py +22 -0
  197. mlflow/llama_index/autolog.py +55 -0
  198. mlflow/llama_index/chat.py +43 -0
  199. mlflow/llama_index/constant.py +1 -0
  200. mlflow/llama_index/model.py +577 -0
  201. mlflow/llama_index/pyfunc_wrapper.py +332 -0
  202. mlflow/llama_index/serialize_objects.py +188 -0
  203. mlflow/llama_index/tracer.py +561 -0
  204. mlflow/metrics/__init__.py +479 -0
  205. mlflow/metrics/base.py +39 -0
  206. mlflow/metrics/genai/__init__.py +25 -0
  207. mlflow/metrics/genai/base.py +101 -0
  208. mlflow/metrics/genai/genai_metric.py +771 -0
  209. mlflow/metrics/genai/metric_definitions.py +450 -0
  210. mlflow/metrics/genai/model_utils.py +371 -0
  211. mlflow/metrics/genai/prompt_template.py +68 -0
  212. mlflow/metrics/genai/prompts/__init__.py +0 -0
  213. mlflow/metrics/genai/prompts/v1.py +422 -0
  214. mlflow/metrics/genai/utils.py +6 -0
  215. mlflow/metrics/metric_definitions.py +619 -0
  216. mlflow/mismatch.py +34 -0
  217. mlflow/mistral/__init__.py +34 -0
  218. mlflow/mistral/autolog.py +71 -0
  219. mlflow/mistral/chat.py +135 -0
  220. mlflow/ml_package_versions.py +452 -0
  221. mlflow/models/__init__.py +97 -0
  222. mlflow/models/auth_policy.py +83 -0
  223. mlflow/models/cli.py +354 -0
  224. mlflow/models/container/__init__.py +294 -0
  225. mlflow/models/container/scoring_server/__init__.py +0 -0
  226. mlflow/models/container/scoring_server/nginx.conf +39 -0
  227. mlflow/models/dependencies_schemas.py +287 -0
  228. mlflow/models/display_utils.py +158 -0
  229. mlflow/models/docker_utils.py +211 -0
  230. mlflow/models/evaluation/__init__.py +23 -0
  231. mlflow/models/evaluation/_shap_patch.py +64 -0
  232. mlflow/models/evaluation/artifacts.py +194 -0
  233. mlflow/models/evaluation/base.py +1811 -0
  234. mlflow/models/evaluation/calibration_curve.py +109 -0
  235. mlflow/models/evaluation/default_evaluator.py +996 -0
  236. mlflow/models/evaluation/deprecated.py +23 -0
  237. mlflow/models/evaluation/evaluator_registry.py +80 -0
  238. mlflow/models/evaluation/evaluators/classifier.py +704 -0
  239. mlflow/models/evaluation/evaluators/default.py +233 -0
  240. mlflow/models/evaluation/evaluators/regressor.py +96 -0
  241. mlflow/models/evaluation/evaluators/shap.py +296 -0
  242. mlflow/models/evaluation/lift_curve.py +178 -0
  243. mlflow/models/evaluation/utils/metric.py +123 -0
  244. mlflow/models/evaluation/utils/trace.py +179 -0
  245. mlflow/models/evaluation/validation.py +434 -0
  246. mlflow/models/flavor_backend.py +93 -0
  247. mlflow/models/flavor_backend_registry.py +53 -0
  248. mlflow/models/model.py +1639 -0
  249. mlflow/models/model_config.py +150 -0
  250. mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
  251. mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
  252. mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
  253. mlflow/models/python_api.py +369 -0
  254. mlflow/models/rag_signatures.py +128 -0
  255. mlflow/models/resources.py +321 -0
  256. mlflow/models/signature.py +662 -0
  257. mlflow/models/utils.py +2054 -0
  258. mlflow/models/wheeled_model.py +280 -0
  259. mlflow/openai/__init__.py +57 -0
  260. mlflow/openai/_agent_tracer.py +364 -0
  261. mlflow/openai/api_request_parallel_processor.py +131 -0
  262. mlflow/openai/autolog.py +509 -0
  263. mlflow/openai/constant.py +1 -0
  264. mlflow/openai/model.py +824 -0
  265. mlflow/openai/utils/chat_schema.py +367 -0
  266. mlflow/optuna/__init__.py +3 -0
  267. mlflow/optuna/storage.py +646 -0
  268. mlflow/plugins/__init__.py +72 -0
  269. mlflow/plugins/base.py +358 -0
  270. mlflow/plugins/builtin/__init__.py +24 -0
  271. mlflow/plugins/builtin/pytorch_plugin.py +150 -0
  272. mlflow/plugins/builtin/sklearn_plugin.py +158 -0
  273. mlflow/plugins/builtin/transformers_plugin.py +187 -0
  274. mlflow/plugins/cli.py +321 -0
  275. mlflow/plugins/discovery.py +340 -0
  276. mlflow/plugins/manager.py +465 -0
  277. mlflow/plugins/registry.py +316 -0
  278. mlflow/plugins/templates/framework_plugin_template.py +329 -0
  279. mlflow/prompt/constants.py +20 -0
  280. mlflow/prompt/promptlab_model.py +197 -0
  281. mlflow/prompt/registry_utils.py +248 -0
  282. mlflow/promptflow/__init__.py +495 -0
  283. mlflow/protos/__init__.py +0 -0
  284. mlflow/protos/assessments_pb2.py +174 -0
  285. mlflow/protos/databricks_artifacts_pb2.py +489 -0
  286. mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
  287. mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
  288. mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
  289. mlflow/protos/databricks_pb2.py +267 -0
  290. mlflow/protos/databricks_trace_server_pb2.py +374 -0
  291. mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
  292. mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
  293. mlflow/protos/facet_feature_statistics_pb2.py +296 -0
  294. mlflow/protos/internal_pb2.py +77 -0
  295. mlflow/protos/mlflow_artifacts_pb2.py +336 -0
  296. mlflow/protos/model_registry_pb2.py +1073 -0
  297. mlflow/protos/scalapb/__init__.py +0 -0
  298. mlflow/protos/scalapb/scalapb_pb2.py +104 -0
  299. mlflow/protos/service_pb2.py +2600 -0
  300. mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
  301. mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
  302. mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
  303. mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
  304. mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
  305. mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
  306. mlflow/py.typed +0 -0
  307. mlflow/pydantic_ai/__init__.py +57 -0
  308. mlflow/pydantic_ai/autolog.py +173 -0
  309. mlflow/pyfunc/__init__.py +3844 -0
  310. mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
  311. mlflow/pyfunc/backend.py +523 -0
  312. mlflow/pyfunc/context.py +78 -0
  313. mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
  314. mlflow/pyfunc/loaders/__init__.py +7 -0
  315. mlflow/pyfunc/loaders/chat_agent.py +117 -0
  316. mlflow/pyfunc/loaders/chat_model.py +125 -0
  317. mlflow/pyfunc/loaders/code_model.py +31 -0
  318. mlflow/pyfunc/loaders/responses_agent.py +112 -0
  319. mlflow/pyfunc/mlserver.py +46 -0
  320. mlflow/pyfunc/model.py +1473 -0
  321. mlflow/pyfunc/scoring_server/__init__.py +604 -0
  322. mlflow/pyfunc/scoring_server/app.py +7 -0
  323. mlflow/pyfunc/scoring_server/client.py +146 -0
  324. mlflow/pyfunc/spark_model_cache.py +48 -0
  325. mlflow/pyfunc/stdin_server.py +44 -0
  326. mlflow/pyfunc/utils/__init__.py +3 -0
  327. mlflow/pyfunc/utils/data_validation.py +224 -0
  328. mlflow/pyfunc/utils/environment.py +22 -0
  329. mlflow/pyfunc/utils/input_converter.py +47 -0
  330. mlflow/pyfunc/utils/serving_data_parser.py +11 -0
  331. mlflow/pytorch/__init__.py +1171 -0
  332. mlflow/pytorch/_lightning_autolog.py +580 -0
  333. mlflow/pytorch/_pytorch_autolog.py +50 -0
  334. mlflow/pytorch/pickle_module.py +35 -0
  335. mlflow/rfunc/__init__.py +42 -0
  336. mlflow/rfunc/backend.py +134 -0
  337. mlflow/runs.py +89 -0
  338. mlflow/server/__init__.py +302 -0
  339. mlflow/server/auth/__init__.py +1224 -0
  340. mlflow/server/auth/__main__.py +4 -0
  341. mlflow/server/auth/basic_auth.ini +6 -0
  342. mlflow/server/auth/cli.py +11 -0
  343. mlflow/server/auth/client.py +537 -0
  344. mlflow/server/auth/config.py +34 -0
  345. mlflow/server/auth/db/__init__.py +0 -0
  346. mlflow/server/auth/db/cli.py +18 -0
  347. mlflow/server/auth/db/migrations/__init__.py +0 -0
  348. mlflow/server/auth/db/migrations/alembic.ini +110 -0
  349. mlflow/server/auth/db/migrations/env.py +76 -0
  350. mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
  351. mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
  352. mlflow/server/auth/db/models.py +67 -0
  353. mlflow/server/auth/db/utils.py +37 -0
  354. mlflow/server/auth/entities.py +165 -0
  355. mlflow/server/auth/logo.py +14 -0
  356. mlflow/server/auth/permissions.py +65 -0
  357. mlflow/server/auth/routes.py +18 -0
  358. mlflow/server/auth/sqlalchemy_store.py +263 -0
  359. mlflow/server/graphql/__init__.py +0 -0
  360. mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
  361. mlflow/server/graphql/graphql_custom_scalars.py +24 -0
  362. mlflow/server/graphql/graphql_errors.py +15 -0
  363. mlflow/server/graphql/graphql_no_batching.py +89 -0
  364. mlflow/server/graphql/graphql_schema_extensions.py +74 -0
  365. mlflow/server/handlers.py +3217 -0
  366. mlflow/server/prometheus_exporter.py +17 -0
  367. mlflow/server/validation.py +30 -0
  368. mlflow/shap/__init__.py +691 -0
  369. mlflow/sklearn/__init__.py +1994 -0
  370. mlflow/sklearn/utils.py +1041 -0
  371. mlflow/smolagents/__init__.py +66 -0
  372. mlflow/smolagents/autolog.py +139 -0
  373. mlflow/smolagents/chat.py +29 -0
  374. mlflow/store/__init__.py +10 -0
  375. mlflow/store/_unity_catalog/__init__.py +1 -0
  376. mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
  377. mlflow/store/_unity_catalog/lineage/constants.py +2 -0
  378. mlflow/store/_unity_catalog/registry/__init__.py +6 -0
  379. mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
  380. mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
  381. mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
  382. mlflow/store/_unity_catalog/registry/utils.py +121 -0
  383. mlflow/store/artifact/__init__.py +0 -0
  384. mlflow/store/artifact/artifact_repo.py +472 -0
  385. mlflow/store/artifact/artifact_repository_registry.py +154 -0
  386. mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
  387. mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
  388. mlflow/store/artifact/cli.py +141 -0
  389. mlflow/store/artifact/cloud_artifact_repo.py +332 -0
  390. mlflow/store/artifact/databricks_artifact_repo.py +729 -0
  391. mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
  392. mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
  393. mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
  394. mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
  395. mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
  396. mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
  397. mlflow/store/artifact/ftp_artifact_repo.py +132 -0
  398. mlflow/store/artifact/gcs_artifact_repo.py +296 -0
  399. mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
  400. mlflow/store/artifact/http_artifact_repo.py +218 -0
  401. mlflow/store/artifact/local_artifact_repo.py +142 -0
  402. mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
  403. mlflow/store/artifact/models_artifact_repo.py +259 -0
  404. mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
  405. mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
  406. mlflow/store/artifact/r2_artifact_repo.py +70 -0
  407. mlflow/store/artifact/runs_artifact_repo.py +265 -0
  408. mlflow/store/artifact/s3_artifact_repo.py +330 -0
  409. mlflow/store/artifact/sftp_artifact_repo.py +141 -0
  410. mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
  411. mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
  412. mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
  413. mlflow/store/artifact/utils/__init__.py +0 -0
  414. mlflow/store/artifact/utils/models.py +148 -0
  415. mlflow/store/db/__init__.py +0 -0
  416. mlflow/store/db/base_sql_model.py +3 -0
  417. mlflow/store/db/db_types.py +10 -0
  418. mlflow/store/db/utils.py +314 -0
  419. mlflow/store/db_migrations/__init__.py +0 -0
  420. mlflow/store/db_migrations/alembic.ini +74 -0
  421. mlflow/store/db_migrations/env.py +84 -0
  422. mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
  423. mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
  424. mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
  425. mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
  426. mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
  427. mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
  428. mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
  429. mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
  430. mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
  431. mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
  432. mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
  433. mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
  434. mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
  435. mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
  436. mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
  437. mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
  438. mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
  439. mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
  440. mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
  441. mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
  442. mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
  443. mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
  444. mlflow/store/db_migrations/versions/__init__.py +0 -0
  445. mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
  446. mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
  447. mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
  448. mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
  449. mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
  450. mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
  451. mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
  452. mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
  453. mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
  454. mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
  455. mlflow/store/entities/__init__.py +3 -0
  456. mlflow/store/entities/paged_list.py +18 -0
  457. mlflow/store/model_registry/__init__.py +10 -0
  458. mlflow/store/model_registry/abstract_store.py +1081 -0
  459. mlflow/store/model_registry/base_rest_store.py +44 -0
  460. mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
  461. mlflow/store/model_registry/dbmodels/__init__.py +0 -0
  462. mlflow/store/model_registry/dbmodels/models.py +206 -0
  463. mlflow/store/model_registry/file_store.py +1091 -0
  464. mlflow/store/model_registry/rest_store.py +481 -0
  465. mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
  466. mlflow/store/tracking/__init__.py +23 -0
  467. mlflow/store/tracking/abstract_store.py +816 -0
  468. mlflow/store/tracking/dbmodels/__init__.py +0 -0
  469. mlflow/store/tracking/dbmodels/initial_models.py +243 -0
  470. mlflow/store/tracking/dbmodels/models.py +1073 -0
  471. mlflow/store/tracking/file_store.py +2438 -0
  472. mlflow/store/tracking/postgres_managed_identity.py +146 -0
  473. mlflow/store/tracking/rest_store.py +1131 -0
  474. mlflow/store/tracking/sqlalchemy_store.py +2785 -0
  475. mlflow/system_metrics/__init__.py +61 -0
  476. mlflow/system_metrics/metrics/__init__.py +0 -0
  477. mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
  478. mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
  479. mlflow/system_metrics/metrics/disk_monitor.py +21 -0
  480. mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
  481. mlflow/system_metrics/metrics/network_monitor.py +34 -0
  482. mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
  483. mlflow/system_metrics/system_metrics_monitor.py +198 -0
  484. mlflow/tracing/__init__.py +16 -0
  485. mlflow/tracing/assessment.py +356 -0
  486. mlflow/tracing/client.py +531 -0
  487. mlflow/tracing/config.py +125 -0
  488. mlflow/tracing/constant.py +105 -0
  489. mlflow/tracing/destination.py +81 -0
  490. mlflow/tracing/display/__init__.py +40 -0
  491. mlflow/tracing/display/display_handler.py +196 -0
  492. mlflow/tracing/export/async_export_queue.py +186 -0
  493. mlflow/tracing/export/inference_table.py +138 -0
  494. mlflow/tracing/export/mlflow_v3.py +137 -0
  495. mlflow/tracing/export/utils.py +70 -0
  496. mlflow/tracing/fluent.py +1417 -0
  497. mlflow/tracing/processor/base_mlflow.py +199 -0
  498. mlflow/tracing/processor/inference_table.py +175 -0
  499. mlflow/tracing/processor/mlflow_v3.py +47 -0
  500. mlflow/tracing/processor/otel.py +73 -0
  501. mlflow/tracing/provider.py +487 -0
  502. mlflow/tracing/trace_manager.py +200 -0
  503. mlflow/tracing/utils/__init__.py +616 -0
  504. mlflow/tracing/utils/artifact_utils.py +28 -0
  505. mlflow/tracing/utils/copy.py +55 -0
  506. mlflow/tracing/utils/environment.py +55 -0
  507. mlflow/tracing/utils/exception.py +21 -0
  508. mlflow/tracing/utils/once.py +35 -0
  509. mlflow/tracing/utils/otlp.py +63 -0
  510. mlflow/tracing/utils/processor.py +54 -0
  511. mlflow/tracing/utils/search.py +292 -0
  512. mlflow/tracing/utils/timeout.py +250 -0
  513. mlflow/tracing/utils/token.py +19 -0
  514. mlflow/tracing/utils/truncation.py +124 -0
  515. mlflow/tracing/utils/warning.py +76 -0
  516. mlflow/tracking/__init__.py +39 -0
  517. mlflow/tracking/_model_registry/__init__.py +1 -0
  518. mlflow/tracking/_model_registry/client.py +764 -0
  519. mlflow/tracking/_model_registry/fluent.py +853 -0
  520. mlflow/tracking/_model_registry/registry.py +67 -0
  521. mlflow/tracking/_model_registry/utils.py +251 -0
  522. mlflow/tracking/_tracking_service/__init__.py +0 -0
  523. mlflow/tracking/_tracking_service/client.py +883 -0
  524. mlflow/tracking/_tracking_service/registry.py +56 -0
  525. mlflow/tracking/_tracking_service/utils.py +275 -0
  526. mlflow/tracking/artifact_utils.py +179 -0
  527. mlflow/tracking/client.py +5900 -0
  528. mlflow/tracking/context/__init__.py +0 -0
  529. mlflow/tracking/context/abstract_context.py +35 -0
  530. mlflow/tracking/context/databricks_cluster_context.py +15 -0
  531. mlflow/tracking/context/databricks_command_context.py +15 -0
  532. mlflow/tracking/context/databricks_job_context.py +49 -0
  533. mlflow/tracking/context/databricks_notebook_context.py +41 -0
  534. mlflow/tracking/context/databricks_repo_context.py +43 -0
  535. mlflow/tracking/context/default_context.py +51 -0
  536. mlflow/tracking/context/git_context.py +32 -0
  537. mlflow/tracking/context/registry.py +98 -0
  538. mlflow/tracking/context/system_environment_context.py +15 -0
  539. mlflow/tracking/default_experiment/__init__.py +1 -0
  540. mlflow/tracking/default_experiment/abstract_context.py +43 -0
  541. mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
  542. mlflow/tracking/default_experiment/registry.py +75 -0
  543. mlflow/tracking/fluent.py +3595 -0
  544. mlflow/tracking/metric_value_conversion_utils.py +93 -0
  545. mlflow/tracking/multimedia.py +206 -0
  546. mlflow/tracking/registry.py +86 -0
  547. mlflow/tracking/request_auth/__init__.py +0 -0
  548. mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
  549. mlflow/tracking/request_auth/registry.py +60 -0
  550. mlflow/tracking/request_header/__init__.py +0 -0
  551. mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
  552. mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
  553. mlflow/tracking/request_header/default_request_header_provider.py +17 -0
  554. mlflow/tracking/request_header/registry.py +79 -0
  555. mlflow/transformers/__init__.py +2982 -0
  556. mlflow/transformers/flavor_config.py +258 -0
  557. mlflow/transformers/hub_utils.py +83 -0
  558. mlflow/transformers/llm_inference_utils.py +468 -0
  559. mlflow/transformers/model_io.py +301 -0
  560. mlflow/transformers/peft.py +51 -0
  561. mlflow/transformers/signature.py +183 -0
  562. mlflow/transformers/torch_utils.py +55 -0
  563. mlflow/types/__init__.py +21 -0
  564. mlflow/types/agent.py +270 -0
  565. mlflow/types/chat.py +240 -0
  566. mlflow/types/llm.py +935 -0
  567. mlflow/types/responses.py +139 -0
  568. mlflow/types/responses_helpers.py +416 -0
  569. mlflow/types/schema.py +1505 -0
  570. mlflow/types/type_hints.py +647 -0
  571. mlflow/types/utils.py +753 -0
  572. mlflow/utils/__init__.py +283 -0
  573. mlflow/utils/_capture_modules.py +256 -0
  574. mlflow/utils/_capture_transformers_modules.py +75 -0
  575. mlflow/utils/_spark_utils.py +201 -0
  576. mlflow/utils/_unity_catalog_oss_utils.py +97 -0
  577. mlflow/utils/_unity_catalog_utils.py +479 -0
  578. mlflow/utils/annotations.py +218 -0
  579. mlflow/utils/arguments_utils.py +16 -0
  580. mlflow/utils/async_logging/__init__.py +1 -0
  581. mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
  582. mlflow/utils/async_logging/async_logging_queue.py +366 -0
  583. mlflow/utils/async_logging/run_artifact.py +38 -0
  584. mlflow/utils/async_logging/run_batch.py +58 -0
  585. mlflow/utils/async_logging/run_operations.py +49 -0
  586. mlflow/utils/autologging_utils/__init__.py +737 -0
  587. mlflow/utils/autologging_utils/client.py +432 -0
  588. mlflow/utils/autologging_utils/config.py +33 -0
  589. mlflow/utils/autologging_utils/events.py +294 -0
  590. mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
  591. mlflow/utils/autologging_utils/metrics_queue.py +71 -0
  592. mlflow/utils/autologging_utils/safety.py +1104 -0
  593. mlflow/utils/autologging_utils/versioning.py +95 -0
  594. mlflow/utils/checkpoint_utils.py +206 -0
  595. mlflow/utils/class_utils.py +6 -0
  596. mlflow/utils/cli_args.py +257 -0
  597. mlflow/utils/conda.py +354 -0
  598. mlflow/utils/credentials.py +231 -0
  599. mlflow/utils/data_utils.py +17 -0
  600. mlflow/utils/databricks_utils.py +1436 -0
  601. mlflow/utils/docstring_utils.py +477 -0
  602. mlflow/utils/doctor.py +133 -0
  603. mlflow/utils/download_cloud_file_chunk.py +43 -0
  604. mlflow/utils/env_manager.py +16 -0
  605. mlflow/utils/env_pack.py +131 -0
  606. mlflow/utils/environment.py +1009 -0
  607. mlflow/utils/exception_utils.py +14 -0
  608. mlflow/utils/file_utils.py +978 -0
  609. mlflow/utils/git_utils.py +77 -0
  610. mlflow/utils/gorilla.py +797 -0
  611. mlflow/utils/import_hooks/__init__.py +363 -0
  612. mlflow/utils/lazy_load.py +51 -0
  613. mlflow/utils/logging_utils.py +168 -0
  614. mlflow/utils/mime_type_utils.py +58 -0
  615. mlflow/utils/mlflow_tags.py +103 -0
  616. mlflow/utils/model_utils.py +486 -0
  617. mlflow/utils/name_utils.py +346 -0
  618. mlflow/utils/nfs_on_spark.py +62 -0
  619. mlflow/utils/openai_utils.py +164 -0
  620. mlflow/utils/os.py +12 -0
  621. mlflow/utils/oss_registry_utils.py +29 -0
  622. mlflow/utils/plugins.py +17 -0
  623. mlflow/utils/process.py +182 -0
  624. mlflow/utils/promptlab_utils.py +146 -0
  625. mlflow/utils/proto_json_utils.py +743 -0
  626. mlflow/utils/pydantic_utils.py +54 -0
  627. mlflow/utils/request_utils.py +279 -0
  628. mlflow/utils/requirements_utils.py +704 -0
  629. mlflow/utils/rest_utils.py +673 -0
  630. mlflow/utils/search_logged_model_utils.py +127 -0
  631. mlflow/utils/search_utils.py +2111 -0
  632. mlflow/utils/secure_loading.py +221 -0
  633. mlflow/utils/security_validation.py +384 -0
  634. mlflow/utils/server_cli_utils.py +61 -0
  635. mlflow/utils/spark_utils.py +15 -0
  636. mlflow/utils/string_utils.py +138 -0
  637. mlflow/utils/thread_utils.py +63 -0
  638. mlflow/utils/time.py +54 -0
  639. mlflow/utils/timeout.py +42 -0
  640. mlflow/utils/uri.py +572 -0
  641. mlflow/utils/validation.py +662 -0
  642. mlflow/utils/virtualenv.py +458 -0
  643. mlflow/utils/warnings_utils.py +25 -0
  644. mlflow/utils/yaml_utils.py +179 -0
  645. mlflow/version.py +24 -0
@@ -0,0 +1,40 @@
1
+ from typing import Any
2
+
3
+ from typing_extensions import Self
4
+
5
+ from mlflow.data.dataset_source import DatasetSource
6
+
7
+
8
class CodeDatasetSource(DatasetSource):
    """
    Dataset source that is described only by a dictionary of tags. It reports the source
    type ``"code"``, never resolves from a raw source, and cannot be loaded.
    """

    def __init__(
        self,
        tags: dict[Any, Any],
    ):
        # The tags are stored as given and are the only state serialized by to_dict().
        self._tags = tags

    @staticmethod
    def _get_source_type() -> str:
        """Returns the source type string for this class, ``"code"``."""
        source_type = "code"
        return source_type

    def load(self, **kwargs):
        """
        Loading is not supported for a code dataset source.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    @staticmethod
    def _can_resolve(raw_source: Any):
        """Always returns False: this source is never resolved from a raw source."""
        return False

    @classmethod
    def _resolve(cls, raw_source: str) -> Self:
        """
        Resolution is not supported for a code dataset source.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def to_dict(self) -> dict[Any, Any]:
        """Returns a dictionary with a single ``"tags"`` key holding the stored tags."""
        return dict(tags=self._tags)

    @classmethod
    def from_dict(cls, source_dict: dict[Any, Any]) -> Self:
        """
        Builds an instance from a dictionary representation.

        Args:
            source_dict: A dictionary whose ``"tags"`` entry (``None`` when absent) becomes
                the tags of the new instance.

        Returns:
            A new instance of this class.
        """
        tags = source_dict.get("tags")
        return cls(tags=tags)
mlflow/data/dataset.py ADDED
@@ -0,0 +1,123 @@
1
+ import json
2
+ from abc import abstractmethod
3
+ from typing import Any, Optional
4
+
5
+ from mlflow.data.dataset_source import DatasetSource
6
+ from mlflow.entities import Dataset as DatasetEntity
7
+
8
+
9
class Dataset:
    """
    A dataset tracked by MLflow: its name, digest (hash), schema and profile, plus information
    about the source it was derived from (e.g. an S3 bucket or a managed Delta table). Most
    datasets additionally expose features and targets for training and evaluation.
    """

    def __init__(
        self, source: DatasetSource, name: Optional[str] = None, digest: Optional[str] = None
    ):
        """
        Base constructor for a dataset. All subclasses must call this constructor.
        """
        self._name = name
        self._source = source
        # Subclasses should invoke this constructor only after setting every attribute that
        # their digest computation reads, because the digest may be computed right here.
        self._digest = digest if digest else self._compute_digest()

    @abstractmethod
    def _compute_digest(self) -> str:
        """Computes a digest for the dataset. Invoked when no digest is supplied at
        construction time.

        Returns:
            A string digest for the dataset. A maximum length of 10 characters is
            recommended, with 8 characters being ideal.

        """

    def to_dict(self) -> dict[str, str]:
        """Create config dictionary for the dataset.

        Subclasses should override this method to contribute further fields such as schema
        and profile.

        Returns a string dictionary containing the following fields: name, digest, source, source
        type.
        """
        config = {}
        config["name"] = self.name
        config["digest"] = self.digest
        config["source"] = self.source.to_json()
        config["source_type"] = self.source._get_source_type()
        return config

    def to_json(self) -> str:
        """
        Obtains a JSON string representation of the :py:class:`Dataset
        <mlflow.data.dataset.Dataset>`.

        Returns:
            A JSON string representation of the :py:class:`Dataset <mlflow.data.dataset.Dataset>`.
        """
        config = self.to_dict()
        return json.dumps(config)

    @property
    def name(self) -> str:
        """
        The name of the dataset, e.g. ``"iris_data"``, ``"myschema.mycatalog.mytable@v1"``, etc.
        Falls back to ``"dataset"`` when no name was supplied.
        """
        return "dataset" if self._name is None else self._name

    @property
    def digest(self) -> str:
        """
        A unique hash or fingerprint of the dataset, e.g. ``"498c7496"``.
        """
        return self._digest

    @property
    def source(self) -> DatasetSource:
        """
        Information about where the dataset came from, as an instance of
        :py:class:`DatasetSource <mlflow.data.dataset_source.DatasetSource>`, for example the
        S3 location or the name of the managed Delta Table the dataset was derived from.
        """
        return self._source

    @property
    @abstractmethod
    def profile(self) -> Optional[Any]:
        """
        Optional summary statistics for the dataset, such as the number of rows in a table, the
        mean / median / std of each table column, etc.
        """

    @property
    @abstractmethod
    def schema(self) -> Optional[Any]:
        """
        Optional dataset schema, such as an instance of :py:class:`mlflow.types.Schema` representing
        the features and targets of the dataset.
        """

    def _to_mlflow_entity(self) -> DatasetEntity:
        """
        Returns:
            A `mlflow.entities.Dataset` instance representing the dataset.
        """
        config = self.to_dict()
        # These four keys must be present; schema and profile are optional extras that
        # subclasses may add in their to_dict() override.
        required = {key: config[key] for key in ("name", "digest", "source_type", "source")}
        return DatasetEntity(
            **required,
            schema=config.get("schema"),
            profile=config.get("profile"),
        )
@@ -0,0 +1,168 @@
1
+ import inspect
2
+ import warnings
3
+ from contextlib import suppress
4
+ from typing import Callable, Optional
5
+
6
+ import mlflow.data
7
+ from mlflow.data.dataset import Dataset
8
+ from mlflow.exceptions import MlflowException
9
+ from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
10
+ from mlflow.utils.plugins import get_entry_points
11
+
12
+
13
class DatasetRegistry:
    """Registry mapping dataset constructor names (e.g. ``"from_pandas"``) to functions."""

    def __init__(self):
        # Maps constructor name -> constructor function; re-registering a name overwrites it.
        self.constructors = {}

    def register_constructor(
        self,
        constructor_fn: Callable[[Optional[str], Optional[str]], Dataset],
        constructor_name: Optional[str] = None,
    ) -> str:
        """Registers a dataset constructor.

        Args:
            constructor_fn: A function that accepts at least the following
                inputs and returns an instance of a subclass of
                :py:class:`mlflow.data.dataset.Dataset`:

                - name: Optional. A string dataset name
                - digest: Optional. A string dataset digest.

            constructor_name: The name of the constructor, e.g.
                "from_spark". The name must begin with the
                string "from_" or "load_". If unspecified, the `__name__`
                attribute of the `constructor_fn` is used instead and must
                begin with the string "from_" or "load_".

        Returns:
            The name of the registered constructor, e.g. "from_pandas" or "load_delta".

        Raises:
            MlflowException: If the name or the function signature is invalid.
        """
        if constructor_name is None:
            constructor_name = constructor_fn.__name__
        DatasetRegistry._validate_constructor(constructor_fn, constructor_name)
        self.constructors[constructor_name] = constructor_fn
        return constructor_name

    def register_entrypoints(self):
        """
        Registers dataset constructors defined as Python entrypoints in the
        ``mlflow.dataset_constructor`` group. For reference, see
        https://mlflow.org/docs/latest/plugins.html#defining-a-plugin.

        A constructor that fails to load or validate is reported with a warning and skipped.
        """
        for entrypoint in get_entry_points("mlflow.dataset_constructor"):
            try:
                self.register_constructor(
                    constructor_fn=entrypoint.load(), constructor_name=entrypoint.name
                )
            except Exception as exc:
                # Best effort: one broken plugin must not prevent the others from registering.
                warnings.warn(
                    f"Failure attempting to register dataset constructor"
                    f' "{entrypoint.name}": {exc}.',
                    stacklevel=2,
                )

    @staticmethod
    def _validate_constructor(
        constructor_fn: Callable[[Optional[str], Optional[str]], Dataset],
        constructor_name: str,
    ):
        """Checks that a constructor has a valid name, parameters and return annotation.

        Args:
            constructor_fn: The constructor function to validate.
            constructor_name: The name under which the constructor is being registered.

        Raises:
            MlflowException: If the name does not start with "load_" or "from_", if the
                function lacks a keyword-capable ``name`` or ``digest`` parameter, or if its
                return annotation is not a subclass of
                :py:class:`mlflow.data.dataset.Dataset`.
        """
        if not constructor_name.startswith(("load_", "from_")):
            raise MlflowException(
                f"Invalid dataset constructor name: {constructor_name}."
                f" Constructor name must start with 'load_' or 'from_'.",
                INVALID_PARAMETER_VALUE,
            )

        # Callables such as functools.partial objects have no __name__; fall back to the
        # registered name so that the error paths below cannot fail with AttributeError.
        fn_name = getattr(constructor_fn, "__name__", constructor_name)

        signature = inspect.signature(constructor_fn)
        parameters = signature.parameters
        keyword_capable_kinds = (
            inspect.Parameter.KEYWORD_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
        for expected_kwarg in ("name", "digest"):
            parameter = parameters.get(expected_kwarg)
            if parameter is None or parameter.kind not in keyword_capable_kinds:
                raise MlflowException(
                    f"Invalid dataset constructor function: {fn_name}. Function"
                    f" must define an optional parameter named '{expected_kwarg}'.",
                    INVALID_PARAMETER_VALUE,
                )

        # issubclass() raises TypeError when its first argument is not a class (for example
        # a string or typing construct used as the annotation), so guard with isclass() to
        # report every invalid annotation as an MlflowException.
        return_annotation = signature.return_annotation
        if not (inspect.isclass(return_annotation) and issubclass(return_annotation, Dataset)):
            raise MlflowException(
                f"Invalid dataset constructor function: {fn_name}. Function must"
                f" have a return type annotation that is a subclass of"
                f" :py:class:`mlflow.data.dataset.Dataset`.",
                INVALID_PARAMETER_VALUE,
            )
96
+
97
+
98
def register_constructor(
    constructor_fn: Callable[[Optional[str], Optional[str]], Dataset],
    constructor_name: Optional[str] = None,
) -> str:
    """Registers a dataset constructor and exposes it on the ``mlflow.data`` module.

    Args:
        constructor_fn: A function that accepts at least the following
            inputs and returns an instance of a subclass of
            :py:class:`mlflow.data.dataset.Dataset`:

            - name: Optional. A string dataset name
            - digest: Optional. A string dataset digest.

        constructor_name: The name of the constructor, e.g.
            "from_spark". The name must begin with the
            string "from_" or "load_". If unspecified, the `__name__`
            attribute of the `constructor_fn` is used instead and must
            begin with the string "from_" or "load_".

    Returns:
        The name of the registered constructor, e.g. "from_pandas" or "load_delta".

    """
    name = _dataset_registry.register_constructor(
        constructor_fn=constructor_fn, constructor_name=constructor_name
    )
    # Publish the constructor as an attribute of mlflow.data and list it in __all__.
    setattr(mlflow.data, name, constructor_fn)
    mlflow.data.__all__.append(name)
    return name
128
+
129
+
130
def get_registered_constructors() -> dict[str, Callable[[Optional[str], Optional[str]], Dataset]]:
    """Obtains the registered dataset constructors.

    Returns:
        The registry's own dictionary mapping constructor names to constructor functions
        (not a copy).

    """
    registered = _dataset_registry.constructors
    return registered
138
+
139
+
140
# Module-level registry used by register_constructor() and get_registered_constructors().
_dataset_registry = DatasetRegistry()
# Entrypoint-provided constructors are registered first. A constructor registered later
# under the same name replaces the earlier entry in the registry's dictionary.
_dataset_registry.register_entrypoints()

# Register the built-in constructors. Any ImportError raised inside a `with` block is
# suppressed, which skips that registration (presumably because the module needs an optional
# package that is not installed -- confirm against the imported modules).
# These calls go to the registry directly rather than through the module-level
# register_constructor(), so they do not set attributes on `mlflow.data` here.
with suppress(ImportError):
    from mlflow.data.pandas_dataset import from_pandas

    _dataset_registry.register_constructor(from_pandas)
with suppress(ImportError):
    from mlflow.data.numpy_dataset import from_numpy

    _dataset_registry.register_constructor(from_numpy)
with suppress(ImportError):
    from mlflow.data.huggingface_dataset import from_huggingface

    _dataset_registry.register_constructor(from_huggingface)
with suppress(ImportError):
    from mlflow.data.tensorflow_dataset import from_tensorflow

    _dataset_registry.register_constructor(from_tensorflow)
with suppress(ImportError):
    from mlflow.data.spark_dataset import from_spark, load_delta

    _dataset_registry.register_constructor(load_delta)
    _dataset_registry.register_constructor(from_spark)
with suppress(ImportError):
    from mlflow.data.polars_dataset import from_polars

    _dataset_registry.register_constructor(from_polars)
@@ -0,0 +1,110 @@
1
+ import json
2
+ from abc import abstractmethod
3
+ from typing import Any
4
+
5
+
6
class DatasetSource:
    """
    Describes where a dataset used in MLflow Tracking came from, for example a cloud storage
    location or a delta table name / version.
    """

    @staticmethod
    @abstractmethod
    def _get_source_type() -> str:
        """Obtains a string representing the source type of the dataset.

        Returns:
            The source type of the dataset as a string, e.g. "s3", "delta_table", ...

        """

    @abstractmethod
    def load(self) -> Any:
        """
        Loads the files / objects that this DatasetSource refers to. Depending on the concrete
        :py:class:`DatasetSource <mlflow.data.dataset_source.DatasetSource>`, this may for
        example download source CSV files from S3 to the local filesystem or load a source
        Delta Table as a Spark DataFrame.

        Returns:
            The downloaded source, e.g. a local filesystem path, a Spark DataFrame, etc.

        """

    @staticmethod
    @abstractmethod
    def _can_resolve(raw_source: Any) -> bool:
        """Determines whether this type of DatasetSource can be resolved from the given raw
        source object. An S3DatasetSource, for instance, can be resolved from an S3 URI like
        "s3://mybucket/path/to/iris/data" but not from an Azure Blob Storage URI like
        "wasbs://account@host.blob.core.windows.net".

        Args:
            raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data".

        Returns:
            True if this DatasetSource can resolve the raw source, False otherwise.

        """

    @classmethod
    @abstractmethod
    def _resolve(cls, raw_source: Any) -> "DatasetSource":
        """Constructs an instance of the DatasetSource from a raw source object, such as a
        string URI like "s3://mybucket/path/to/iris/data" or a delta table identifier
        like "my.delta.table@2".

        Args:
            raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data".

        Returns:
            A DatasetSource instance derived from the raw_source.

        """

    @abstractmethod
    def to_dict(self) -> dict[str, Any]:
        """Obtains a JSON-compatible dictionary representation of the DatasetSource.

        Returns:
            A JSON-compatible dictionary representation of the DatasetSource.

        """

    def to_json(self) -> str:
        """
        Serializes the dictionary produced by ``to_dict()`` to a JSON string.

        Returns:
            A JSON string representation of the
            :py:class:`DatasetSource <mlflow.data.dataset_source.DatasetSource>`.
        """
        as_dict = self.to_dict()
        return json.dumps(as_dict)

    @classmethod
    @abstractmethod
    def from_dict(cls, source_dict: dict[Any, Any]) -> "DatasetSource":
        """Constructs an instance of the DatasetSource from a dictionary representation.

        Args:
            source_dict: A dictionary representation of the DatasetSource.

        Returns:
            A DatasetSource instance.

        """

    @classmethod
    def from_json(cls, source_json: str) -> "DatasetSource":
        """Parses a JSON string and hands the resulting object to ``from_dict()``.

        Args:
            source_json: A JSON string representation of the DatasetSource.

        Returns:
            A DatasetSource instance.

        """
        parsed = json.loads(source_json)
        return cls.from_dict(parsed)
@@ -0,0 +1,219 @@
1
+ import warnings
2
+ from typing import Any, Optional
3
+
4
+ from mlflow.data.artifact_dataset_sources import register_artifact_dataset_sources
5
+ from mlflow.data.dataset_source import DatasetSource
6
+ from mlflow.data.http_dataset_source import HTTPDatasetSource
7
+ from mlflow.exceptions import MlflowException
8
+ from mlflow.protos.databricks_pb2 import RESOURCE_DOES_NOT_EXIST
9
+ from mlflow.utils.plugins import get_entry_points
10
+
11
+
12
class DatasetSourceRegistry:
    """Ordered registry of DatasetSource classes; later registrations take precedence."""

    def __init__(self):
        # Registered DatasetSource classes in registration order. resolve() and
        # get_source_from_json() walk this list in reverse, so the last entry wins.
        self.sources = []

    def register(self, source: type[DatasetSource]):
        """Registers a DatasetSource for use with MLflow Tracking.

        Args:
            source: The DatasetSource class to register.
        """
        self.sources.append(source)

    def register_entrypoints(self):
        """
        Registers dataset sources defined as Python entrypoints in the
        ``mlflow.dataset_source`` group. For reference, see
        https://mlflow.org/docs/latest/plugins.html#defining-a-plugin.

        An entrypoint that fails to load with an AttributeError or ImportError is reported
        with a warning and skipped.
        """
        for entrypoint in get_entry_points("mlflow.dataset_source"):
            try:
                self.register(entrypoint.load())
            except (AttributeError, ImportError) as exc:
                # This registry holds dataset sources, so the warning names them as such.
                warnings.warn(
                    f'Failure attempting to register dataset source "{entrypoint}": {exc}',
                    stacklevel=2,
                )

    def resolve(
        self, raw_source: Any, candidate_sources: Optional[list[type[DatasetSource]]] = None
    ) -> DatasetSource:
        """Resolves a raw source object, such as a string URI, to a DatasetSource for use with
        MLflow Tracking.

        Args:
            raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data" or a
                HuggingFace :py:class:`datasets.Dataset` object.
            candidate_sources: A list of DatasetSource classes to consider as potential sources
                when resolving the raw source. Subclasses of the specified candidate sources are
                also considered. If unspecified, all registered sources are considered.

        Raises:
            MlflowException: If no DatasetSource class can resolve the raw source.

        Returns:
            The resolved DatasetSource.
        """
        matching_sources = []
        for source in self.sources:
            if candidate_sources and not any(
                issubclass(source, candidate_src) for candidate_src in candidate_sources
            ):
                continue
            try:
                if source._can_resolve(raw_source):
                    matching_sources.append(source)
            except Exception as e:
                # A failing _can_resolve() only excludes that source; keep checking the rest.
                warnings.warn(
                    f"Failed to determine whether {source.__name__} can resolve source"
                    f" information for '{raw_source}'. Exception: {e}",
                    stacklevel=2,
                )
                continue

        if len(matching_sources) > 1:
            source_class_names_str = ", ".join([source.__name__ for source in matching_sources])
            warnings.warn(
                f"The specified dataset source can be interpreted in multiple ways:"
                f" {source_class_names_str}. MLflow will assume that this is a"
                f" {matching_sources[-1].__name__} source.",
                stacklevel=2,
            )

        # Try the most recently registered match first and fall back to earlier ones if
        # resolution raises.
        for matching_source in reversed(matching_sources):
            try:
                return matching_source._resolve(raw_source)
            except Exception as e:
                warnings.warn(
                    f"Encountered an unexpected error while using {matching_source.__name__} to"
                    f" resolve source information for '{raw_source}'. Exception: {e}",
                    stacklevel=2,
                )
                continue

        raise MlflowException(
            f"Could not find a source information resolver for the specified"
            f" dataset source: {raw_source}.",
            RESOURCE_DOES_NOT_EXIST,
        )

    def get_source_from_json(self, source_json: str, source_type: str) -> DatasetSource:
        """Parses and returns a DatasetSource object from its JSON representation.

        Args:
            source_json: The JSON representation of the DatasetSource.
            source_type: The string type of the DatasetSource, which indicates how to parse the
                source JSON.

        Raises:
            MlflowException: If no registered source reports the given source type.

        Returns:
            The DatasetSource parsed by the most recently registered class whose source type
            equals ``source_type``.
        """
        for source in reversed(self.sources):
            if source._get_source_type() == source_type:
                return source.from_json(source_json)

        raise MlflowException(
            f"Could not parse dataset source from JSON due to unrecognized"
            f" source type: {source_type}.",
            RESOURCE_DOES_NOT_EXIST,
        )
118
+
119
+
120
def register_dataset_source(source: DatasetSource):
    """Adds a DatasetSource to the module-level registry for use with MLflow Tracking.

    Args:
        source: The DatasetSource to register.
    """
    registry = _dataset_source_registry
    registry.register(source)
127
+
128
+
129
def resolve_dataset_source(
    raw_source: Any, candidate_sources: Optional[list[DatasetSource]] = None
) -> DatasetSource:
    """Resolves a raw source object, such as a string URI, to a DatasetSource for use with
    MLflow Tracking, using the module-level registry.

    Args:
        raw_source: The raw source, e.g. a string like "s3://mybucket/path/to/iris/data" or a
            HuggingFace :py:class:`datasets.Dataset` object.
        candidate_sources: A list of DatasetSource classes to consider as potential sources
            when resolving the raw source. Subclasses of the specified candidate
            sources are also considered. If unspecified, all registered sources
            are considered.

    Raises:
        MlflowException: If no DatasetSource class can resolve the raw source.

    Returns:
        The resolved DatasetSource.
    """
    resolved = _dataset_source_registry.resolve(
        raw_source=raw_source, candidate_sources=candidate_sources
    )
    return resolved
152
+
153
+
154
def get_dataset_source_from_json(source_json: str, source_type: str) -> DatasetSource:
    """Parses and returns a DatasetSource object from its JSON representation, using the
    module-level registry.

    Args:
        source_json: The JSON representation of the DatasetSource.
        source_type: The string type of the DatasetSource, which indicates how to parse the
            source JSON.
    """
    parsed_source = _dataset_source_registry.get_source_from_json(
        source_json=source_json, source_type=source_type
    )
    return parsed_source
165
+
166
+
167
def get_registered_sources() -> list[DatasetSource]:
    """Obtains the registered dataset sources.

    Returns:
        The registry's own list of registered dataset sources (not a copy), in
        registration order.

    """
    registered = _dataset_source_registry.sources
    return registered
175
+
176
+
177
# NB: The ordering here is important. The last dataset source to be registered takes precedence
# when resolving dataset information for a raw source (e.g. a string like "s3://mybucket/my/path").
# Dataset sources derived from artifact repositories are the most generic / provide the most
# general information about dataset source locations, so they are registered first. More specific
# source information is provided by specialized dataset platform sources like
# HuggingFaceDatasetSource, so these sources are registered next. Finally, externally-defined
# dataset sources are registered last because externally-defined behavior should take precedence
# over any internally-defined generic behavior.
# NOTE(review): register_entrypoints() below runs before the HuggingFace / Spark / Delta / Code /
# UC Volume registrations, so those built-in sources are registered after (and therefore take
# precedence over) entrypoint-provided sources -- confirm whether this matches the intent above.
_dataset_source_registry = DatasetSourceRegistry()
# presumably registers the artifact-repository-based sources into the registry created above;
# verify against mlflow.data.artifact_dataset_sources.
register_artifact_dataset_sources()
_dataset_source_registry.register(HTTPDatasetSource)
_dataset_source_registry.register_entrypoints()

# Each registration below is skipped when importing the source module raises ImportError.
try:
    from mlflow.data.huggingface_dataset_source import HuggingFaceDatasetSource

    _dataset_source_registry.register(HuggingFaceDatasetSource)
except ImportError:
    pass
try:
    from mlflow.data.spark_dataset_source import SparkDatasetSource

    _dataset_source_registry.register(SparkDatasetSource)
except ImportError:
    pass
try:
    from mlflow.data.delta_dataset_source import DeltaDatasetSource

    _dataset_source_registry.register(DeltaDatasetSource)
except ImportError:
    pass
try:
    from mlflow.data.code_dataset_source import CodeDatasetSource

    _dataset_source_registry.register(CodeDatasetSource)
except ImportError:
    pass
try:
    from mlflow.data.uc_volume_dataset_source import UCVolumeDatasetSource

    _dataset_source_registry.register(UCVolumeDatasetSource)
except ImportError:
    pass