genesis-flow 1.0.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (645)
  1. genesis_flow-1.0.0.dist-info/METADATA +822 -0
  2. genesis_flow-1.0.0.dist-info/RECORD +645 -0
  3. genesis_flow-1.0.0.dist-info/WHEEL +5 -0
  4. genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
  5. genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
  6. genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
  7. mlflow/__init__.py +367 -0
  8. mlflow/__main__.py +3 -0
  9. mlflow/ag2/__init__.py +56 -0
  10. mlflow/ag2/ag2_logger.py +294 -0
  11. mlflow/anthropic/__init__.py +40 -0
  12. mlflow/anthropic/autolog.py +129 -0
  13. mlflow/anthropic/chat.py +144 -0
  14. mlflow/artifacts/__init__.py +268 -0
  15. mlflow/autogen/__init__.py +144 -0
  16. mlflow/autogen/chat.py +142 -0
  17. mlflow/azure/__init__.py +26 -0
  18. mlflow/azure/auth_handler.py +257 -0
  19. mlflow/azure/client.py +319 -0
  20. mlflow/azure/config.py +120 -0
  21. mlflow/azure/connection_factory.py +340 -0
  22. mlflow/azure/exceptions.py +27 -0
  23. mlflow/azure/stores.py +327 -0
  24. mlflow/azure/utils.py +183 -0
  25. mlflow/bedrock/__init__.py +45 -0
  26. mlflow/bedrock/_autolog.py +202 -0
  27. mlflow/bedrock/chat.py +122 -0
  28. mlflow/bedrock/stream.py +160 -0
  29. mlflow/bedrock/utils.py +43 -0
  30. mlflow/cli.py +707 -0
  31. mlflow/client.py +12 -0
  32. mlflow/config/__init__.py +56 -0
  33. mlflow/crewai/__init__.py +79 -0
  34. mlflow/crewai/autolog.py +253 -0
  35. mlflow/crewai/chat.py +29 -0
  36. mlflow/data/__init__.py +75 -0
  37. mlflow/data/artifact_dataset_sources.py +170 -0
  38. mlflow/data/code_dataset_source.py +40 -0
  39. mlflow/data/dataset.py +123 -0
  40. mlflow/data/dataset_registry.py +168 -0
  41. mlflow/data/dataset_source.py +110 -0
  42. mlflow/data/dataset_source_registry.py +219 -0
  43. mlflow/data/delta_dataset_source.py +167 -0
  44. mlflow/data/digest_utils.py +108 -0
  45. mlflow/data/evaluation_dataset.py +562 -0
  46. mlflow/data/filesystem_dataset_source.py +81 -0
  47. mlflow/data/http_dataset_source.py +145 -0
  48. mlflow/data/huggingface_dataset.py +258 -0
  49. mlflow/data/huggingface_dataset_source.py +118 -0
  50. mlflow/data/meta_dataset.py +104 -0
  51. mlflow/data/numpy_dataset.py +223 -0
  52. mlflow/data/pandas_dataset.py +231 -0
  53. mlflow/data/polars_dataset.py +352 -0
  54. mlflow/data/pyfunc_dataset_mixin.py +31 -0
  55. mlflow/data/schema.py +76 -0
  56. mlflow/data/sources.py +1 -0
  57. mlflow/data/spark_dataset.py +406 -0
  58. mlflow/data/spark_dataset_source.py +74 -0
  59. mlflow/data/spark_delta_utils.py +118 -0
  60. mlflow/data/tensorflow_dataset.py +350 -0
  61. mlflow/data/uc_volume_dataset_source.py +81 -0
  62. mlflow/db.py +27 -0
  63. mlflow/dspy/__init__.py +17 -0
  64. mlflow/dspy/autolog.py +197 -0
  65. mlflow/dspy/callback.py +398 -0
  66. mlflow/dspy/constant.py +1 -0
  67. mlflow/dspy/load.py +93 -0
  68. mlflow/dspy/save.py +393 -0
  69. mlflow/dspy/util.py +109 -0
  70. mlflow/dspy/wrapper.py +226 -0
  71. mlflow/entities/__init__.py +104 -0
  72. mlflow/entities/_mlflow_object.py +52 -0
  73. mlflow/entities/assessment.py +545 -0
  74. mlflow/entities/assessment_error.py +80 -0
  75. mlflow/entities/assessment_source.py +141 -0
  76. mlflow/entities/dataset.py +92 -0
  77. mlflow/entities/dataset_input.py +51 -0
  78. mlflow/entities/dataset_summary.py +62 -0
  79. mlflow/entities/document.py +48 -0
  80. mlflow/entities/experiment.py +109 -0
  81. mlflow/entities/experiment_tag.py +35 -0
  82. mlflow/entities/file_info.py +45 -0
  83. mlflow/entities/input_tag.py +35 -0
  84. mlflow/entities/lifecycle_stage.py +35 -0
  85. mlflow/entities/logged_model.py +228 -0
  86. mlflow/entities/logged_model_input.py +26 -0
  87. mlflow/entities/logged_model_output.py +32 -0
  88. mlflow/entities/logged_model_parameter.py +46 -0
  89. mlflow/entities/logged_model_status.py +74 -0
  90. mlflow/entities/logged_model_tag.py +33 -0
  91. mlflow/entities/metric.py +200 -0
  92. mlflow/entities/model_registry/__init__.py +29 -0
  93. mlflow/entities/model_registry/_model_registry_entity.py +13 -0
  94. mlflow/entities/model_registry/model_version.py +243 -0
  95. mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
  96. mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
  97. mlflow/entities/model_registry/model_version_search.py +25 -0
  98. mlflow/entities/model_registry/model_version_stages.py +25 -0
  99. mlflow/entities/model_registry/model_version_status.py +35 -0
  100. mlflow/entities/model_registry/model_version_tag.py +35 -0
  101. mlflow/entities/model_registry/prompt.py +73 -0
  102. mlflow/entities/model_registry/prompt_version.py +244 -0
  103. mlflow/entities/model_registry/registered_model.py +175 -0
  104. mlflow/entities/model_registry/registered_model_alias.py +35 -0
  105. mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
  106. mlflow/entities/model_registry/registered_model_search.py +25 -0
  107. mlflow/entities/model_registry/registered_model_tag.py +35 -0
  108. mlflow/entities/multipart_upload.py +74 -0
  109. mlflow/entities/param.py +49 -0
  110. mlflow/entities/run.py +97 -0
  111. mlflow/entities/run_data.py +84 -0
  112. mlflow/entities/run_info.py +188 -0
  113. mlflow/entities/run_inputs.py +59 -0
  114. mlflow/entities/run_outputs.py +43 -0
  115. mlflow/entities/run_status.py +41 -0
  116. mlflow/entities/run_tag.py +36 -0
  117. mlflow/entities/source_type.py +31 -0
  118. mlflow/entities/span.py +774 -0
  119. mlflow/entities/span_event.py +96 -0
  120. mlflow/entities/span_status.py +102 -0
  121. mlflow/entities/trace.py +317 -0
  122. mlflow/entities/trace_data.py +71 -0
  123. mlflow/entities/trace_info.py +220 -0
  124. mlflow/entities/trace_info_v2.py +162 -0
  125. mlflow/entities/trace_location.py +173 -0
  126. mlflow/entities/trace_state.py +39 -0
  127. mlflow/entities/trace_status.py +68 -0
  128. mlflow/entities/view_type.py +51 -0
  129. mlflow/environment_variables.py +866 -0
  130. mlflow/evaluation/__init__.py +16 -0
  131. mlflow/evaluation/assessment.py +369 -0
  132. mlflow/evaluation/evaluation.py +411 -0
  133. mlflow/evaluation/evaluation_tag.py +61 -0
  134. mlflow/evaluation/fluent.py +48 -0
  135. mlflow/evaluation/utils.py +201 -0
  136. mlflow/exceptions.py +213 -0
  137. mlflow/experiments.py +140 -0
  138. mlflow/gemini/__init__.py +81 -0
  139. mlflow/gemini/autolog.py +186 -0
  140. mlflow/gemini/chat.py +261 -0
  141. mlflow/genai/__init__.py +71 -0
  142. mlflow/genai/datasets/__init__.py +67 -0
  143. mlflow/genai/datasets/evaluation_dataset.py +131 -0
  144. mlflow/genai/evaluation/__init__.py +3 -0
  145. mlflow/genai/evaluation/base.py +411 -0
  146. mlflow/genai/evaluation/constant.py +23 -0
  147. mlflow/genai/evaluation/utils.py +244 -0
  148. mlflow/genai/judges/__init__.py +21 -0
  149. mlflow/genai/judges/databricks.py +404 -0
  150. mlflow/genai/label_schemas/__init__.py +153 -0
  151. mlflow/genai/label_schemas/label_schemas.py +209 -0
  152. mlflow/genai/labeling/__init__.py +159 -0
  153. mlflow/genai/labeling/labeling.py +250 -0
  154. mlflow/genai/optimize/__init__.py +13 -0
  155. mlflow/genai/optimize/base.py +198 -0
  156. mlflow/genai/optimize/optimizers/__init__.py +4 -0
  157. mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
  158. mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
  159. mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
  160. mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
  161. mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
  162. mlflow/genai/optimize/types.py +75 -0
  163. mlflow/genai/optimize/util.py +30 -0
  164. mlflow/genai/prompts/__init__.py +206 -0
  165. mlflow/genai/scheduled_scorers.py +431 -0
  166. mlflow/genai/scorers/__init__.py +26 -0
  167. mlflow/genai/scorers/base.py +492 -0
  168. mlflow/genai/scorers/builtin_scorers.py +765 -0
  169. mlflow/genai/scorers/scorer_utils.py +138 -0
  170. mlflow/genai/scorers/validation.py +165 -0
  171. mlflow/genai/utils/data_validation.py +146 -0
  172. mlflow/genai/utils/enum_utils.py +23 -0
  173. mlflow/genai/utils/trace_utils.py +211 -0
  174. mlflow/groq/__init__.py +42 -0
  175. mlflow/groq/_groq_autolog.py +74 -0
  176. mlflow/johnsnowlabs/__init__.py +888 -0
  177. mlflow/langchain/__init__.py +24 -0
  178. mlflow/langchain/api_request_parallel_processor.py +330 -0
  179. mlflow/langchain/autolog.py +147 -0
  180. mlflow/langchain/chat_agent_langgraph.py +340 -0
  181. mlflow/langchain/constant.py +1 -0
  182. mlflow/langchain/constants.py +1 -0
  183. mlflow/langchain/databricks_dependencies.py +444 -0
  184. mlflow/langchain/langchain_tracer.py +597 -0
  185. mlflow/langchain/model.py +919 -0
  186. mlflow/langchain/output_parsers.py +142 -0
  187. mlflow/langchain/retriever_chain.py +153 -0
  188. mlflow/langchain/runnables.py +527 -0
  189. mlflow/langchain/utils/chat.py +402 -0
  190. mlflow/langchain/utils/logging.py +671 -0
  191. mlflow/langchain/utils/serialization.py +36 -0
  192. mlflow/legacy_databricks_cli/__init__.py +0 -0
  193. mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
  194. mlflow/legacy_databricks_cli/configure/provider.py +482 -0
  195. mlflow/litellm/__init__.py +175 -0
  196. mlflow/llama_index/__init__.py +22 -0
  197. mlflow/llama_index/autolog.py +55 -0
  198. mlflow/llama_index/chat.py +43 -0
  199. mlflow/llama_index/constant.py +1 -0
  200. mlflow/llama_index/model.py +577 -0
  201. mlflow/llama_index/pyfunc_wrapper.py +332 -0
  202. mlflow/llama_index/serialize_objects.py +188 -0
  203. mlflow/llama_index/tracer.py +561 -0
  204. mlflow/metrics/__init__.py +479 -0
  205. mlflow/metrics/base.py +39 -0
  206. mlflow/metrics/genai/__init__.py +25 -0
  207. mlflow/metrics/genai/base.py +101 -0
  208. mlflow/metrics/genai/genai_metric.py +771 -0
  209. mlflow/metrics/genai/metric_definitions.py +450 -0
  210. mlflow/metrics/genai/model_utils.py +371 -0
  211. mlflow/metrics/genai/prompt_template.py +68 -0
  212. mlflow/metrics/genai/prompts/__init__.py +0 -0
  213. mlflow/metrics/genai/prompts/v1.py +422 -0
  214. mlflow/metrics/genai/utils.py +6 -0
  215. mlflow/metrics/metric_definitions.py +619 -0
  216. mlflow/mismatch.py +34 -0
  217. mlflow/mistral/__init__.py +34 -0
  218. mlflow/mistral/autolog.py +71 -0
  219. mlflow/mistral/chat.py +135 -0
  220. mlflow/ml_package_versions.py +452 -0
  221. mlflow/models/__init__.py +97 -0
  222. mlflow/models/auth_policy.py +83 -0
  223. mlflow/models/cli.py +354 -0
  224. mlflow/models/container/__init__.py +294 -0
  225. mlflow/models/container/scoring_server/__init__.py +0 -0
  226. mlflow/models/container/scoring_server/nginx.conf +39 -0
  227. mlflow/models/dependencies_schemas.py +287 -0
  228. mlflow/models/display_utils.py +158 -0
  229. mlflow/models/docker_utils.py +211 -0
  230. mlflow/models/evaluation/__init__.py +23 -0
  231. mlflow/models/evaluation/_shap_patch.py +64 -0
  232. mlflow/models/evaluation/artifacts.py +194 -0
  233. mlflow/models/evaluation/base.py +1811 -0
  234. mlflow/models/evaluation/calibration_curve.py +109 -0
  235. mlflow/models/evaluation/default_evaluator.py +996 -0
  236. mlflow/models/evaluation/deprecated.py +23 -0
  237. mlflow/models/evaluation/evaluator_registry.py +80 -0
  238. mlflow/models/evaluation/evaluators/classifier.py +704 -0
  239. mlflow/models/evaluation/evaluators/default.py +233 -0
  240. mlflow/models/evaluation/evaluators/regressor.py +96 -0
  241. mlflow/models/evaluation/evaluators/shap.py +296 -0
  242. mlflow/models/evaluation/lift_curve.py +178 -0
  243. mlflow/models/evaluation/utils/metric.py +123 -0
  244. mlflow/models/evaluation/utils/trace.py +179 -0
  245. mlflow/models/evaluation/validation.py +434 -0
  246. mlflow/models/flavor_backend.py +93 -0
  247. mlflow/models/flavor_backend_registry.py +53 -0
  248. mlflow/models/model.py +1639 -0
  249. mlflow/models/model_config.py +150 -0
  250. mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
  251. mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
  252. mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
  253. mlflow/models/python_api.py +369 -0
  254. mlflow/models/rag_signatures.py +128 -0
  255. mlflow/models/resources.py +321 -0
  256. mlflow/models/signature.py +662 -0
  257. mlflow/models/utils.py +2054 -0
  258. mlflow/models/wheeled_model.py +280 -0
  259. mlflow/openai/__init__.py +57 -0
  260. mlflow/openai/_agent_tracer.py +364 -0
  261. mlflow/openai/api_request_parallel_processor.py +131 -0
  262. mlflow/openai/autolog.py +509 -0
  263. mlflow/openai/constant.py +1 -0
  264. mlflow/openai/model.py +824 -0
  265. mlflow/openai/utils/chat_schema.py +367 -0
  266. mlflow/optuna/__init__.py +3 -0
  267. mlflow/optuna/storage.py +646 -0
  268. mlflow/plugins/__init__.py +72 -0
  269. mlflow/plugins/base.py +358 -0
  270. mlflow/plugins/builtin/__init__.py +24 -0
  271. mlflow/plugins/builtin/pytorch_plugin.py +150 -0
  272. mlflow/plugins/builtin/sklearn_plugin.py +158 -0
  273. mlflow/plugins/builtin/transformers_plugin.py +187 -0
  274. mlflow/plugins/cli.py +321 -0
  275. mlflow/plugins/discovery.py +340 -0
  276. mlflow/plugins/manager.py +465 -0
  277. mlflow/plugins/registry.py +316 -0
  278. mlflow/plugins/templates/framework_plugin_template.py +329 -0
  279. mlflow/prompt/constants.py +20 -0
  280. mlflow/prompt/promptlab_model.py +197 -0
  281. mlflow/prompt/registry_utils.py +248 -0
  282. mlflow/promptflow/__init__.py +495 -0
  283. mlflow/protos/__init__.py +0 -0
  284. mlflow/protos/assessments_pb2.py +174 -0
  285. mlflow/protos/databricks_artifacts_pb2.py +489 -0
  286. mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
  287. mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
  288. mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
  289. mlflow/protos/databricks_pb2.py +267 -0
  290. mlflow/protos/databricks_trace_server_pb2.py +374 -0
  291. mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
  292. mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
  293. mlflow/protos/facet_feature_statistics_pb2.py +296 -0
  294. mlflow/protos/internal_pb2.py +77 -0
  295. mlflow/protos/mlflow_artifacts_pb2.py +336 -0
  296. mlflow/protos/model_registry_pb2.py +1073 -0
  297. mlflow/protos/scalapb/__init__.py +0 -0
  298. mlflow/protos/scalapb/scalapb_pb2.py +104 -0
  299. mlflow/protos/service_pb2.py +2600 -0
  300. mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
  301. mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
  302. mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
  303. mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
  304. mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
  305. mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
  306. mlflow/py.typed +0 -0
  307. mlflow/pydantic_ai/__init__.py +57 -0
  308. mlflow/pydantic_ai/autolog.py +173 -0
  309. mlflow/pyfunc/__init__.py +3844 -0
  310. mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
  311. mlflow/pyfunc/backend.py +523 -0
  312. mlflow/pyfunc/context.py +78 -0
  313. mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
  314. mlflow/pyfunc/loaders/__init__.py +7 -0
  315. mlflow/pyfunc/loaders/chat_agent.py +117 -0
  316. mlflow/pyfunc/loaders/chat_model.py +125 -0
  317. mlflow/pyfunc/loaders/code_model.py +31 -0
  318. mlflow/pyfunc/loaders/responses_agent.py +112 -0
  319. mlflow/pyfunc/mlserver.py +46 -0
  320. mlflow/pyfunc/model.py +1473 -0
  321. mlflow/pyfunc/scoring_server/__init__.py +604 -0
  322. mlflow/pyfunc/scoring_server/app.py +7 -0
  323. mlflow/pyfunc/scoring_server/client.py +146 -0
  324. mlflow/pyfunc/spark_model_cache.py +48 -0
  325. mlflow/pyfunc/stdin_server.py +44 -0
  326. mlflow/pyfunc/utils/__init__.py +3 -0
  327. mlflow/pyfunc/utils/data_validation.py +224 -0
  328. mlflow/pyfunc/utils/environment.py +22 -0
  329. mlflow/pyfunc/utils/input_converter.py +47 -0
  330. mlflow/pyfunc/utils/serving_data_parser.py +11 -0
  331. mlflow/pytorch/__init__.py +1171 -0
  332. mlflow/pytorch/_lightning_autolog.py +580 -0
  333. mlflow/pytorch/_pytorch_autolog.py +50 -0
  334. mlflow/pytorch/pickle_module.py +35 -0
  335. mlflow/rfunc/__init__.py +42 -0
  336. mlflow/rfunc/backend.py +134 -0
  337. mlflow/runs.py +89 -0
  338. mlflow/server/__init__.py +302 -0
  339. mlflow/server/auth/__init__.py +1224 -0
  340. mlflow/server/auth/__main__.py +4 -0
  341. mlflow/server/auth/basic_auth.ini +6 -0
  342. mlflow/server/auth/cli.py +11 -0
  343. mlflow/server/auth/client.py +537 -0
  344. mlflow/server/auth/config.py +34 -0
  345. mlflow/server/auth/db/__init__.py +0 -0
  346. mlflow/server/auth/db/cli.py +18 -0
  347. mlflow/server/auth/db/migrations/__init__.py +0 -0
  348. mlflow/server/auth/db/migrations/alembic.ini +110 -0
  349. mlflow/server/auth/db/migrations/env.py +76 -0
  350. mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
  351. mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
  352. mlflow/server/auth/db/models.py +67 -0
  353. mlflow/server/auth/db/utils.py +37 -0
  354. mlflow/server/auth/entities.py +165 -0
  355. mlflow/server/auth/logo.py +14 -0
  356. mlflow/server/auth/permissions.py +65 -0
  357. mlflow/server/auth/routes.py +18 -0
  358. mlflow/server/auth/sqlalchemy_store.py +263 -0
  359. mlflow/server/graphql/__init__.py +0 -0
  360. mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
  361. mlflow/server/graphql/graphql_custom_scalars.py +24 -0
  362. mlflow/server/graphql/graphql_errors.py +15 -0
  363. mlflow/server/graphql/graphql_no_batching.py +89 -0
  364. mlflow/server/graphql/graphql_schema_extensions.py +74 -0
  365. mlflow/server/handlers.py +3217 -0
  366. mlflow/server/prometheus_exporter.py +17 -0
  367. mlflow/server/validation.py +30 -0
  368. mlflow/shap/__init__.py +691 -0
  369. mlflow/sklearn/__init__.py +1994 -0
  370. mlflow/sklearn/utils.py +1041 -0
  371. mlflow/smolagents/__init__.py +66 -0
  372. mlflow/smolagents/autolog.py +139 -0
  373. mlflow/smolagents/chat.py +29 -0
  374. mlflow/store/__init__.py +10 -0
  375. mlflow/store/_unity_catalog/__init__.py +1 -0
  376. mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
  377. mlflow/store/_unity_catalog/lineage/constants.py +2 -0
  378. mlflow/store/_unity_catalog/registry/__init__.py +6 -0
  379. mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
  380. mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
  381. mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
  382. mlflow/store/_unity_catalog/registry/utils.py +121 -0
  383. mlflow/store/artifact/__init__.py +0 -0
  384. mlflow/store/artifact/artifact_repo.py +472 -0
  385. mlflow/store/artifact/artifact_repository_registry.py +154 -0
  386. mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
  387. mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
  388. mlflow/store/artifact/cli.py +141 -0
  389. mlflow/store/artifact/cloud_artifact_repo.py +332 -0
  390. mlflow/store/artifact/databricks_artifact_repo.py +729 -0
  391. mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
  392. mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
  393. mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
  394. mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
  395. mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
  396. mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
  397. mlflow/store/artifact/ftp_artifact_repo.py +132 -0
  398. mlflow/store/artifact/gcs_artifact_repo.py +296 -0
  399. mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
  400. mlflow/store/artifact/http_artifact_repo.py +218 -0
  401. mlflow/store/artifact/local_artifact_repo.py +142 -0
  402. mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
  403. mlflow/store/artifact/models_artifact_repo.py +259 -0
  404. mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
  405. mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
  406. mlflow/store/artifact/r2_artifact_repo.py +70 -0
  407. mlflow/store/artifact/runs_artifact_repo.py +265 -0
  408. mlflow/store/artifact/s3_artifact_repo.py +330 -0
  409. mlflow/store/artifact/sftp_artifact_repo.py +141 -0
  410. mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
  411. mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
  412. mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
  413. mlflow/store/artifact/utils/__init__.py +0 -0
  414. mlflow/store/artifact/utils/models.py +148 -0
  415. mlflow/store/db/__init__.py +0 -0
  416. mlflow/store/db/base_sql_model.py +3 -0
  417. mlflow/store/db/db_types.py +10 -0
  418. mlflow/store/db/utils.py +314 -0
  419. mlflow/store/db_migrations/__init__.py +0 -0
  420. mlflow/store/db_migrations/alembic.ini +74 -0
  421. mlflow/store/db_migrations/env.py +84 -0
  422. mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
  423. mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
  424. mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
  425. mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
  426. mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
  427. mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
  428. mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
  429. mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
  430. mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
  431. mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
  432. mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
  433. mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
  434. mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
  435. mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
  436. mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
  437. mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
  438. mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
  439. mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
  440. mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
  441. mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
  442. mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
  443. mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
  444. mlflow/store/db_migrations/versions/__init__.py +0 -0
  445. mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
  446. mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
  447. mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
  448. mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
  449. mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
  450. mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
  451. mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
  452. mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
  453. mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
  454. mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
  455. mlflow/store/entities/__init__.py +3 -0
  456. mlflow/store/entities/paged_list.py +18 -0
  457. mlflow/store/model_registry/__init__.py +10 -0
  458. mlflow/store/model_registry/abstract_store.py +1081 -0
  459. mlflow/store/model_registry/base_rest_store.py +44 -0
  460. mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
  461. mlflow/store/model_registry/dbmodels/__init__.py +0 -0
  462. mlflow/store/model_registry/dbmodels/models.py +206 -0
  463. mlflow/store/model_registry/file_store.py +1091 -0
  464. mlflow/store/model_registry/rest_store.py +481 -0
  465. mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
  466. mlflow/store/tracking/__init__.py +23 -0
  467. mlflow/store/tracking/abstract_store.py +816 -0
  468. mlflow/store/tracking/dbmodels/__init__.py +0 -0
  469. mlflow/store/tracking/dbmodels/initial_models.py +243 -0
  470. mlflow/store/tracking/dbmodels/models.py +1073 -0
  471. mlflow/store/tracking/file_store.py +2438 -0
  472. mlflow/store/tracking/postgres_managed_identity.py +146 -0
  473. mlflow/store/tracking/rest_store.py +1131 -0
  474. mlflow/store/tracking/sqlalchemy_store.py +2785 -0
  475. mlflow/system_metrics/__init__.py +61 -0
  476. mlflow/system_metrics/metrics/__init__.py +0 -0
  477. mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
  478. mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
  479. mlflow/system_metrics/metrics/disk_monitor.py +21 -0
  480. mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
  481. mlflow/system_metrics/metrics/network_monitor.py +34 -0
  482. mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
  483. mlflow/system_metrics/system_metrics_monitor.py +198 -0
  484. mlflow/tracing/__init__.py +16 -0
  485. mlflow/tracing/assessment.py +356 -0
  486. mlflow/tracing/client.py +531 -0
  487. mlflow/tracing/config.py +125 -0
  488. mlflow/tracing/constant.py +105 -0
  489. mlflow/tracing/destination.py +81 -0
  490. mlflow/tracing/display/__init__.py +40 -0
  491. mlflow/tracing/display/display_handler.py +196 -0
  492. mlflow/tracing/export/async_export_queue.py +186 -0
  493. mlflow/tracing/export/inference_table.py +138 -0
  494. mlflow/tracing/export/mlflow_v3.py +137 -0
  495. mlflow/tracing/export/utils.py +70 -0
  496. mlflow/tracing/fluent.py +1417 -0
  497. mlflow/tracing/processor/base_mlflow.py +199 -0
  498. mlflow/tracing/processor/inference_table.py +175 -0
  499. mlflow/tracing/processor/mlflow_v3.py +47 -0
  500. mlflow/tracing/processor/otel.py +73 -0
  501. mlflow/tracing/provider.py +487 -0
  502. mlflow/tracing/trace_manager.py +200 -0
  503. mlflow/tracing/utils/__init__.py +616 -0
  504. mlflow/tracing/utils/artifact_utils.py +28 -0
  505. mlflow/tracing/utils/copy.py +55 -0
  506. mlflow/tracing/utils/environment.py +55 -0
  507. mlflow/tracing/utils/exception.py +21 -0
  508. mlflow/tracing/utils/once.py +35 -0
  509. mlflow/tracing/utils/otlp.py +63 -0
  510. mlflow/tracing/utils/processor.py +54 -0
  511. mlflow/tracing/utils/search.py +292 -0
  512. mlflow/tracing/utils/timeout.py +250 -0
  513. mlflow/tracing/utils/token.py +19 -0
  514. mlflow/tracing/utils/truncation.py +124 -0
  515. mlflow/tracing/utils/warning.py +76 -0
  516. mlflow/tracking/__init__.py +39 -0
  517. mlflow/tracking/_model_registry/__init__.py +1 -0
  518. mlflow/tracking/_model_registry/client.py +764 -0
  519. mlflow/tracking/_model_registry/fluent.py +853 -0
  520. mlflow/tracking/_model_registry/registry.py +67 -0
  521. mlflow/tracking/_model_registry/utils.py +251 -0
  522. mlflow/tracking/_tracking_service/__init__.py +0 -0
  523. mlflow/tracking/_tracking_service/client.py +883 -0
  524. mlflow/tracking/_tracking_service/registry.py +56 -0
  525. mlflow/tracking/_tracking_service/utils.py +275 -0
  526. mlflow/tracking/artifact_utils.py +179 -0
  527. mlflow/tracking/client.py +5900 -0
  528. mlflow/tracking/context/__init__.py +0 -0
  529. mlflow/tracking/context/abstract_context.py +35 -0
  530. mlflow/tracking/context/databricks_cluster_context.py +15 -0
  531. mlflow/tracking/context/databricks_command_context.py +15 -0
  532. mlflow/tracking/context/databricks_job_context.py +49 -0
  533. mlflow/tracking/context/databricks_notebook_context.py +41 -0
  534. mlflow/tracking/context/databricks_repo_context.py +43 -0
  535. mlflow/tracking/context/default_context.py +51 -0
  536. mlflow/tracking/context/git_context.py +32 -0
  537. mlflow/tracking/context/registry.py +98 -0
  538. mlflow/tracking/context/system_environment_context.py +15 -0
  539. mlflow/tracking/default_experiment/__init__.py +1 -0
  540. mlflow/tracking/default_experiment/abstract_context.py +43 -0
  541. mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
  542. mlflow/tracking/default_experiment/registry.py +75 -0
  543. mlflow/tracking/fluent.py +3595 -0
  544. mlflow/tracking/metric_value_conversion_utils.py +93 -0
  545. mlflow/tracking/multimedia.py +206 -0
  546. mlflow/tracking/registry.py +86 -0
  547. mlflow/tracking/request_auth/__init__.py +0 -0
  548. mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
  549. mlflow/tracking/request_auth/registry.py +60 -0
  550. mlflow/tracking/request_header/__init__.py +0 -0
  551. mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
  552. mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
  553. mlflow/tracking/request_header/default_request_header_provider.py +17 -0
  554. mlflow/tracking/request_header/registry.py +79 -0
  555. mlflow/transformers/__init__.py +2982 -0
  556. mlflow/transformers/flavor_config.py +258 -0
  557. mlflow/transformers/hub_utils.py +83 -0
  558. mlflow/transformers/llm_inference_utils.py +468 -0
  559. mlflow/transformers/model_io.py +301 -0
  560. mlflow/transformers/peft.py +51 -0
  561. mlflow/transformers/signature.py +183 -0
  562. mlflow/transformers/torch_utils.py +55 -0
  563. mlflow/types/__init__.py +21 -0
  564. mlflow/types/agent.py +270 -0
  565. mlflow/types/chat.py +240 -0
  566. mlflow/types/llm.py +935 -0
  567. mlflow/types/responses.py +139 -0
  568. mlflow/types/responses_helpers.py +416 -0
  569. mlflow/types/schema.py +1505 -0
  570. mlflow/types/type_hints.py +647 -0
  571. mlflow/types/utils.py +753 -0
  572. mlflow/utils/__init__.py +283 -0
  573. mlflow/utils/_capture_modules.py +256 -0
  574. mlflow/utils/_capture_transformers_modules.py +75 -0
  575. mlflow/utils/_spark_utils.py +201 -0
  576. mlflow/utils/_unity_catalog_oss_utils.py +97 -0
  577. mlflow/utils/_unity_catalog_utils.py +479 -0
  578. mlflow/utils/annotations.py +218 -0
  579. mlflow/utils/arguments_utils.py +16 -0
  580. mlflow/utils/async_logging/__init__.py +1 -0
  581. mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
  582. mlflow/utils/async_logging/async_logging_queue.py +366 -0
  583. mlflow/utils/async_logging/run_artifact.py +38 -0
  584. mlflow/utils/async_logging/run_batch.py +58 -0
  585. mlflow/utils/async_logging/run_operations.py +49 -0
  586. mlflow/utils/autologging_utils/__init__.py +737 -0
  587. mlflow/utils/autologging_utils/client.py +432 -0
  588. mlflow/utils/autologging_utils/config.py +33 -0
  589. mlflow/utils/autologging_utils/events.py +294 -0
  590. mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
  591. mlflow/utils/autologging_utils/metrics_queue.py +71 -0
  592. mlflow/utils/autologging_utils/safety.py +1104 -0
  593. mlflow/utils/autologging_utils/versioning.py +95 -0
  594. mlflow/utils/checkpoint_utils.py +206 -0
  595. mlflow/utils/class_utils.py +6 -0
  596. mlflow/utils/cli_args.py +257 -0
  597. mlflow/utils/conda.py +354 -0
  598. mlflow/utils/credentials.py +231 -0
  599. mlflow/utils/data_utils.py +17 -0
  600. mlflow/utils/databricks_utils.py +1436 -0
  601. mlflow/utils/docstring_utils.py +477 -0
  602. mlflow/utils/doctor.py +133 -0
  603. mlflow/utils/download_cloud_file_chunk.py +43 -0
  604. mlflow/utils/env_manager.py +16 -0
  605. mlflow/utils/env_pack.py +131 -0
  606. mlflow/utils/environment.py +1009 -0
  607. mlflow/utils/exception_utils.py +14 -0
  608. mlflow/utils/file_utils.py +978 -0
  609. mlflow/utils/git_utils.py +77 -0
  610. mlflow/utils/gorilla.py +797 -0
  611. mlflow/utils/import_hooks/__init__.py +363 -0
  612. mlflow/utils/lazy_load.py +51 -0
  613. mlflow/utils/logging_utils.py +168 -0
  614. mlflow/utils/mime_type_utils.py +58 -0
  615. mlflow/utils/mlflow_tags.py +103 -0
  616. mlflow/utils/model_utils.py +486 -0
  617. mlflow/utils/name_utils.py +346 -0
  618. mlflow/utils/nfs_on_spark.py +62 -0
  619. mlflow/utils/openai_utils.py +164 -0
  620. mlflow/utils/os.py +12 -0
  621. mlflow/utils/oss_registry_utils.py +29 -0
  622. mlflow/utils/plugins.py +17 -0
  623. mlflow/utils/process.py +182 -0
  624. mlflow/utils/promptlab_utils.py +146 -0
  625. mlflow/utils/proto_json_utils.py +743 -0
  626. mlflow/utils/pydantic_utils.py +54 -0
  627. mlflow/utils/request_utils.py +279 -0
  628. mlflow/utils/requirements_utils.py +704 -0
  629. mlflow/utils/rest_utils.py +673 -0
  630. mlflow/utils/search_logged_model_utils.py +127 -0
  631. mlflow/utils/search_utils.py +2111 -0
  632. mlflow/utils/secure_loading.py +221 -0
  633. mlflow/utils/security_validation.py +384 -0
  634. mlflow/utils/server_cli_utils.py +61 -0
  635. mlflow/utils/spark_utils.py +15 -0
  636. mlflow/utils/string_utils.py +138 -0
  637. mlflow/utils/thread_utils.py +63 -0
  638. mlflow/utils/time.py +54 -0
  639. mlflow/utils/timeout.py +42 -0
  640. mlflow/utils/uri.py +572 -0
  641. mlflow/utils/validation.py +662 -0
  642. mlflow/utils/virtualenv.py +458 -0
  643. mlflow/utils/warnings_utils.py +25 -0
  644. mlflow/utils/yaml_utils.py +179 -0
  645. mlflow/version.py +24 -0
@@ -0,0 +1,406 @@
1
+ import json
2
+ import logging
3
+ from functools import cached_property
4
+ from typing import TYPE_CHECKING, Any, Optional, Union
5
+
6
+ from packaging.version import Version
7
+
8
+ from mlflow.data.dataset import Dataset
9
+ from mlflow.data.dataset_source import DatasetSource
10
+ from mlflow.data.delta_dataset_source import DeltaDatasetSource
11
+ from mlflow.data.digest_utils import get_normalized_md5_digest
12
+ from mlflow.data.evaluation_dataset import EvaluationDataset
13
+ from mlflow.data.pyfunc_dataset_mixin import PyFuncConvertibleDatasetMixin, PyFuncInputsOutputs
14
+ from mlflow.data.spark_dataset_source import SparkDatasetSource
15
+ from mlflow.exceptions import MlflowException
16
+ from mlflow.protos.databricks_pb2 import INTERNAL_ERROR, INVALID_PARAMETER_VALUE
17
+ from mlflow.types import Schema
18
+ from mlflow.types.utils import _infer_schema
19
+
20
+ if TYPE_CHECKING:
21
+ import pyspark
22
+
23
+ _logger = logging.getLogger(__name__)
24
+
25
+
26
class SparkDataset(Dataset, PyFuncConvertibleDatasetMixin):
    """
    An MLflow Tracking dataset backed by a Spark DataFrame, e.g. data read from a Spark table,
    a directory of files, or a Delta table.
    """

    def __init__(
        self,
        df: "pyspark.sql.DataFrame",
        source: DatasetSource,
        targets: Optional[str] = None,
        name: Optional[str] = None,
        digest: Optional[str] = None,
        predictions: Optional[str] = None,
    ):
        # Reject column names the DataFrame does not expose before storing any state.
        if targets is not None and targets not in df.columns:
            message = (
                f"The specified Spark dataset does not contain the specified targets column"
                f" '{targets}'."
            )
            raise MlflowException(message, INVALID_PARAMETER_VALUE)
        if predictions is not None and predictions not in df.columns:
            message = (
                f"The specified Spark dataset does not contain the specified predictions column"
                f" '{predictions}'."
            )
            raise MlflowException(message, INVALID_PARAMETER_VALUE)

        self._df = df
        self._targets = targets
        self._predictions = predictions
        super().__init__(source=source, name=name, digest=digest)

    def _compute_digest(self) -> str:
        """
        Derives the dataset digest from the semantic hash of the DataFrame's logical plan.
        Used when the caller did not pass an explicit digest to the constructor.
        """
        # Hashing the logical plan avoids scanning DataFrame records.
        import numpy as np
        import pyspark

        # DataFrame.semanticHash() is only available from Spark 3.1.0 onwards; older versions
        # reach the analyzed plan through the underlying Java DataFrame.
        if Version(pyspark.__version__) < Version("3.1.0"):
            plan_hash = self._df._jdf.queryExecution().analyzed().semanticHash()
        else:
            plan_hash = self._df.semanticHash()
        return get_normalized_md5_digest([np.int64(plan_hash)])

    def to_dict(self) -> dict[str, str]:
        """Build the config dictionary for the dataset.

        The result extends the parent class dictionary with two JSON-encoded entries:
        ``schema`` (``None`` when no schema is available) and ``profile``.
        """
        col_spec = self.schema
        schema_json = json.dumps({"mlflow_colspec": col_spec.to_dict()}) if col_spec else None
        config = super().to_dict()
        config["schema"] = schema_json
        config["profile"] = json.dumps(self.profile)
        return config

    @property
    def df(self):
        """The Spark DataFrame instance wrapped by this dataset.

        Returns:
            The Spark DataFrame passed at construction time.
        """
        return self._df

    @property
    def targets(self) -> Optional[str]:
        """The name of the DataFrame column holding targets (labels) for supervised learning.

        Returns:
            The targets column name, or ``None`` if none was specified.
        """
        return self._targets

    @property
    def predictions(self) -> Optional[str]:
        """
        The name of the predictions column, or ``None`` if no predictions column was given
        when the dataset was created.
        """
        return self._predictions

    @property
    def source(self) -> Union[SparkDatasetSource, DeltaDatasetSource]:
        """
        Source information for the Spark dataset.

        Returns:
            An instance of
            :py:class:`SparkDatasetSource <mlflow.data.spark_dataset_source.SparkDatasetSource>` or
            :py:class:`DeltaDatasetSource <mlflow.data.delta_dataset_source.DeltaDatasetSource>`.
        """
        return self._source

    @property
    def profile(self) -> Optional[Any]:
        """
        A profile of the dataset: a dict with an ``approx_count`` entry. ``None`` is returned
        when computing the profile fails for any reason.
        """
        try:
            from pyspark.rdd import BoundedFloat

            # An exact count() may be expensive, so an approximate count is requested instead.
            # The Scala RDD API is called directly because the PySpark API does not respect
            # the specified timeout. Reference code:
            # https://spark.apache.org/docs/3.4.0/api/python/_modules/pyspark/rdd.html
            # #RDD.countApprox. This is confirmed to work in all Spark 3.x versions
            source_rdd = self.df.rdd
            partition_counts = source_rdd.mapPartitions(lambda rows: [float(sum(1 for _ in rows))])
            java_rdd = partition_counts.mapPartitions(
                lambda counts: [float(sum(counts))]
            )._to_java_object_rdd()
            java_double_rdd = partition_counts.ctx._jvm.JavaDoubleRDD.fromRDD(java_rdd.rdd())
            timeout_millis = 5000
            confidence = 0.9
            partial_result = java_double_rdd.sumApprox(timeout_millis, confidence).initialValue()
            bounded_count = BoundedFloat(
                mean=partial_result.mean(),
                confidence=partial_result.confidence(),
                low=partial_result.low(),
                high=partial_result.high(),
            )
            approx_count = int(bounded_count)
            if approx_count <= 0:
                # A non-positive estimate most likely means the timeout elapsed before any
                # estimate was available; report "unknown" rather than suggest an empty dataset.
                approx_count = "unknown"

            return {"approx_count": approx_count}
        except Exception as e:
            _logger.warning(
                "Encountered an unexpected exception while computing Spark dataset profile."
                " Exception: %s",
                e,
            )
            return None

    @cached_property
    def schema(self) -> Optional[Schema]:
        """
        The MLflow ColSpec schema inferred from the Spark DataFrame, or ``None`` when
        inference fails.
        """
        try:
            inferred = _infer_schema(self._df)
        except Exception as e:
            _logger.warning("Failed to infer schema for Spark dataset. Exception: %s", e)
            return None
        return inferred

    def to_pyfunc(self) -> PyFuncInputsOutputs:
        """
        Converts the Spark DataFrame to pandas and splits the resulting
        :py:class:`pandas.DataFrame` into a :py:class:`pandas.DataFrame` of features and a
        :py:class:`pandas.Series` of targets (``None`` when no targets column was specified).

        To limit driver memory usage, only the first 10,000 DataFrame rows are converted.
        """
        pandas_df = self._df.limit(10000).toPandas()
        target_col = self._targets
        if target_col is None:
            return PyFuncInputsOutputs(inputs=pandas_df, outputs=None)

        if target_col not in pandas_df.columns:
            # Reported as an internal error: the constructor already verified that the
            # targets column exists in the Spark dataset.
            raise MlflowException(
                f"Failed to convert Spark dataset to pyfunc inputs and outputs because"
                f" the pandas representation of the Spark dataset does not contain the"
                f" specified targets column '{self._targets}'.",
                INTERNAL_ERROR,
            )
        features = pandas_df.drop(columns=target_col)
        labels = pandas_df[target_col]
        return PyFuncInputsOutputs(inputs=features, outputs=labels)

    def to_evaluation_dataset(self, path=None, feature_names=None) -> EvaluationDataset:
        """
        Converts the dataset to an EvaluationDataset for model evaluation. Required
        for use with mlflow.evaluate(). Only the first 10,000 rows are converted to pandas.
        """
        pandas_df = self._df.limit(10000).toPandas()
        return EvaluationDataset(
            data=pandas_df,
            targets=self._targets,
            path=path,
            feature_names=feature_names,
            predictions=self._predictions,
            name=self.name,
            digest=self.digest,
        )
226
+
227
+
228
def load_delta(
    path: Optional[str] = None,
    table_name: Optional[str] = None,
    version: Optional[str] = None,
    targets: Optional[str] = None,
    name: Optional[str] = None,
    digest: Optional[str] = None,
) -> SparkDataset:
    """
    Builds a :py:class:`SparkDataset <mlflow.data.spark_dataset.SparkDataset>` from a Delta
    table for use with MLflow Tracking.

    Args:
        path: The path to the Delta table. Exactly one of ``path`` and ``table_name`` must be
            specified.
        table_name: The name of the Delta table. Exactly one of ``path`` and ``table_name``
            must be specified.
        version: The Delta table version. If not specified, the latest version is looked up
            (and may remain ``None`` if the lookup fails).
        targets: Optional. The name of the Delta table column containing targets (labels) for
            supervised learning.
        name: The name of the dataset. E.g. "wiki_train". If unspecified and ``table_name`` is
            given, the name is derived from the table name and version.
        digest: The digest (hash, fingerprint) of the dataset. If unspecified, a digest
            is automatically computed.

    Returns:
        An instance of :py:class:`SparkDataset <mlflow.data.spark_dataset.SparkDataset>`.
    """
    from mlflow.data.spark_delta_utils import (
        _try_get_delta_table_latest_version_from_path,
        _try_get_delta_table_latest_version_from_table_name,
    )

    given_locations = [loc for loc in (path, table_name) if loc is not None]
    if len(given_locations) != 1:
        raise MlflowException(
            "Must specify exactly one of `table_name` or `path`.",
            INVALID_PARAMETER_VALUE,
        )

    if version is None:
        version = (
            _try_get_delta_table_latest_version_from_path(path)
            if path is not None
            else _try_get_delta_table_latest_version_from_table_name(table_name)
        )

    if name is None and table_name is not None:
        # Default name: the table name, tagged with the version when one is known.
        version_suffix = "" if version is None else f"@v{version}"
        name = table_name + version_suffix

    source = DeltaDatasetSource(path=path, delta_table_name=table_name, delta_table_version=version)
    loaded_df = source.load()

    return SparkDataset(
        df=loaded_df,
        source=source,
        targets=targets,
        name=name,
        digest=digest,
    )
285
+
286
+
287
def from_spark(
    df: "pyspark.sql.DataFrame",
    path: Optional[str] = None,
    table_name: Optional[str] = None,
    version: Optional[str] = None,
    sql: Optional[str] = None,
    targets: Optional[str] = None,
    name: Optional[str] = None,
    digest: Optional[str] = None,
    predictions: Optional[str] = None,
) -> SparkDataset:
    """
    Given a Spark DataFrame, constructs a
    :py:class:`SparkDataset <mlflow.data.spark_dataset.SparkDataset>` object for use with
    MLflow Tracking.

    At most one of ``path``, ``table_name`` and ``sql`` may be specified. The chosen value does
    not have to match the DataFrame exactly, since the DataFrame may have been modified by Spark
    operations; it is used to reload the dataset upon request via
    :py:func:`SparkDataset.source.load()
    <mlflow.data.spark_dataset_source.SparkDatasetSource.load>`. If none of the three is
    specified, a CodeDatasetSource is used, which will source information from the run context.

    Args:
        df: The Spark DataFrame from which to construct a SparkDataset.
        path: The path of the Spark or Delta source that the DataFrame originally came from.
        table_name: The name of the Spark or Delta table that the DataFrame originally came
            from.
        version: If the DataFrame originally came from a Delta table, specifies the version of
            the Delta table. Only ``None`` is treated as "unspecified" (so an explicit version
            ``0`` is honored); in that case the latest version is looked up for Delta sources.
            ``version`` cannot be specified if ``sql`` is specified, nor when the path or table
            does not refer to a Delta table.
        sql: The Spark SQL statement that was originally used to construct the DataFrame.
        targets: Optional. The name of the Data Frame column containing targets (labels) for
            supervised learning.
        name: The name of the dataset. E.g. "wiki_train". If unspecified, a name is automatically
            generated.
        digest: The digest (hash, fingerprint) of the dataset. If unspecified, a digest is
            automatically computed.
        predictions: Optional. The name of the column containing model predictions,
            if the dataset contains model predictions. If specified, this column
            must be present in the dataframe (``df``).

    Returns:
        An instance of :py:class:`SparkDataset <mlflow.data.spark_dataset.SparkDataset>`.

    Raises:
        MlflowException: If more than one of ``path``, ``table_name`` and ``sql`` is given, if
            ``version`` is combined with ``sql``, or if ``version`` is given for a path or table
            that is not a Delta table.
    """
    from mlflow.data.code_dataset_source import CodeDatasetSource
    from mlflow.data.spark_delta_utils import (
        _is_delta_table,
        _is_delta_table_path,
        _try_get_delta_table_latest_version_from_path,
        _try_get_delta_table_latest_version_from_table_name,
    )
    from mlflow.tracking.context import registry

    if (path, table_name, sql).count(None) < 2:
        raise MlflowException(
            "Must specify at most one of `path`, `table_name`, or `sql`.",
            INVALID_PARAMETER_VALUE,
        )

    if sql is not None and version is not None:
        raise MlflowException(
            "`version` may not be specified when `sql` is specified. `version` may only be"
            " specified when `table_name` or `path` is specified.",
            INVALID_PARAMETER_VALUE,
        )

    if sql is not None:
        source = SparkDatasetSource(sql=sql)
    elif path is not None:
        if _is_delta_table_path(path):
            # Compare against None explicitly: Delta versions start at 0, and a truthiness
            # test would silently replace an explicit version 0 with the latest version.
            if version is None:
                version = _try_get_delta_table_latest_version_from_path(path)
            source = DeltaDatasetSource(path=path, delta_table_version=version)
        elif version is None:
            source = SparkDatasetSource(path=path)
        else:
            raise MlflowException(
                f"Version '{version}' was specified, but the path '{path}' does not refer"
                f" to a Delta table.",
                INVALID_PARAMETER_VALUE,
            )
    elif table_name is not None:
        if _is_delta_table(table_name):
            # Same explicit None check as for paths, so that version 0 is preserved.
            if version is None:
                version = _try_get_delta_table_latest_version_from_table_name(table_name)
            source = DeltaDatasetSource(
                delta_table_name=table_name,
                delta_table_version=version,
            )
        elif version is None:
            source = SparkDatasetSource(table_name=table_name)
        else:
            raise MlflowException(
                f"Version '{version}' was specified, but could not find a Delta table with name"
                f" '{table_name}'.",
                INVALID_PARAMETER_VALUE,
            )
    else:
        # No reloadable origin was given; describe the source through run-context tags.
        context_tags = registry.resolve_tags()
        source = CodeDatasetSource(tags=context_tags)

    return SparkDataset(
        df=df,
        source=source,
        targets=targets,
        name=name,
        digest=digest,
        predictions=predictions,
    )
@@ -0,0 +1,74 @@
1
+ from typing import Any, Optional
2
+
3
+ from mlflow.data.dataset_source import DatasetSource
4
+ from mlflow.exceptions import MlflowException
5
+ from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
6
+
7
+
8
class SparkDatasetSource(DatasetSource):
    """
    Describes where a Spark dataset came from: a file path, a table name, or a SQL statement.
    """

    def __init__(
        self,
        path: Optional[str] = None,
        table_name: Optional[str] = None,
        sql: Optional[str] = None,
    ):
        # Exactly one of the three origins must be provided.
        missing = sum(1 for origin in (path, table_name, sql) if origin is None)
        if missing != 2:
            raise MlflowException(
                'Must specify exactly one of "path", "table_name", or "sql"',
                INVALID_PARAMETER_VALUE,
            )
        self._path = path
        self._table_name = table_name
        self._sql = sql

    @staticmethod
    def _get_source_type() -> str:
        return "spark"

    def load(self, **kwargs):
        """Loads the dataset source as a Spark DataFrame.

        A path is read as parquet, a table name is read as a table, and a SQL statement is
        executed on the active (or newly created) Spark session.

        Returns:
            An instance of ``pyspark.sql.DataFrame``.
        """
        from pyspark.sql import SparkSession

        session = SparkSession.builder.getOrCreate()

        if self._path:
            return session.read.parquet(self._path)
        if self._table_name:
            return session.read.table(self._table_name)
        if self._sql:
            return session.sql(self._sql)
        return None

    @staticmethod
    def _can_resolve(raw_source: Any):
        return False

    @classmethod
    def _resolve(cls, raw_source: str) -> "SparkDatasetSource":
        raise NotImplementedError

    def to_dict(self) -> dict[Any, Any]:
        # Emit only the first origin that is set, checked in path / table_name / sql order.
        candidates = (
            ("path", self._path),
            ("table_name", self._table_name),
            ("sql", self._sql),
        )
        for key, value in candidates:
            if value is not None:
                return {key: value}
        return {}

    @classmethod
    def from_dict(cls, source_dict: dict[Any, Any]) -> "SparkDatasetSource":
        path = source_dict.get("path")
        table_name = source_dict.get("table_name")
        sql = source_dict.get("sql")
        return cls(path=path, table_name=table_name, sql=sql)
@@ -0,0 +1,118 @@
1
+ import logging
2
+ import os
3
+ from typing import Optional
4
+
5
+ from mlflow.utils.string_utils import _backtick_quote
6
+
7
+ _logger = logging.getLogger(__name__)
8
+
9
+
10
def _is_delta_table(table_name: str) -> bool:
    """Checks if a Delta table exists with the specified table name.

    Args:
        table_name: The name of the table to inspect. It is interpolated into the SQL statement
            as-is.

    Returns:
        True if a Delta table exists with the specified table name. False otherwise, including
        when ``DESCRIBE DETAIL`` raises an ``AnalysisException``.

    """
    from pyspark.sql import SparkSession
    from pyspark.sql.utils import AnalysisException

    spark = SparkSession.builder.getOrCreate()

    try:
        # use DESCRIBE DETAIL to check if the table is a Delta table
        # https://docs.databricks.com/delta/delta-utility.html#describe-detail
        # format will be `delta` for delta tables
        detail = spark.sql(f"DESCRIBE DETAIL {table_name}")
        # The filtered row count must be inspected: a successful DESCRIBE DETAIL alone does
        # not prove that the reported format is `delta`.
        return detail.filter("format = 'delta'").count() > 0
    except AnalysisException:
        return False
30
+
31
+
32
+ def _is_delta_table_path(path: str) -> bool:
33
+ """Checks if the specified filesystem path is a Delta table.
34
+
35
+ Returns:
36
+ True if the specified path is a Delta table. False otherwise.
37
+ """
38
+ if os.path.exists(path) and os.path.isdir(path) and "_delta_log" in os.listdir(path):
39
+ return True
40
+ from mlflow.utils.uri import dbfs_hdfs_uri_to_fuse_path
41
+
42
+ try:
43
+ dbfs_path = dbfs_hdfs_uri_to_fuse_path(path)
44
+ return os.path.exists(dbfs_path) and "_delta_log" in os.listdir(dbfs_path)
45
+ except Exception:
46
+ return False
47
+
48
+
49
def _try_get_delta_table_latest_version_from_path(path: str) -> Optional[int]:
    """Looks up the latest version of the Delta table stored at the given path.

    Args:
        path: The path to the Delta table.

    Returns:
        The version of the Delta table, or None if it cannot be resolved (e.g. because the
        Delta core library is not installed or the specified path does not refer to a Delta
        table). Failures are logged as warnings rather than raised.

    """
    from pyspark.sql import SparkSession

    try:
        session = SparkSession.builder.getOrCreate()
        # Resolve the table through the JVM DeltaTable API exposed by the Spark session.
        delta_table_cls = session._jvm.io.delta.tables.DeltaTable
        j_table = delta_table_cls.forPath(session._jsparkSession, path)
        return _get_delta_table_latest_version(j_table)
    except Exception as e:
        _logger.warning(
            "Failed to obtain version information for Delta table at path '%s'. Version information"
            " may not be included in the dataset source for MLflow Tracking. Exception: %s",
            path,
            e,
        )
        return None
74
+
75
+
76
def _try_get_delta_table_latest_version_from_table_name(table_name: str) -> Optional[int]:
    """Looks up the latest version of the Delta table with the given name.

    Args:
        table_name: The name of the Delta table. Each dot-separated part is passed through
            ``_backtick_quote`` before the lookup.

    Returns:
        The version of the Delta table, or None if it cannot be resolved (e.g. because the
        Delta core library is not installed or no such table exists). Failures are logged as
        warnings rather than raised.
    """
    from pyspark.sql import SparkSession

    try:
        session = SparkSession.builder.getOrCreate()
        quoted_name = ".".join(_backtick_quote(part) for part in table_name.split("."))
        delta_table_cls = session._jvm.io.delta.tables.DeltaTable
        j_table = delta_table_cls.forName(session._jsparkSession, quoted_name)
        return _get_delta_table_latest_version(j_table)
    except Exception as e:
        _logger.warning(
            "Failed to obtain version information for Delta table with name '%s'. Version"
            " information may not be included in the dataset source for MLflow Tracking."
            " Exception: %s",
            table_name,
            e,
        )
        return None
103
+
104
+
105
+ def _get_delta_table_latest_version(j_delta_table) -> int:
106
+ """Obtains the latest version of the specified Delta table Java class.
107
+
108
+ Args:
109
+ j_delta_table: A Java DeltaTable class instance.
110
+
111
+ Returns:
112
+ The version of the Delta table.
113
+
114
+ """
115
+ latest_commit_jdf = j_delta_table.history(1)
116
+ latest_commit_row = latest_commit_jdf.head()
117
+ version_field_idx = latest_commit_row.fieldIndex("version")
118
+ return latest_commit_row.get(version_field_idx)