genesis-flow 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (645) hide show
  1. genesis_flow-1.0.0.dist-info/METADATA +822 -0
  2. genesis_flow-1.0.0.dist-info/RECORD +645 -0
  3. genesis_flow-1.0.0.dist-info/WHEEL +5 -0
  4. genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
  5. genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
  6. genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
  7. mlflow/__init__.py +367 -0
  8. mlflow/__main__.py +3 -0
  9. mlflow/ag2/__init__.py +56 -0
  10. mlflow/ag2/ag2_logger.py +294 -0
  11. mlflow/anthropic/__init__.py +40 -0
  12. mlflow/anthropic/autolog.py +129 -0
  13. mlflow/anthropic/chat.py +144 -0
  14. mlflow/artifacts/__init__.py +268 -0
  15. mlflow/autogen/__init__.py +144 -0
  16. mlflow/autogen/chat.py +142 -0
  17. mlflow/azure/__init__.py +26 -0
  18. mlflow/azure/auth_handler.py +257 -0
  19. mlflow/azure/client.py +319 -0
  20. mlflow/azure/config.py +120 -0
  21. mlflow/azure/connection_factory.py +340 -0
  22. mlflow/azure/exceptions.py +27 -0
  23. mlflow/azure/stores.py +327 -0
  24. mlflow/azure/utils.py +183 -0
  25. mlflow/bedrock/__init__.py +45 -0
  26. mlflow/bedrock/_autolog.py +202 -0
  27. mlflow/bedrock/chat.py +122 -0
  28. mlflow/bedrock/stream.py +160 -0
  29. mlflow/bedrock/utils.py +43 -0
  30. mlflow/cli.py +707 -0
  31. mlflow/client.py +12 -0
  32. mlflow/config/__init__.py +56 -0
  33. mlflow/crewai/__init__.py +79 -0
  34. mlflow/crewai/autolog.py +253 -0
  35. mlflow/crewai/chat.py +29 -0
  36. mlflow/data/__init__.py +75 -0
  37. mlflow/data/artifact_dataset_sources.py +170 -0
  38. mlflow/data/code_dataset_source.py +40 -0
  39. mlflow/data/dataset.py +123 -0
  40. mlflow/data/dataset_registry.py +168 -0
  41. mlflow/data/dataset_source.py +110 -0
  42. mlflow/data/dataset_source_registry.py +219 -0
  43. mlflow/data/delta_dataset_source.py +167 -0
  44. mlflow/data/digest_utils.py +108 -0
  45. mlflow/data/evaluation_dataset.py +562 -0
  46. mlflow/data/filesystem_dataset_source.py +81 -0
  47. mlflow/data/http_dataset_source.py +145 -0
  48. mlflow/data/huggingface_dataset.py +258 -0
  49. mlflow/data/huggingface_dataset_source.py +118 -0
  50. mlflow/data/meta_dataset.py +104 -0
  51. mlflow/data/numpy_dataset.py +223 -0
  52. mlflow/data/pandas_dataset.py +231 -0
  53. mlflow/data/polars_dataset.py +352 -0
  54. mlflow/data/pyfunc_dataset_mixin.py +31 -0
  55. mlflow/data/schema.py +76 -0
  56. mlflow/data/sources.py +1 -0
  57. mlflow/data/spark_dataset.py +406 -0
  58. mlflow/data/spark_dataset_source.py +74 -0
  59. mlflow/data/spark_delta_utils.py +118 -0
  60. mlflow/data/tensorflow_dataset.py +350 -0
  61. mlflow/data/uc_volume_dataset_source.py +81 -0
  62. mlflow/db.py +27 -0
  63. mlflow/dspy/__init__.py +17 -0
  64. mlflow/dspy/autolog.py +197 -0
  65. mlflow/dspy/callback.py +398 -0
  66. mlflow/dspy/constant.py +1 -0
  67. mlflow/dspy/load.py +93 -0
  68. mlflow/dspy/save.py +393 -0
  69. mlflow/dspy/util.py +109 -0
  70. mlflow/dspy/wrapper.py +226 -0
  71. mlflow/entities/__init__.py +104 -0
  72. mlflow/entities/_mlflow_object.py +52 -0
  73. mlflow/entities/assessment.py +545 -0
  74. mlflow/entities/assessment_error.py +80 -0
  75. mlflow/entities/assessment_source.py +141 -0
  76. mlflow/entities/dataset.py +92 -0
  77. mlflow/entities/dataset_input.py +51 -0
  78. mlflow/entities/dataset_summary.py +62 -0
  79. mlflow/entities/document.py +48 -0
  80. mlflow/entities/experiment.py +109 -0
  81. mlflow/entities/experiment_tag.py +35 -0
  82. mlflow/entities/file_info.py +45 -0
  83. mlflow/entities/input_tag.py +35 -0
  84. mlflow/entities/lifecycle_stage.py +35 -0
  85. mlflow/entities/logged_model.py +228 -0
  86. mlflow/entities/logged_model_input.py +26 -0
  87. mlflow/entities/logged_model_output.py +32 -0
  88. mlflow/entities/logged_model_parameter.py +46 -0
  89. mlflow/entities/logged_model_status.py +74 -0
  90. mlflow/entities/logged_model_tag.py +33 -0
  91. mlflow/entities/metric.py +200 -0
  92. mlflow/entities/model_registry/__init__.py +29 -0
  93. mlflow/entities/model_registry/_model_registry_entity.py +13 -0
  94. mlflow/entities/model_registry/model_version.py +243 -0
  95. mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
  96. mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
  97. mlflow/entities/model_registry/model_version_search.py +25 -0
  98. mlflow/entities/model_registry/model_version_stages.py +25 -0
  99. mlflow/entities/model_registry/model_version_status.py +35 -0
  100. mlflow/entities/model_registry/model_version_tag.py +35 -0
  101. mlflow/entities/model_registry/prompt.py +73 -0
  102. mlflow/entities/model_registry/prompt_version.py +244 -0
  103. mlflow/entities/model_registry/registered_model.py +175 -0
  104. mlflow/entities/model_registry/registered_model_alias.py +35 -0
  105. mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
  106. mlflow/entities/model_registry/registered_model_search.py +25 -0
  107. mlflow/entities/model_registry/registered_model_tag.py +35 -0
  108. mlflow/entities/multipart_upload.py +74 -0
  109. mlflow/entities/param.py +49 -0
  110. mlflow/entities/run.py +97 -0
  111. mlflow/entities/run_data.py +84 -0
  112. mlflow/entities/run_info.py +188 -0
  113. mlflow/entities/run_inputs.py +59 -0
  114. mlflow/entities/run_outputs.py +43 -0
  115. mlflow/entities/run_status.py +41 -0
  116. mlflow/entities/run_tag.py +36 -0
  117. mlflow/entities/source_type.py +31 -0
  118. mlflow/entities/span.py +774 -0
  119. mlflow/entities/span_event.py +96 -0
  120. mlflow/entities/span_status.py +102 -0
  121. mlflow/entities/trace.py +317 -0
  122. mlflow/entities/trace_data.py +71 -0
  123. mlflow/entities/trace_info.py +220 -0
  124. mlflow/entities/trace_info_v2.py +162 -0
  125. mlflow/entities/trace_location.py +173 -0
  126. mlflow/entities/trace_state.py +39 -0
  127. mlflow/entities/trace_status.py +68 -0
  128. mlflow/entities/view_type.py +51 -0
  129. mlflow/environment_variables.py +866 -0
  130. mlflow/evaluation/__init__.py +16 -0
  131. mlflow/evaluation/assessment.py +369 -0
  132. mlflow/evaluation/evaluation.py +411 -0
  133. mlflow/evaluation/evaluation_tag.py +61 -0
  134. mlflow/evaluation/fluent.py +48 -0
  135. mlflow/evaluation/utils.py +201 -0
  136. mlflow/exceptions.py +213 -0
  137. mlflow/experiments.py +140 -0
  138. mlflow/gemini/__init__.py +81 -0
  139. mlflow/gemini/autolog.py +186 -0
  140. mlflow/gemini/chat.py +261 -0
  141. mlflow/genai/__init__.py +71 -0
  142. mlflow/genai/datasets/__init__.py +67 -0
  143. mlflow/genai/datasets/evaluation_dataset.py +131 -0
  144. mlflow/genai/evaluation/__init__.py +3 -0
  145. mlflow/genai/evaluation/base.py +411 -0
  146. mlflow/genai/evaluation/constant.py +23 -0
  147. mlflow/genai/evaluation/utils.py +244 -0
  148. mlflow/genai/judges/__init__.py +21 -0
  149. mlflow/genai/judges/databricks.py +404 -0
  150. mlflow/genai/label_schemas/__init__.py +153 -0
  151. mlflow/genai/label_schemas/label_schemas.py +209 -0
  152. mlflow/genai/labeling/__init__.py +159 -0
  153. mlflow/genai/labeling/labeling.py +250 -0
  154. mlflow/genai/optimize/__init__.py +13 -0
  155. mlflow/genai/optimize/base.py +198 -0
  156. mlflow/genai/optimize/optimizers/__init__.py +4 -0
  157. mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
  158. mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
  159. mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
  160. mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
  161. mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
  162. mlflow/genai/optimize/types.py +75 -0
  163. mlflow/genai/optimize/util.py +30 -0
  164. mlflow/genai/prompts/__init__.py +206 -0
  165. mlflow/genai/scheduled_scorers.py +431 -0
  166. mlflow/genai/scorers/__init__.py +26 -0
  167. mlflow/genai/scorers/base.py +492 -0
  168. mlflow/genai/scorers/builtin_scorers.py +765 -0
  169. mlflow/genai/scorers/scorer_utils.py +138 -0
  170. mlflow/genai/scorers/validation.py +165 -0
  171. mlflow/genai/utils/data_validation.py +146 -0
  172. mlflow/genai/utils/enum_utils.py +23 -0
  173. mlflow/genai/utils/trace_utils.py +211 -0
  174. mlflow/groq/__init__.py +42 -0
  175. mlflow/groq/_groq_autolog.py +74 -0
  176. mlflow/johnsnowlabs/__init__.py +888 -0
  177. mlflow/langchain/__init__.py +24 -0
  178. mlflow/langchain/api_request_parallel_processor.py +330 -0
  179. mlflow/langchain/autolog.py +147 -0
  180. mlflow/langchain/chat_agent_langgraph.py +340 -0
  181. mlflow/langchain/constant.py +1 -0
  182. mlflow/langchain/constants.py +1 -0
  183. mlflow/langchain/databricks_dependencies.py +444 -0
  184. mlflow/langchain/langchain_tracer.py +597 -0
  185. mlflow/langchain/model.py +919 -0
  186. mlflow/langchain/output_parsers.py +142 -0
  187. mlflow/langchain/retriever_chain.py +153 -0
  188. mlflow/langchain/runnables.py +527 -0
  189. mlflow/langchain/utils/chat.py +402 -0
  190. mlflow/langchain/utils/logging.py +671 -0
  191. mlflow/langchain/utils/serialization.py +36 -0
  192. mlflow/legacy_databricks_cli/__init__.py +0 -0
  193. mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
  194. mlflow/legacy_databricks_cli/configure/provider.py +482 -0
  195. mlflow/litellm/__init__.py +175 -0
  196. mlflow/llama_index/__init__.py +22 -0
  197. mlflow/llama_index/autolog.py +55 -0
  198. mlflow/llama_index/chat.py +43 -0
  199. mlflow/llama_index/constant.py +1 -0
  200. mlflow/llama_index/model.py +577 -0
  201. mlflow/llama_index/pyfunc_wrapper.py +332 -0
  202. mlflow/llama_index/serialize_objects.py +188 -0
  203. mlflow/llama_index/tracer.py +561 -0
  204. mlflow/metrics/__init__.py +479 -0
  205. mlflow/metrics/base.py +39 -0
  206. mlflow/metrics/genai/__init__.py +25 -0
  207. mlflow/metrics/genai/base.py +101 -0
  208. mlflow/metrics/genai/genai_metric.py +771 -0
  209. mlflow/metrics/genai/metric_definitions.py +450 -0
  210. mlflow/metrics/genai/model_utils.py +371 -0
  211. mlflow/metrics/genai/prompt_template.py +68 -0
  212. mlflow/metrics/genai/prompts/__init__.py +0 -0
  213. mlflow/metrics/genai/prompts/v1.py +422 -0
  214. mlflow/metrics/genai/utils.py +6 -0
  215. mlflow/metrics/metric_definitions.py +619 -0
  216. mlflow/mismatch.py +34 -0
  217. mlflow/mistral/__init__.py +34 -0
  218. mlflow/mistral/autolog.py +71 -0
  219. mlflow/mistral/chat.py +135 -0
  220. mlflow/ml_package_versions.py +452 -0
  221. mlflow/models/__init__.py +97 -0
  222. mlflow/models/auth_policy.py +83 -0
  223. mlflow/models/cli.py +354 -0
  224. mlflow/models/container/__init__.py +294 -0
  225. mlflow/models/container/scoring_server/__init__.py +0 -0
  226. mlflow/models/container/scoring_server/nginx.conf +39 -0
  227. mlflow/models/dependencies_schemas.py +287 -0
  228. mlflow/models/display_utils.py +158 -0
  229. mlflow/models/docker_utils.py +211 -0
  230. mlflow/models/evaluation/__init__.py +23 -0
  231. mlflow/models/evaluation/_shap_patch.py +64 -0
  232. mlflow/models/evaluation/artifacts.py +194 -0
  233. mlflow/models/evaluation/base.py +1811 -0
  234. mlflow/models/evaluation/calibration_curve.py +109 -0
  235. mlflow/models/evaluation/default_evaluator.py +996 -0
  236. mlflow/models/evaluation/deprecated.py +23 -0
  237. mlflow/models/evaluation/evaluator_registry.py +80 -0
  238. mlflow/models/evaluation/evaluators/classifier.py +704 -0
  239. mlflow/models/evaluation/evaluators/default.py +233 -0
  240. mlflow/models/evaluation/evaluators/regressor.py +96 -0
  241. mlflow/models/evaluation/evaluators/shap.py +296 -0
  242. mlflow/models/evaluation/lift_curve.py +178 -0
  243. mlflow/models/evaluation/utils/metric.py +123 -0
  244. mlflow/models/evaluation/utils/trace.py +179 -0
  245. mlflow/models/evaluation/validation.py +434 -0
  246. mlflow/models/flavor_backend.py +93 -0
  247. mlflow/models/flavor_backend_registry.py +53 -0
  248. mlflow/models/model.py +1639 -0
  249. mlflow/models/model_config.py +150 -0
  250. mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
  251. mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
  252. mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
  253. mlflow/models/python_api.py +369 -0
  254. mlflow/models/rag_signatures.py +128 -0
  255. mlflow/models/resources.py +321 -0
  256. mlflow/models/signature.py +662 -0
  257. mlflow/models/utils.py +2054 -0
  258. mlflow/models/wheeled_model.py +280 -0
  259. mlflow/openai/__init__.py +57 -0
  260. mlflow/openai/_agent_tracer.py +364 -0
  261. mlflow/openai/api_request_parallel_processor.py +131 -0
  262. mlflow/openai/autolog.py +509 -0
  263. mlflow/openai/constant.py +1 -0
  264. mlflow/openai/model.py +824 -0
  265. mlflow/openai/utils/chat_schema.py +367 -0
  266. mlflow/optuna/__init__.py +3 -0
  267. mlflow/optuna/storage.py +646 -0
  268. mlflow/plugins/__init__.py +72 -0
  269. mlflow/plugins/base.py +358 -0
  270. mlflow/plugins/builtin/__init__.py +24 -0
  271. mlflow/plugins/builtin/pytorch_plugin.py +150 -0
  272. mlflow/plugins/builtin/sklearn_plugin.py +158 -0
  273. mlflow/plugins/builtin/transformers_plugin.py +187 -0
  274. mlflow/plugins/cli.py +321 -0
  275. mlflow/plugins/discovery.py +340 -0
  276. mlflow/plugins/manager.py +465 -0
  277. mlflow/plugins/registry.py +316 -0
  278. mlflow/plugins/templates/framework_plugin_template.py +329 -0
  279. mlflow/prompt/constants.py +20 -0
  280. mlflow/prompt/promptlab_model.py +197 -0
  281. mlflow/prompt/registry_utils.py +248 -0
  282. mlflow/promptflow/__init__.py +495 -0
  283. mlflow/protos/__init__.py +0 -0
  284. mlflow/protos/assessments_pb2.py +174 -0
  285. mlflow/protos/databricks_artifacts_pb2.py +489 -0
  286. mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
  287. mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
  288. mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
  289. mlflow/protos/databricks_pb2.py +267 -0
  290. mlflow/protos/databricks_trace_server_pb2.py +374 -0
  291. mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
  292. mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
  293. mlflow/protos/facet_feature_statistics_pb2.py +296 -0
  294. mlflow/protos/internal_pb2.py +77 -0
  295. mlflow/protos/mlflow_artifacts_pb2.py +336 -0
  296. mlflow/protos/model_registry_pb2.py +1073 -0
  297. mlflow/protos/scalapb/__init__.py +0 -0
  298. mlflow/protos/scalapb/scalapb_pb2.py +104 -0
  299. mlflow/protos/service_pb2.py +2600 -0
  300. mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
  301. mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
  302. mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
  303. mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
  304. mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
  305. mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
  306. mlflow/py.typed +0 -0
  307. mlflow/pydantic_ai/__init__.py +57 -0
  308. mlflow/pydantic_ai/autolog.py +173 -0
  309. mlflow/pyfunc/__init__.py +3844 -0
  310. mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
  311. mlflow/pyfunc/backend.py +523 -0
  312. mlflow/pyfunc/context.py +78 -0
  313. mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
  314. mlflow/pyfunc/loaders/__init__.py +7 -0
  315. mlflow/pyfunc/loaders/chat_agent.py +117 -0
  316. mlflow/pyfunc/loaders/chat_model.py +125 -0
  317. mlflow/pyfunc/loaders/code_model.py +31 -0
  318. mlflow/pyfunc/loaders/responses_agent.py +112 -0
  319. mlflow/pyfunc/mlserver.py +46 -0
  320. mlflow/pyfunc/model.py +1473 -0
  321. mlflow/pyfunc/scoring_server/__init__.py +604 -0
  322. mlflow/pyfunc/scoring_server/app.py +7 -0
  323. mlflow/pyfunc/scoring_server/client.py +146 -0
  324. mlflow/pyfunc/spark_model_cache.py +48 -0
  325. mlflow/pyfunc/stdin_server.py +44 -0
  326. mlflow/pyfunc/utils/__init__.py +3 -0
  327. mlflow/pyfunc/utils/data_validation.py +224 -0
  328. mlflow/pyfunc/utils/environment.py +22 -0
  329. mlflow/pyfunc/utils/input_converter.py +47 -0
  330. mlflow/pyfunc/utils/serving_data_parser.py +11 -0
  331. mlflow/pytorch/__init__.py +1171 -0
  332. mlflow/pytorch/_lightning_autolog.py +580 -0
  333. mlflow/pytorch/_pytorch_autolog.py +50 -0
  334. mlflow/pytorch/pickle_module.py +35 -0
  335. mlflow/rfunc/__init__.py +42 -0
  336. mlflow/rfunc/backend.py +134 -0
  337. mlflow/runs.py +89 -0
  338. mlflow/server/__init__.py +302 -0
  339. mlflow/server/auth/__init__.py +1224 -0
  340. mlflow/server/auth/__main__.py +4 -0
  341. mlflow/server/auth/basic_auth.ini +6 -0
  342. mlflow/server/auth/cli.py +11 -0
  343. mlflow/server/auth/client.py +537 -0
  344. mlflow/server/auth/config.py +34 -0
  345. mlflow/server/auth/db/__init__.py +0 -0
  346. mlflow/server/auth/db/cli.py +18 -0
  347. mlflow/server/auth/db/migrations/__init__.py +0 -0
  348. mlflow/server/auth/db/migrations/alembic.ini +110 -0
  349. mlflow/server/auth/db/migrations/env.py +76 -0
  350. mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
  351. mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
  352. mlflow/server/auth/db/models.py +67 -0
  353. mlflow/server/auth/db/utils.py +37 -0
  354. mlflow/server/auth/entities.py +165 -0
  355. mlflow/server/auth/logo.py +14 -0
  356. mlflow/server/auth/permissions.py +65 -0
  357. mlflow/server/auth/routes.py +18 -0
  358. mlflow/server/auth/sqlalchemy_store.py +263 -0
  359. mlflow/server/graphql/__init__.py +0 -0
  360. mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
  361. mlflow/server/graphql/graphql_custom_scalars.py +24 -0
  362. mlflow/server/graphql/graphql_errors.py +15 -0
  363. mlflow/server/graphql/graphql_no_batching.py +89 -0
  364. mlflow/server/graphql/graphql_schema_extensions.py +74 -0
  365. mlflow/server/handlers.py +3217 -0
  366. mlflow/server/prometheus_exporter.py +17 -0
  367. mlflow/server/validation.py +30 -0
  368. mlflow/shap/__init__.py +691 -0
  369. mlflow/sklearn/__init__.py +1994 -0
  370. mlflow/sklearn/utils.py +1041 -0
  371. mlflow/smolagents/__init__.py +66 -0
  372. mlflow/smolagents/autolog.py +139 -0
  373. mlflow/smolagents/chat.py +29 -0
  374. mlflow/store/__init__.py +10 -0
  375. mlflow/store/_unity_catalog/__init__.py +1 -0
  376. mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
  377. mlflow/store/_unity_catalog/lineage/constants.py +2 -0
  378. mlflow/store/_unity_catalog/registry/__init__.py +6 -0
  379. mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
  380. mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
  381. mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
  382. mlflow/store/_unity_catalog/registry/utils.py +121 -0
  383. mlflow/store/artifact/__init__.py +0 -0
  384. mlflow/store/artifact/artifact_repo.py +472 -0
  385. mlflow/store/artifact/artifact_repository_registry.py +154 -0
  386. mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
  387. mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
  388. mlflow/store/artifact/cli.py +141 -0
  389. mlflow/store/artifact/cloud_artifact_repo.py +332 -0
  390. mlflow/store/artifact/databricks_artifact_repo.py +729 -0
  391. mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
  392. mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
  393. mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
  394. mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
  395. mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
  396. mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
  397. mlflow/store/artifact/ftp_artifact_repo.py +132 -0
  398. mlflow/store/artifact/gcs_artifact_repo.py +296 -0
  399. mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
  400. mlflow/store/artifact/http_artifact_repo.py +218 -0
  401. mlflow/store/artifact/local_artifact_repo.py +142 -0
  402. mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
  403. mlflow/store/artifact/models_artifact_repo.py +259 -0
  404. mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
  405. mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
  406. mlflow/store/artifact/r2_artifact_repo.py +70 -0
  407. mlflow/store/artifact/runs_artifact_repo.py +265 -0
  408. mlflow/store/artifact/s3_artifact_repo.py +330 -0
  409. mlflow/store/artifact/sftp_artifact_repo.py +141 -0
  410. mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
  411. mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
  412. mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
  413. mlflow/store/artifact/utils/__init__.py +0 -0
  414. mlflow/store/artifact/utils/models.py +148 -0
  415. mlflow/store/db/__init__.py +0 -0
  416. mlflow/store/db/base_sql_model.py +3 -0
  417. mlflow/store/db/db_types.py +10 -0
  418. mlflow/store/db/utils.py +314 -0
  419. mlflow/store/db_migrations/__init__.py +0 -0
  420. mlflow/store/db_migrations/alembic.ini +74 -0
  421. mlflow/store/db_migrations/env.py +84 -0
  422. mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
  423. mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
  424. mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
  425. mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
  426. mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
  427. mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
  428. mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
  429. mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
  430. mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
  431. mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
  432. mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
  433. mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
  434. mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
  435. mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
  436. mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
  437. mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
  438. mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
  439. mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
  440. mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
  441. mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
  442. mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
  443. mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
  444. mlflow/store/db_migrations/versions/__init__.py +0 -0
  445. mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
  446. mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
  447. mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
  448. mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
  449. mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
  450. mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
  451. mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
  452. mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
  453. mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
  454. mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
  455. mlflow/store/entities/__init__.py +3 -0
  456. mlflow/store/entities/paged_list.py +18 -0
  457. mlflow/store/model_registry/__init__.py +10 -0
  458. mlflow/store/model_registry/abstract_store.py +1081 -0
  459. mlflow/store/model_registry/base_rest_store.py +44 -0
  460. mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
  461. mlflow/store/model_registry/dbmodels/__init__.py +0 -0
  462. mlflow/store/model_registry/dbmodels/models.py +206 -0
  463. mlflow/store/model_registry/file_store.py +1091 -0
  464. mlflow/store/model_registry/rest_store.py +481 -0
  465. mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
  466. mlflow/store/tracking/__init__.py +23 -0
  467. mlflow/store/tracking/abstract_store.py +816 -0
  468. mlflow/store/tracking/dbmodels/__init__.py +0 -0
  469. mlflow/store/tracking/dbmodels/initial_models.py +243 -0
  470. mlflow/store/tracking/dbmodels/models.py +1073 -0
  471. mlflow/store/tracking/file_store.py +2438 -0
  472. mlflow/store/tracking/postgres_managed_identity.py +146 -0
  473. mlflow/store/tracking/rest_store.py +1131 -0
  474. mlflow/store/tracking/sqlalchemy_store.py +2785 -0
  475. mlflow/system_metrics/__init__.py +61 -0
  476. mlflow/system_metrics/metrics/__init__.py +0 -0
  477. mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
  478. mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
  479. mlflow/system_metrics/metrics/disk_monitor.py +21 -0
  480. mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
  481. mlflow/system_metrics/metrics/network_monitor.py +34 -0
  482. mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
  483. mlflow/system_metrics/system_metrics_monitor.py +198 -0
  484. mlflow/tracing/__init__.py +16 -0
  485. mlflow/tracing/assessment.py +356 -0
  486. mlflow/tracing/client.py +531 -0
  487. mlflow/tracing/config.py +125 -0
  488. mlflow/tracing/constant.py +105 -0
  489. mlflow/tracing/destination.py +81 -0
  490. mlflow/tracing/display/__init__.py +40 -0
  491. mlflow/tracing/display/display_handler.py +196 -0
  492. mlflow/tracing/export/async_export_queue.py +186 -0
  493. mlflow/tracing/export/inference_table.py +138 -0
  494. mlflow/tracing/export/mlflow_v3.py +137 -0
  495. mlflow/tracing/export/utils.py +70 -0
  496. mlflow/tracing/fluent.py +1417 -0
  497. mlflow/tracing/processor/base_mlflow.py +199 -0
  498. mlflow/tracing/processor/inference_table.py +175 -0
  499. mlflow/tracing/processor/mlflow_v3.py +47 -0
  500. mlflow/tracing/processor/otel.py +73 -0
  501. mlflow/tracing/provider.py +487 -0
  502. mlflow/tracing/trace_manager.py +200 -0
  503. mlflow/tracing/utils/__init__.py +616 -0
  504. mlflow/tracing/utils/artifact_utils.py +28 -0
  505. mlflow/tracing/utils/copy.py +55 -0
  506. mlflow/tracing/utils/environment.py +55 -0
  507. mlflow/tracing/utils/exception.py +21 -0
  508. mlflow/tracing/utils/once.py +35 -0
  509. mlflow/tracing/utils/otlp.py +63 -0
  510. mlflow/tracing/utils/processor.py +54 -0
  511. mlflow/tracing/utils/search.py +292 -0
  512. mlflow/tracing/utils/timeout.py +250 -0
  513. mlflow/tracing/utils/token.py +19 -0
  514. mlflow/tracing/utils/truncation.py +124 -0
  515. mlflow/tracing/utils/warning.py +76 -0
  516. mlflow/tracking/__init__.py +39 -0
  517. mlflow/tracking/_model_registry/__init__.py +1 -0
  518. mlflow/tracking/_model_registry/client.py +764 -0
  519. mlflow/tracking/_model_registry/fluent.py +853 -0
  520. mlflow/tracking/_model_registry/registry.py +67 -0
  521. mlflow/tracking/_model_registry/utils.py +251 -0
  522. mlflow/tracking/_tracking_service/__init__.py +0 -0
  523. mlflow/tracking/_tracking_service/client.py +883 -0
  524. mlflow/tracking/_tracking_service/registry.py +56 -0
  525. mlflow/tracking/_tracking_service/utils.py +275 -0
  526. mlflow/tracking/artifact_utils.py +179 -0
  527. mlflow/tracking/client.py +5900 -0
  528. mlflow/tracking/context/__init__.py +0 -0
  529. mlflow/tracking/context/abstract_context.py +35 -0
  530. mlflow/tracking/context/databricks_cluster_context.py +15 -0
  531. mlflow/tracking/context/databricks_command_context.py +15 -0
  532. mlflow/tracking/context/databricks_job_context.py +49 -0
  533. mlflow/tracking/context/databricks_notebook_context.py +41 -0
  534. mlflow/tracking/context/databricks_repo_context.py +43 -0
  535. mlflow/tracking/context/default_context.py +51 -0
  536. mlflow/tracking/context/git_context.py +32 -0
  537. mlflow/tracking/context/registry.py +98 -0
  538. mlflow/tracking/context/system_environment_context.py +15 -0
  539. mlflow/tracking/default_experiment/__init__.py +1 -0
  540. mlflow/tracking/default_experiment/abstract_context.py +43 -0
  541. mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
  542. mlflow/tracking/default_experiment/registry.py +75 -0
  543. mlflow/tracking/fluent.py +3595 -0
  544. mlflow/tracking/metric_value_conversion_utils.py +93 -0
  545. mlflow/tracking/multimedia.py +206 -0
  546. mlflow/tracking/registry.py +86 -0
  547. mlflow/tracking/request_auth/__init__.py +0 -0
  548. mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
  549. mlflow/tracking/request_auth/registry.py +60 -0
  550. mlflow/tracking/request_header/__init__.py +0 -0
  551. mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
  552. mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
  553. mlflow/tracking/request_header/default_request_header_provider.py +17 -0
  554. mlflow/tracking/request_header/registry.py +79 -0
  555. mlflow/transformers/__init__.py +2982 -0
  556. mlflow/transformers/flavor_config.py +258 -0
  557. mlflow/transformers/hub_utils.py +83 -0
  558. mlflow/transformers/llm_inference_utils.py +468 -0
  559. mlflow/transformers/model_io.py +301 -0
  560. mlflow/transformers/peft.py +51 -0
  561. mlflow/transformers/signature.py +183 -0
  562. mlflow/transformers/torch_utils.py +55 -0
  563. mlflow/types/__init__.py +21 -0
  564. mlflow/types/agent.py +270 -0
  565. mlflow/types/chat.py +240 -0
  566. mlflow/types/llm.py +935 -0
  567. mlflow/types/responses.py +139 -0
  568. mlflow/types/responses_helpers.py +416 -0
  569. mlflow/types/schema.py +1505 -0
  570. mlflow/types/type_hints.py +647 -0
  571. mlflow/types/utils.py +753 -0
  572. mlflow/utils/__init__.py +283 -0
  573. mlflow/utils/_capture_modules.py +256 -0
  574. mlflow/utils/_capture_transformers_modules.py +75 -0
  575. mlflow/utils/_spark_utils.py +201 -0
  576. mlflow/utils/_unity_catalog_oss_utils.py +97 -0
  577. mlflow/utils/_unity_catalog_utils.py +479 -0
  578. mlflow/utils/annotations.py +218 -0
  579. mlflow/utils/arguments_utils.py +16 -0
  580. mlflow/utils/async_logging/__init__.py +1 -0
  581. mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
  582. mlflow/utils/async_logging/async_logging_queue.py +366 -0
  583. mlflow/utils/async_logging/run_artifact.py +38 -0
  584. mlflow/utils/async_logging/run_batch.py +58 -0
  585. mlflow/utils/async_logging/run_operations.py +49 -0
  586. mlflow/utils/autologging_utils/__init__.py +737 -0
  587. mlflow/utils/autologging_utils/client.py +432 -0
  588. mlflow/utils/autologging_utils/config.py +33 -0
  589. mlflow/utils/autologging_utils/events.py +294 -0
  590. mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
  591. mlflow/utils/autologging_utils/metrics_queue.py +71 -0
  592. mlflow/utils/autologging_utils/safety.py +1104 -0
  593. mlflow/utils/autologging_utils/versioning.py +95 -0
  594. mlflow/utils/checkpoint_utils.py +206 -0
  595. mlflow/utils/class_utils.py +6 -0
  596. mlflow/utils/cli_args.py +257 -0
  597. mlflow/utils/conda.py +354 -0
  598. mlflow/utils/credentials.py +231 -0
  599. mlflow/utils/data_utils.py +17 -0
  600. mlflow/utils/databricks_utils.py +1436 -0
  601. mlflow/utils/docstring_utils.py +477 -0
  602. mlflow/utils/doctor.py +133 -0
  603. mlflow/utils/download_cloud_file_chunk.py +43 -0
  604. mlflow/utils/env_manager.py +16 -0
  605. mlflow/utils/env_pack.py +131 -0
  606. mlflow/utils/environment.py +1009 -0
  607. mlflow/utils/exception_utils.py +14 -0
  608. mlflow/utils/file_utils.py +978 -0
  609. mlflow/utils/git_utils.py +77 -0
  610. mlflow/utils/gorilla.py +797 -0
  611. mlflow/utils/import_hooks/__init__.py +363 -0
  612. mlflow/utils/lazy_load.py +51 -0
  613. mlflow/utils/logging_utils.py +168 -0
  614. mlflow/utils/mime_type_utils.py +58 -0
  615. mlflow/utils/mlflow_tags.py +103 -0
  616. mlflow/utils/model_utils.py +486 -0
  617. mlflow/utils/name_utils.py +346 -0
  618. mlflow/utils/nfs_on_spark.py +62 -0
  619. mlflow/utils/openai_utils.py +164 -0
  620. mlflow/utils/os.py +12 -0
  621. mlflow/utils/oss_registry_utils.py +29 -0
  622. mlflow/utils/plugins.py +17 -0
  623. mlflow/utils/process.py +182 -0
  624. mlflow/utils/promptlab_utils.py +146 -0
  625. mlflow/utils/proto_json_utils.py +743 -0
  626. mlflow/utils/pydantic_utils.py +54 -0
  627. mlflow/utils/request_utils.py +279 -0
  628. mlflow/utils/requirements_utils.py +704 -0
  629. mlflow/utils/rest_utils.py +673 -0
  630. mlflow/utils/search_logged_model_utils.py +127 -0
  631. mlflow/utils/search_utils.py +2111 -0
  632. mlflow/utils/secure_loading.py +221 -0
  633. mlflow/utils/security_validation.py +384 -0
  634. mlflow/utils/server_cli_utils.py +61 -0
  635. mlflow/utils/spark_utils.py +15 -0
  636. mlflow/utils/string_utils.py +138 -0
  637. mlflow/utils/thread_utils.py +63 -0
  638. mlflow/utils/time.py +54 -0
  639. mlflow/utils/timeout.py +42 -0
  640. mlflow/utils/uri.py +572 -0
  641. mlflow/utils/validation.py +662 -0
  642. mlflow/utils/virtualenv.py +458 -0
  643. mlflow/utils/warnings_utils.py +25 -0
  644. mlflow/utils/yaml_utils.py +179 -0
  645. mlflow/version.py +24 -0
@@ -0,0 +1,134 @@
1
+ import importlib.metadata
2
+ import logging
3
+ import posixpath
4
+ from concurrent.futures import Future
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Optional
7
+
8
+ from packaging.version import Version
9
+
10
+ from mlflow.entities import FileInfo
11
+ from mlflow.environment_variables import MLFLOW_MULTIPART_UPLOAD_CHUNK_SIZE
12
+ from mlflow.exceptions import MlflowException
13
+ from mlflow.store.artifact.artifact_repo import ArtifactRepository
14
+
15
+ if TYPE_CHECKING:
16
+ from databricks.sdk.service.files import FilesAPI
17
+
18
+
19
def _sdk_supports_large_file_uploads() -> bool:
    """Report whether the installed ``databricks-sdk`` is version 0.45.0 or newer.

    Returns:
        True when the installed distribution's version compares greater than or equal to
        0.45.0, False otherwise.
    """
    # https://github.com/databricks/databricks-sdk-py/commit/7ca3fb7e8643126b74c9f5779dc01fb20c1741fb
    installed_version = Version(importlib.metadata.version("databricks-sdk"))
    minimum_version = Version("0.45.0")
    return installed_version >= minimum_version
22
+
23
+
24
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
25
+
26
+
27
+ # TODO: The following artifact repositories should use this class. Migrate them.
28
+ # - databricks_sdk_models_artifact_repo.py
29
class DatabricksSdkArtifactRepository(ArtifactRepository):
    """Artifact repository that stores files through the Databricks SDK Files API.

    Every remote path handed to the Files API is built by joining ``artifact_uri`` with the
    artifact-relative path (see :meth:`full_path`).
    """

    def __init__(self, artifact_uri: str, tracking_uri: Optional[str] = None) -> None:
        """Create the repository and the ``WorkspaceClient`` it uses.

        Args:
            artifact_uri: Root location under which artifacts are stored.
            tracking_uri: Optional tracking URI forwarded to the base class.
        """
        # Imported lazily so the SDK is only required when this repository is instantiated.
        from databricks.sdk import WorkspaceClient
        from databricks.sdk.config import Config

        super().__init__(artifact_uri, tracking_uri)
        supports_large_file_uploads = _sdk_supports_large_file_uploads()
        wc = WorkspaceClient(
            config=(
                Config(enable_experimental_files_api_client=True)
                if supports_large_file_uploads
                else None
            )
        )
        if supports_large_file_uploads:
            # `Config` has a `multipart_upload_chunk_size` parameter but the constructor
            # doesn't set it. This is a bug in databricks-sdk.
            # >>> from databricks.sdk.config import Config
            # >>> config = Config(multipart_upload_chunk_size=123)
            # >>> assert config.multipart_upload_chunk_size != 123
            try:
                wc.files._config.multipart_upload_chunk_size = (
                    MLFLOW_MULTIPART_UPLOAD_CHUNK_SIZE.get()
                )
            except AttributeError:
                # Best effort: `_config` is a private attribute and may not exist.
                _logger.debug("Failed to set multipart_upload_chunk_size in Config", exc_info=True)
        self.wc = wc

    @property
    def files_api(self) -> "FilesAPI":
        """The Files API handle of the underlying workspace client."""
        return self.wc.files

    def _is_dir(self, path: str) -> bool:
        """Return True if ``path`` has directory metadata, False if the API reports NotFound."""
        from databricks.sdk.errors.platform import NotFound

        try:
            self.files_api.get_directory_metadata(path)
        except NotFound:
            return False
        return True

    def full_path(self, artifact_path: Optional[str]) -> str:
        """Join ``artifact_uri`` and ``artifact_path``; return ``artifact_uri`` if it is empty."""
        return f"{self.artifact_uri}/{artifact_path}" if artifact_path else self.artifact_uri

    def log_artifact(self, local_file: str, artifact_path: Optional[str] = None) -> None:
        """Upload a single local file, overwriting any existing remote file.

        Args:
            local_file: Path of the file to upload.
            artifact_path: Optional directory, relative to the artifact root, to upload into.

        Raises:
            MlflowException: If the file is larger than 5GB and the installed databricks-sdk
                does not support large file uploads.
        """
        local_path = Path(local_file)
        # The helper must be *called*: the bare function object is always truthy, which made
        # this guard unreachable.
        if local_path.stat().st_size > 5 * (1024**3) and not _sdk_supports_large_file_uploads():
            raise MlflowException.invalid_parameter_value(
                "Databricks SDK version < 0.45.0 does not support uploading files larger than 5GB. "
                "Please upgrade the databricks-sdk package to version >= 0.45.0."
            )

        with open(local_file, "rb") as f:
            name = local_path.name
            self.files_api.upload(
                self.full_path(posixpath.join(artifact_path, name) if artifact_path else name),
                f,
                overwrite=True,
            )

    def log_artifacts(self, local_dir: str, artifact_path: Optional[str] = None) -> None:
        """Upload every file under ``local_dir`` (recursively) using the repository thread pool.

        Args:
            local_dir: Directory whose files are uploaded.
            artifact_path: Optional directory, relative to the artifact root, to upload into.
        """
        local_dir = Path(local_dir).resolve()
        futures: list[Future[None]] = []
        with self._create_thread_pool() as executor:
            for f in local_dir.rglob("*"):
                if not f.is_file():
                    continue

                paths: list[str] = []
                if artifact_path:
                    paths.append(artifact_path)
                if f.parent != local_dir:
                    # `as_posix()` keeps forward slashes on Windows, as posixpath.join expects.
                    paths.append(f.parent.relative_to(local_dir).as_posix())

                fut = executor.submit(
                    self.log_artifact,
                    local_file=f,
                    artifact_path=posixpath.join(*paths) if paths else None,
                )
                futures.append(fut)

        # Surface the first upload failure, if any.
        for fut in futures:
            fut.result()

    def list_artifacts(self, path: Optional[str] = None) -> list[FileInfo]:
        """List the direct children of ``path``, sorted by path.

        Args:
            path: Optional directory relative to the artifact root.

        Returns:
            ``FileInfo`` entries with paths relative to ``artifact_uri``; an empty list when
            ``path`` is not a directory.
        """
        dest_path = self.full_path(path)
        if not self._is_dir(dest_path):
            return []

        file_infos: list[FileInfo] = []
        for directory_entry in self.files_api.list_directory_contents(dest_path):
            relative_path = posixpath.relpath(directory_entry.path, self.artifact_uri)
            file_infos.append(
                FileInfo(
                    path=relative_path,
                    is_dir=directory_entry.is_directory,
                    file_size=directory_entry.file_size,
                )
            )

        return sorted(file_infos, key=lambda f: f.path)

    def _download_file(self, remote_file_path: str, local_path: str) -> None:
        """Stream a remote file to ``local_path`` in 10 MiB reads."""
        download_resp = self.files_api.download(self.full_path(remote_file_path))
        with open(local_path, "wb") as f:
            while chunk := download_resp.contents.read(10 * 1024 * 1024):
                f.write(chunk)
@@ -0,0 +1,97 @@
1
+ import posixpath
2
+ from typing import Optional
3
+
4
+ from mlflow.entities import FileInfo
5
+ from mlflow.environment_variables import (
6
+ MLFLOW_MULTIPART_DOWNLOAD_CHUNK_SIZE,
7
+ )
8
+ from mlflow.store.artifact.cloud_artifact_repo import CloudArtifactRepository
9
+
10
+
11
def _get_databricks_workspace_client():
    """Return a new Databricks ``WorkspaceClient`` constructed with no arguments.

    The SDK import happens inside the function, so it is only needed when a client is
    actually requested.
    """
    from databricks.sdk import WorkspaceClient

    workspace_client = WorkspaceClient()
    return workspace_client
15
+
16
+
17
class DatabricksSDKModelsArtifactRepository(CloudArtifactRepository):
    """
    Artifact repository for model version files that goes through the Databricks SDK Files
    API, so it does not depend on which cloud holds the model artifacts.
    """

    def __init__(self, model_name, model_version):
        self.model_name = model_name
        self.model_version = model_version
        # Dots in the model name become path separators under /Models.
        name_as_path = model_name.replace(".", "/")
        self.model_base_path = f"/Models/{name_as_path}/{model_version}"
        self.client = _get_databricks_workspace_client()
        super().__init__(self.model_base_path)

    def list_artifacts(self, path: Optional[str] = None) -> list[FileInfo]:
        """List the direct children of ``path`` (relative to the model base path), sorted by path.

        Returns an empty list when the resolved path is not a directory.
        """
        target = posixpath.join(self.model_base_path, path) if path else self.model_base_path

        # Anything that is not a directory has no children to report.
        if not self._is_dir(target):
            return []

        entries = self.client.files.list_directory_contents(target)
        listing = [
            FileInfo(
                path=posixpath.relpath(entry.path, self.model_base_path),
                is_dir=entry.is_directory,
                file_size=entry.file_size,
            )
            for entry in entries
        ]
        listing.sort(key=lambda info: info.path)
        return listing

    def _is_dir(self, artifact_path):
        """Return True if directory metadata exists for ``artifact_path``, False on NotFound."""
        from databricks.sdk.errors.platform import NotFound

        try:
            self.client.files.get_directory_metadata(artifact_path)
        except NotFound:
            return False
        else:
            return True

    def _upload_to_cloud(self, cloud_credential_info, src_file_path, artifact_file_path=None):
        """Upload ``src_file_path`` to the model base path joined with ``artifact_file_path``.

        ``cloud_credential_info`` is accepted but not used here.
        """
        if artifact_file_path:
            destination = posixpath.join(self.model_base_path, artifact_file_path)
        else:
            destination = self.model_base_path

        with open(src_file_path, "rb") as source:
            self.client.files.upload(destination, source, overwrite=True)

    def log_artifact(self, local_file, artifact_path=None):
        """Upload one local file by delegating to ``_upload_to_cloud``."""
        # NOTE(review): ``artifact_path`` is forwarded as the destination file path without the
        # local file's basename appended -- confirm this is what callers expect.
        self._upload_to_cloud(
            cloud_credential_info=None,
            src_file_path=local_file,
            artifact_file_path=artifact_path,
        )

    def _download_from_cloud(self, remote_file_path, local_path):
        """Stream the remote file to ``local_path`` in chunks of the configured download size."""
        if remote_file_path:
            source_path = posixpath.join(self.model_base_path, remote_file_path)
        else:
            source_path = self.model_base_path

        stream = self.client.files.download(source_path).contents
        chunk_size = MLFLOW_MULTIPART_DOWNLOAD_CHUNK_SIZE.get()

        with open(local_path, "wb") as out:
            while True:
                chunk = stream.read(chunk_size)
                if not chunk:
                    break
                out.write(chunk)

    def _get_write_credential_infos(self, remote_file_paths):
        # Uploads via the Databricks SDK need no extra credentials: one None per path.
        return [None] * len(remote_file_paths)

    def _get_read_credential_infos(self, remote_file_paths):
        # Downloads via the Databricks SDK need no extra credentials: one None per path.
        return [None] * len(remote_file_paths)
@@ -0,0 +1,240 @@
1
+ import json
2
+ import os
3
+ import posixpath
4
+ from typing import Optional
5
+
6
+ import mlflow.utils.databricks_utils
7
+ from mlflow.entities import FileInfo
8
+ from mlflow.environment_variables import MLFLOW_ENABLE_DBFS_FUSE_ARTIFACT_REPO
9
+ from mlflow.exceptions import MlflowException
10
+ from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
11
+ from mlflow.store.artifact.artifact_repo import ArtifactRepository
12
+ from mlflow.store.artifact.databricks_artifact_repo import DatabricksArtifactRepository
13
+ from mlflow.store.artifact.databricks_logged_model_artifact_repo import (
14
+ DatabricksLoggedModelArtifactRepository,
15
+ )
16
+ from mlflow.store.artifact.local_artifact_repo import LocalArtifactRepository
17
+ from mlflow.store.tracking.rest_store import RestStore
18
+ from mlflow.tracking._tracking_service import utils
19
+ from mlflow.utils.databricks_utils import get_databricks_host_creds
20
+ from mlflow.utils.file_utils import relative_path_to_artifact_path
21
+ from mlflow.utils.rest_utils import (
22
+ RESOURCE_NON_EXISTENT,
23
+ http_request,
24
+ http_request_safe,
25
+ )
26
+ from mlflow.utils.string_utils import strip_prefix
27
+ from mlflow.utils.uri import (
28
+ get_databricks_profile_uri_from_artifact_uri,
29
+ is_databricks_acled_artifacts_uri,
30
+ is_databricks_model_registry_artifacts_uri,
31
+ is_valid_dbfs_uri,
32
+ remove_databricks_profile_info_from_artifact_uri,
33
+ strip_scheme,
34
+ )
35
+
36
# The following constants are defined as @developer_stable
# DBFS REST endpoint used to list the contents of a DBFS path.
LIST_API_ENDPOINT = "/api/2.0/dbfs/list"
# DBFS REST endpoint used to fetch a path's status (its response carries ``is_dir``).
GET_STATUS_ENDPOINT = "/api/2.0/dbfs/get-status"
# Chunk size passed to ``iter_content`` when streaming a download to disk.
DOWNLOAD_CHUNK_SIZE = 1024
40
+
41
+
42
class DbfsRestArtifactRepository(ArtifactRepository):
    """
    Artifact repository that reads and writes DBFS through the DBFS REST API.

    It handles URIs shaped like ``dbfs:/<path>`` and can only be used together with the
    RestStore.
    """

    def __init__(self, artifact_uri: str, tracking_uri: Optional[str] = None) -> None:
        if not is_valid_dbfs_uri(artifact_uri):
            raise MlflowException(
                message=(
                    "DBFS URI must be of the form dbfs:/<path> or "
                    "dbfs://profile@databricks/<path>"
                ),
                error_code=INVALID_PARAMETER_VALUE,
            )

        # Artifact operations work on a plain dbfs:/ path, so the Databricks profile info is
        # removed before the URI is stored as ``artifact_uri``.
        uri_without_profile = remove_databricks_profile_info_from_artifact_uri(artifact_uri)
        super().__init__(uri_without_profile, tracking_uri)

        profile_uri = get_databricks_profile_uri_from_artifact_uri(artifact_uri)
        if not profile_uri:
            # No profile in the URI: use the credential getter of the default store.
            self.get_host_creds = _get_host_creds_from_default_store()
        else:
            # Resolve the profile's credentials once and hand back the same object each call.
            profile_creds = get_databricks_host_creds(profile_uri)
            self.get_host_creds = lambda: profile_creds

    def _databricks_api_request(self, endpoint, method, **kwargs):
        """Send a request via ``http_request_safe`` using this repository's host credentials."""
        return http_request_safe(
            host_creds=self.get_host_creds(), endpoint=endpoint, method=method, **kwargs
        )

    def _dbfs_list_api(self, json):
        """Call the DBFS list endpoint with ``json`` as query parameters (via ``http_request``)."""
        return http_request(
            host_creds=self.get_host_creds(),
            endpoint=LIST_API_ENDPOINT,
            method="GET",
            params=json,
        )

    def _dbfs_download(self, output_path, endpoint):
        """Stream the response body of a GET on ``endpoint`` into ``output_path``."""
        with open(output_path, "wb") as out:
            resp = self._databricks_api_request(endpoint=endpoint, method="GET", stream=True)
            try:
                for piece in resp.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    out.write(piece)
            finally:
                # Always release the streamed response, even when writing fails.
                resp.close()

    def _is_directory(self, artifact_path):
        """Return whether ``artifact_path`` (or the artifact root if empty) is a DBFS directory."""
        return self._dbfs_is_dir(self._get_dbfs_path(artifact_path or ""))

    def _dbfs_is_dir(self, dbfs_path):
        """Query the get-status endpoint and return the ``is_dir`` field of its response.

        Raises:
            MlflowException: If the response has no ``is_dir`` key.
        """
        status = self._databricks_api_request(
            endpoint=GET_STATUS_ENDPOINT,
            method="GET",
            params={"path": dbfs_path},
        )
        payload = json.loads(status.text)
        try:
            return payload["is_dir"]
        except KeyError:
            raise MlflowException(f"DBFS path {dbfs_path} does not exist")

    def _get_dbfs_path(self, artifact_path):
        """Build ``/<artifact root>/<artifact_path>`` with the URI scheme stripped from the root."""
        root = strip_scheme(self.artifact_uri).lstrip("/")
        relative = artifact_path.lstrip("/")
        return f"/{root}/{relative}"

    def _get_dbfs_endpoint(self, artifact_path):
        """Return the ``/dbfs``-prefixed endpoint for ``artifact_path``."""
        return "/dbfs" + self._get_dbfs_path(artifact_path)

    def log_artifact(self, local_file, artifact_path=None):
        """POST a single local file to its DBFS endpoint."""
        file_name = os.path.basename(local_file)
        relative = posixpath.join(artifact_path, file_name) if artifact_path else file_name
        http_endpoint = self._get_dbfs_endpoint(relative)

        if os.stat(local_file).st_size == 0:
            # The API frontend doesn't like it when we post empty files to it using
            # `requests.request`, potentially due to the bug described in
            # https://github.com/requests/requests/issues/4215
            self._databricks_api_request(
                endpoint=http_endpoint, method="POST", data="", allow_redirects=False
            )
            return

        with open(local_file, "rb") as payload:
            self._databricks_api_request(
                endpoint=http_endpoint, method="POST", data=payload, allow_redirects=False
            )

    def log_artifacts(self, local_dir, artifact_path=None):
        """Upload every file found while walking ``local_dir``, mirroring its subdirectories."""
        artifact_path = artifact_path or ""
        for current_dir, _, file_names in os.walk(local_dir):
            if current_dir == local_dir:
                target_dir = artifact_path
            else:
                relative_dir = relative_path_to_artifact_path(
                    os.path.relpath(current_dir, local_dir)
                )
                target_dir = posixpath.join(artifact_path, relative_dir)
            for file_name in file_names:
                self.log_artifact(os.path.join(current_dir, file_name), target_dir)

    def list_artifacts(self, path: Optional[str] = None) -> list[FileInfo]:
        """List the entries under ``path`` (relative to the artifact root), sorted by path.

        Returns an empty list when the path does not exist or refers to a file.

        Raises:
            MlflowException: If the list API response body is not valid JSON.
        """
        dbfs_path = self._get_dbfs_path(path or "")
        response = self._dbfs_list_api({"path": dbfs_path})
        try:
            listing = json.loads(response.text)
        except ValueError:
            raise MlflowException(
                f"API request to list files under DBFS path {dbfs_path} failed with "
                f"status code {response.status_code}. Response body: {response.text}"
            )

        if listing.get("error_code", None) == RESOURCE_NON_EXISTENT:
            return []

        prefix_to_strip = strip_prefix(self.artifact_uri, "dbfs:") + "/"
        results = []
        # /api/2.0/dbfs/list will not have the 'files' key in the response for empty directories.
        for entry in listing.get("files", []):
            relative = strip_prefix(entry["path"], prefix_to_strip)
            # If `path` is a file, the DBFS list API returns a single list element with the
            # same name as `path`. The list_artifacts API expects us to return an empty list in
            # this case, so we do so here.
            if relative == path:
                return []
            entry_is_dir = entry["is_dir"]
            size = None if entry_is_dir else entry["file_size"]
            results.append(FileInfo(relative, entry_is_dir, size))
        results.sort(key=lambda info: info.path)
        return results

    def _download_file(self, remote_file_path, local_path):
        """Download ``remote_file_path`` from DBFS into ``local_path``."""
        endpoint = self._get_dbfs_endpoint(remote_file_path)
        self._dbfs_download(output_path=local_path, endpoint=endpoint)

    def delete_artifacts(self, artifact_path=None):
        """Deletion is not supported; always raises ``MlflowException``."""
        raise MlflowException("Not implemented yet")
180
+
181
+
182
def _get_host_creds_from_default_store():
    """Return the ``get_host_creds`` callable of the default tracking store.

    Raises:
        MlflowException: If the default store is not a ``RestStore``.
    """
    default_store = utils._get_store()
    if isinstance(default_store, RestStore):
        return default_store.get_host_creds
    raise MlflowException(
        "Failed to get credentials for DBFS; they are read from the "
        "Databricks CLI credentials or MLFLOW_TRACKING* environment "
        "variables."
    )
191
+
192
+
193
def dbfs_artifact_repo_factory(artifact_uri: str, tracking_uri: Optional[str] = None):
    """
    Pick and build the ArtifactRepository implementation for a DBFS artifact URI.

    This factory is used with URIs of the form ``dbfs:/<path>``. DBFS-backed artifact storage
    can only be used together with the RestStore.

    URIs of the form ``dbfs:/databricks/mlflow-tracking/<Exp-ID>/<Run-ID>/<path>`` are a
    special case: they yield a DatabricksArtifactRepository (or a
    DatabricksLoggedModelArtifactRepository for logged-model URIs), which can store access
    controlled artifacts.

    Args:
        artifact_uri: DBFS root artifact URI.
        tracking_uri: The tracking URI.

    Returns:
        Subclass of ArtifactRepository capable of storing artifacts on DBFS.
    """
    if not is_valid_dbfs_uri(artifact_uri):
        raise MlflowException(
            "DBFS URI must be of the form dbfs:/<path> or "
            "dbfs://profile@databricks/<path>, but received " + artifact_uri
        )

    cleaned_artifact_uri = artifact_uri.rstrip("/")
    db_profile_uri = get_databricks_profile_uri_from_artifact_uri(cleaned_artifact_uri)

    if is_databricks_acled_artifacts_uri(artifact_uri):
        if DatabricksLoggedModelArtifactRepository.is_logged_model_uri(artifact_uri):
            repo_cls = DatabricksLoggedModelArtifactRepository
        else:
            repo_cls = DatabricksArtifactRepository
        return repo_cls(cleaned_artifact_uri, tracking_uri)

    use_fuse_mount = (
        mlflow.utils.databricks_utils.is_dbfs_fuse_available()
        and MLFLOW_ENABLE_DBFS_FUSE_ARTIFACT_REPO.get()
        and not is_databricks_model_registry_artifacts_uri(artifact_uri)
        and db_profile_uri in (None, "databricks")
    )
    if not use_fuse_mount:
        return DbfsRestArtifactRepository(cleaned_artifact_uri, tracking_uri)

    # If the DBFS FUSE mount is available, write artifacts directly to
    # /dbfs/... using local filesystem APIs.
    # Note: it is possible for a named Databricks profile to point to the current workspace,
    # but we're going to avoid doing a complex check and assume users will use `databricks`
    # to mean the current workspace. Using `DbfsRestArtifactRepository` to access the current
    # workspace's DBFS should still work; it just may be slower.
    final_artifact_uri = remove_databricks_profile_info_from_artifact_uri(cleaned_artifact_uri)
    relative_dbfs_path = strip_prefix(final_artifact_uri, "dbfs:/")
    file_uri = f"file:///dbfs/{relative_dbfs_path}"
    return LocalArtifactRepository(file_uri, tracking_uri)
@@ -0,0 +1,132 @@
1
+ import ftplib
2
+ import os
3
+ import posixpath
4
+ import urllib.parse
5
+ from contextlib import contextmanager
6
+ from ftplib import FTP
7
+ from typing import Optional
8
+ from urllib.parse import unquote
9
+
10
+ from mlflow.entities.file_info import FileInfo
11
+ from mlflow.exceptions import MlflowException
12
+ from mlflow.store.artifact.artifact_repo import ArtifactRepository
13
+ from mlflow.utils.file_utils import relative_path_to_artifact_path
14
+
15
+
16
class FTPArtifactRepository(ArtifactRepository):
    """Stores artifacts as files in a remote directory, via ftp."""

    def __init__(self, artifact_uri: str, tracking_uri: Optional[str] = None) -> None:
        """Parse connection settings and the remote root path out of ``artifact_uri``.

        Missing parts fall back to host ``localhost``, port 21, an empty password and
        root path ``/``. The password is percent-decoded.
        """
        super().__init__(artifact_uri, tracking_uri)
        parsed = urllib.parse.urlparse(artifact_uri)
        self.config = {
            "host": parsed.hostname,
            "port": 21 if parsed.port is None else parsed.port,
            "username": parsed.username,
            "password": parsed.password,
        }
        self.path = parsed.path or "/"

        if self.config["host"] is None:
            self.config["host"] = "localhost"
        if self.config["password"] is None:
            self.config["password"] = ""
        else:
            self.config["password"] = unquote(parsed.password)

    @contextmanager
    def get_ftp_client(self):
        """Yield a connected, logged-in ``FTP`` client and always close it afterwards."""
        ftp = FTP()
        try:
            ftp.connect(self.config["host"], self.config["port"])
            ftp.login(self.config["username"], self.config["password"])
            yield ftp
        finally:
            # Runs on success and on any failure in login or in the caller's ``with`` body,
            # so the connection is never leaked.
            ftp.close()

    @staticmethod
    def _is_dir(ftp, full_file_path):
        """Return True if the server lets us ``cwd`` into ``full_file_path``."""
        try:
            ftp.cwd(full_file_path)
            return True
        except ftplib.error_perm:
            return False

    @staticmethod
    def _mkdir(ftp, artifact_dir):
        """Create ``artifact_dir`` on the server, creating missing parents first.

        Raises:
            ftplib.error_perm: If the directory still cannot be created once its parent
                exists, or if there is no parent left to create.
        """
        try:
            if not FTPArtifactRepository._is_dir(ftp, artifact_dir):
                ftp.mkd(artifact_dir)
        except ftplib.error_perm:
            head, _ = posixpath.split(artifact_dir)
            if not head or head == artifact_dir:
                # Nothing above this path to create; retrying would recurse forever.
                raise
            FTPArtifactRepository._mkdir(ftp, head)
            # Retry once now that the parent exists. A second failure (e.g. permission
            # denied) propagates instead of recursing without bound.
            if not FTPArtifactRepository._is_dir(ftp, artifact_dir):
                ftp.mkd(artifact_dir)

    @staticmethod
    def _size(ftp, full_file_path):
        """Return the size reported by the server for ``full_file_path``.

        Issues ``TYPE I`` before the size query and ``TYPE A`` afterwards.
        """
        ftp.voidcmd("TYPE I")
        size = ftp.size(full_file_path)
        ftp.voidcmd("TYPE A")
        return size

    def log_artifact(self, local_file, artifact_path=None):
        """Upload ``local_file`` into ``artifact_path`` (or the root), creating the directory."""
        with self.get_ftp_client() as ftp:
            artifact_dir = posixpath.join(self.path, artifact_path) if artifact_path else self.path
            self._mkdir(ftp, artifact_dir)
            with open(local_file, "rb") as f:
                ftp.cwd(artifact_dir)
                ftp.storbinary("STOR " + os.path.basename(local_file), f)

    def log_artifacts(self, local_dir, artifact_path=None):
        """Upload the contents of ``local_dir`` recursively, preserving its directory layout."""
        dest_path = posixpath.join(self.path, artifact_path) if artifact_path else self.path

        local_dir = os.path.abspath(local_dir)
        for root, _, filenames in os.walk(local_dir):
            upload_path = dest_path
            if root != local_dir:
                rel_path = os.path.relpath(root, local_dir)
                rel_upload_path = relative_path_to_artifact_path(rel_path)
                upload_path = posixpath.join(dest_path, rel_upload_path)
            if not filenames:
                # No file upload will create this directory, so create it explicitly.
                with self.get_ftp_client() as ftp:
                    self._mkdir(ftp, upload_path)
            for f in filenames:
                local_file = os.path.join(root, f)
                if os.path.isfile(local_file):
                    self.log_artifact(local_file, upload_path)

    def _is_directory(self, artifact_path):
        """Return whether ``artifact_path`` (or the root if empty) is a directory on the server."""
        artifact_dir = self.path
        list_dir = posixpath.join(artifact_dir, artifact_path) if artifact_path else artifact_dir
        with self.get_ftp_client() as ftp:
            return self._is_dir(ftp, list_dir)

    def list_artifacts(self, path=None):
        """List the direct children of ``path`` (relative to the root) as ``FileInfo`` objects.

        Returns an empty list when the resolved path is not a directory.
        """
        with self.get_ftp_client() as ftp:
            artifact_dir = self.path
            list_dir = posixpath.join(artifact_dir, path) if path else artifact_dir
            if not self._is_dir(ftp, list_dir):
                return []
            # Make sure we work with bare file names because ftp.nlst may return
            # absolute paths; also drop the "." and ".." entries.
            artifact_files = [
                name
                for name in (os.path.basename(f) for f in ftp.nlst(list_dir))
                if name not in (".", "..")
            ]
            infos = []
            for file_name in artifact_files:
                file_path = file_name if path is None else posixpath.join(path, file_name)
                full_file_path = posixpath.join(list_dir, file_name)
                if self._is_dir(ftp, full_file_path):
                    infos.append(FileInfo(file_path, True, None))
                else:
                    size = self._size(ftp, full_file_path)
                    infos.append(FileInfo(file_path, False, size))
        return infos

    def _download_file(self, remote_file_path, local_path):
        """Retrieve ``remote_file_path`` (relative to the root) into ``local_path``."""
        remote_full_path = (
            posixpath.join(self.path, remote_file_path) if remote_file_path else self.path
        )
        with self.get_ftp_client() as ftp:
            with open(local_path, "wb") as f:
                ftp.retrbinary("RETR " + remote_full_path, f.write)

    def delete_artifacts(self, artifact_path=None):
        """Deletion is not supported; always raises ``MlflowException``."""
        raise MlflowException("Not implemented yet")