genesis-flow 1.0.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (645)
  1. genesis_flow-1.0.0.dist-info/METADATA +822 -0
  2. genesis_flow-1.0.0.dist-info/RECORD +645 -0
  3. genesis_flow-1.0.0.dist-info/WHEEL +5 -0
  4. genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
  5. genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
  6. genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
  7. mlflow/__init__.py +367 -0
  8. mlflow/__main__.py +3 -0
  9. mlflow/ag2/__init__.py +56 -0
  10. mlflow/ag2/ag2_logger.py +294 -0
  11. mlflow/anthropic/__init__.py +40 -0
  12. mlflow/anthropic/autolog.py +129 -0
  13. mlflow/anthropic/chat.py +144 -0
  14. mlflow/artifacts/__init__.py +268 -0
  15. mlflow/autogen/__init__.py +144 -0
  16. mlflow/autogen/chat.py +142 -0
  17. mlflow/azure/__init__.py +26 -0
  18. mlflow/azure/auth_handler.py +257 -0
  19. mlflow/azure/client.py +319 -0
  20. mlflow/azure/config.py +120 -0
  21. mlflow/azure/connection_factory.py +340 -0
  22. mlflow/azure/exceptions.py +27 -0
  23. mlflow/azure/stores.py +327 -0
  24. mlflow/azure/utils.py +183 -0
  25. mlflow/bedrock/__init__.py +45 -0
  26. mlflow/bedrock/_autolog.py +202 -0
  27. mlflow/bedrock/chat.py +122 -0
  28. mlflow/bedrock/stream.py +160 -0
  29. mlflow/bedrock/utils.py +43 -0
  30. mlflow/cli.py +707 -0
  31. mlflow/client.py +12 -0
  32. mlflow/config/__init__.py +56 -0
  33. mlflow/crewai/__init__.py +79 -0
  34. mlflow/crewai/autolog.py +253 -0
  35. mlflow/crewai/chat.py +29 -0
  36. mlflow/data/__init__.py +75 -0
  37. mlflow/data/artifact_dataset_sources.py +170 -0
  38. mlflow/data/code_dataset_source.py +40 -0
  39. mlflow/data/dataset.py +123 -0
  40. mlflow/data/dataset_registry.py +168 -0
  41. mlflow/data/dataset_source.py +110 -0
  42. mlflow/data/dataset_source_registry.py +219 -0
  43. mlflow/data/delta_dataset_source.py +167 -0
  44. mlflow/data/digest_utils.py +108 -0
  45. mlflow/data/evaluation_dataset.py +562 -0
  46. mlflow/data/filesystem_dataset_source.py +81 -0
  47. mlflow/data/http_dataset_source.py +145 -0
  48. mlflow/data/huggingface_dataset.py +258 -0
  49. mlflow/data/huggingface_dataset_source.py +118 -0
  50. mlflow/data/meta_dataset.py +104 -0
  51. mlflow/data/numpy_dataset.py +223 -0
  52. mlflow/data/pandas_dataset.py +231 -0
  53. mlflow/data/polars_dataset.py +352 -0
  54. mlflow/data/pyfunc_dataset_mixin.py +31 -0
  55. mlflow/data/schema.py +76 -0
  56. mlflow/data/sources.py +1 -0
  57. mlflow/data/spark_dataset.py +406 -0
  58. mlflow/data/spark_dataset_source.py +74 -0
  59. mlflow/data/spark_delta_utils.py +118 -0
  60. mlflow/data/tensorflow_dataset.py +350 -0
  61. mlflow/data/uc_volume_dataset_source.py +81 -0
  62. mlflow/db.py +27 -0
  63. mlflow/dspy/__init__.py +17 -0
  64. mlflow/dspy/autolog.py +197 -0
  65. mlflow/dspy/callback.py +398 -0
  66. mlflow/dspy/constant.py +1 -0
  67. mlflow/dspy/load.py +93 -0
  68. mlflow/dspy/save.py +393 -0
  69. mlflow/dspy/util.py +109 -0
  70. mlflow/dspy/wrapper.py +226 -0
  71. mlflow/entities/__init__.py +104 -0
  72. mlflow/entities/_mlflow_object.py +52 -0
  73. mlflow/entities/assessment.py +545 -0
  74. mlflow/entities/assessment_error.py +80 -0
  75. mlflow/entities/assessment_source.py +141 -0
  76. mlflow/entities/dataset.py +92 -0
  77. mlflow/entities/dataset_input.py +51 -0
  78. mlflow/entities/dataset_summary.py +62 -0
  79. mlflow/entities/document.py +48 -0
  80. mlflow/entities/experiment.py +109 -0
  81. mlflow/entities/experiment_tag.py +35 -0
  82. mlflow/entities/file_info.py +45 -0
  83. mlflow/entities/input_tag.py +35 -0
  84. mlflow/entities/lifecycle_stage.py +35 -0
  85. mlflow/entities/logged_model.py +228 -0
  86. mlflow/entities/logged_model_input.py +26 -0
  87. mlflow/entities/logged_model_output.py +32 -0
  88. mlflow/entities/logged_model_parameter.py +46 -0
  89. mlflow/entities/logged_model_status.py +74 -0
  90. mlflow/entities/logged_model_tag.py +33 -0
  91. mlflow/entities/metric.py +200 -0
  92. mlflow/entities/model_registry/__init__.py +29 -0
  93. mlflow/entities/model_registry/_model_registry_entity.py +13 -0
  94. mlflow/entities/model_registry/model_version.py +243 -0
  95. mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
  96. mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
  97. mlflow/entities/model_registry/model_version_search.py +25 -0
  98. mlflow/entities/model_registry/model_version_stages.py +25 -0
  99. mlflow/entities/model_registry/model_version_status.py +35 -0
  100. mlflow/entities/model_registry/model_version_tag.py +35 -0
  101. mlflow/entities/model_registry/prompt.py +73 -0
  102. mlflow/entities/model_registry/prompt_version.py +244 -0
  103. mlflow/entities/model_registry/registered_model.py +175 -0
  104. mlflow/entities/model_registry/registered_model_alias.py +35 -0
  105. mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
  106. mlflow/entities/model_registry/registered_model_search.py +25 -0
  107. mlflow/entities/model_registry/registered_model_tag.py +35 -0
  108. mlflow/entities/multipart_upload.py +74 -0
  109. mlflow/entities/param.py +49 -0
  110. mlflow/entities/run.py +97 -0
  111. mlflow/entities/run_data.py +84 -0
  112. mlflow/entities/run_info.py +188 -0
  113. mlflow/entities/run_inputs.py +59 -0
  114. mlflow/entities/run_outputs.py +43 -0
  115. mlflow/entities/run_status.py +41 -0
  116. mlflow/entities/run_tag.py +36 -0
  117. mlflow/entities/source_type.py +31 -0
  118. mlflow/entities/span.py +774 -0
  119. mlflow/entities/span_event.py +96 -0
  120. mlflow/entities/span_status.py +102 -0
  121. mlflow/entities/trace.py +317 -0
  122. mlflow/entities/trace_data.py +71 -0
  123. mlflow/entities/trace_info.py +220 -0
  124. mlflow/entities/trace_info_v2.py +162 -0
  125. mlflow/entities/trace_location.py +173 -0
  126. mlflow/entities/trace_state.py +39 -0
  127. mlflow/entities/trace_status.py +68 -0
  128. mlflow/entities/view_type.py +51 -0
  129. mlflow/environment_variables.py +866 -0
  130. mlflow/evaluation/__init__.py +16 -0
  131. mlflow/evaluation/assessment.py +369 -0
  132. mlflow/evaluation/evaluation.py +411 -0
  133. mlflow/evaluation/evaluation_tag.py +61 -0
  134. mlflow/evaluation/fluent.py +48 -0
  135. mlflow/evaluation/utils.py +201 -0
  136. mlflow/exceptions.py +213 -0
  137. mlflow/experiments.py +140 -0
  138. mlflow/gemini/__init__.py +81 -0
  139. mlflow/gemini/autolog.py +186 -0
  140. mlflow/gemini/chat.py +261 -0
  141. mlflow/genai/__init__.py +71 -0
  142. mlflow/genai/datasets/__init__.py +67 -0
  143. mlflow/genai/datasets/evaluation_dataset.py +131 -0
  144. mlflow/genai/evaluation/__init__.py +3 -0
  145. mlflow/genai/evaluation/base.py +411 -0
  146. mlflow/genai/evaluation/constant.py +23 -0
  147. mlflow/genai/evaluation/utils.py +244 -0
  148. mlflow/genai/judges/__init__.py +21 -0
  149. mlflow/genai/judges/databricks.py +404 -0
  150. mlflow/genai/label_schemas/__init__.py +153 -0
  151. mlflow/genai/label_schemas/label_schemas.py +209 -0
  152. mlflow/genai/labeling/__init__.py +159 -0
  153. mlflow/genai/labeling/labeling.py +250 -0
  154. mlflow/genai/optimize/__init__.py +13 -0
  155. mlflow/genai/optimize/base.py +198 -0
  156. mlflow/genai/optimize/optimizers/__init__.py +4 -0
  157. mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
  158. mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
  159. mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
  160. mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
  161. mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
  162. mlflow/genai/optimize/types.py +75 -0
  163. mlflow/genai/optimize/util.py +30 -0
  164. mlflow/genai/prompts/__init__.py +206 -0
  165. mlflow/genai/scheduled_scorers.py +431 -0
  166. mlflow/genai/scorers/__init__.py +26 -0
  167. mlflow/genai/scorers/base.py +492 -0
  168. mlflow/genai/scorers/builtin_scorers.py +765 -0
  169. mlflow/genai/scorers/scorer_utils.py +138 -0
  170. mlflow/genai/scorers/validation.py +165 -0
  171. mlflow/genai/utils/data_validation.py +146 -0
  172. mlflow/genai/utils/enum_utils.py +23 -0
  173. mlflow/genai/utils/trace_utils.py +211 -0
  174. mlflow/groq/__init__.py +42 -0
  175. mlflow/groq/_groq_autolog.py +74 -0
  176. mlflow/johnsnowlabs/__init__.py +888 -0
  177. mlflow/langchain/__init__.py +24 -0
  178. mlflow/langchain/api_request_parallel_processor.py +330 -0
  179. mlflow/langchain/autolog.py +147 -0
  180. mlflow/langchain/chat_agent_langgraph.py +340 -0
  181. mlflow/langchain/constant.py +1 -0
  182. mlflow/langchain/constants.py +1 -0
  183. mlflow/langchain/databricks_dependencies.py +444 -0
  184. mlflow/langchain/langchain_tracer.py +597 -0
  185. mlflow/langchain/model.py +919 -0
  186. mlflow/langchain/output_parsers.py +142 -0
  187. mlflow/langchain/retriever_chain.py +153 -0
  188. mlflow/langchain/runnables.py +527 -0
  189. mlflow/langchain/utils/chat.py +402 -0
  190. mlflow/langchain/utils/logging.py +671 -0
  191. mlflow/langchain/utils/serialization.py +36 -0
  192. mlflow/legacy_databricks_cli/__init__.py +0 -0
  193. mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
  194. mlflow/legacy_databricks_cli/configure/provider.py +482 -0
  195. mlflow/litellm/__init__.py +175 -0
  196. mlflow/llama_index/__init__.py +22 -0
  197. mlflow/llama_index/autolog.py +55 -0
  198. mlflow/llama_index/chat.py +43 -0
  199. mlflow/llama_index/constant.py +1 -0
  200. mlflow/llama_index/model.py +577 -0
  201. mlflow/llama_index/pyfunc_wrapper.py +332 -0
  202. mlflow/llama_index/serialize_objects.py +188 -0
  203. mlflow/llama_index/tracer.py +561 -0
  204. mlflow/metrics/__init__.py +479 -0
  205. mlflow/metrics/base.py +39 -0
  206. mlflow/metrics/genai/__init__.py +25 -0
  207. mlflow/metrics/genai/base.py +101 -0
  208. mlflow/metrics/genai/genai_metric.py +771 -0
  209. mlflow/metrics/genai/metric_definitions.py +450 -0
  210. mlflow/metrics/genai/model_utils.py +371 -0
  211. mlflow/metrics/genai/prompt_template.py +68 -0
  212. mlflow/metrics/genai/prompts/__init__.py +0 -0
  213. mlflow/metrics/genai/prompts/v1.py +422 -0
  214. mlflow/metrics/genai/utils.py +6 -0
  215. mlflow/metrics/metric_definitions.py +619 -0
  216. mlflow/mismatch.py +34 -0
  217. mlflow/mistral/__init__.py +34 -0
  218. mlflow/mistral/autolog.py +71 -0
  219. mlflow/mistral/chat.py +135 -0
  220. mlflow/ml_package_versions.py +452 -0
  221. mlflow/models/__init__.py +97 -0
  222. mlflow/models/auth_policy.py +83 -0
  223. mlflow/models/cli.py +354 -0
  224. mlflow/models/container/__init__.py +294 -0
  225. mlflow/models/container/scoring_server/__init__.py +0 -0
  226. mlflow/models/container/scoring_server/nginx.conf +39 -0
  227. mlflow/models/dependencies_schemas.py +287 -0
  228. mlflow/models/display_utils.py +158 -0
  229. mlflow/models/docker_utils.py +211 -0
  230. mlflow/models/evaluation/__init__.py +23 -0
  231. mlflow/models/evaluation/_shap_patch.py +64 -0
  232. mlflow/models/evaluation/artifacts.py +194 -0
  233. mlflow/models/evaluation/base.py +1811 -0
  234. mlflow/models/evaluation/calibration_curve.py +109 -0
  235. mlflow/models/evaluation/default_evaluator.py +996 -0
  236. mlflow/models/evaluation/deprecated.py +23 -0
  237. mlflow/models/evaluation/evaluator_registry.py +80 -0
  238. mlflow/models/evaluation/evaluators/classifier.py +704 -0
  239. mlflow/models/evaluation/evaluators/default.py +233 -0
  240. mlflow/models/evaluation/evaluators/regressor.py +96 -0
  241. mlflow/models/evaluation/evaluators/shap.py +296 -0
  242. mlflow/models/evaluation/lift_curve.py +178 -0
  243. mlflow/models/evaluation/utils/metric.py +123 -0
  244. mlflow/models/evaluation/utils/trace.py +179 -0
  245. mlflow/models/evaluation/validation.py +434 -0
  246. mlflow/models/flavor_backend.py +93 -0
  247. mlflow/models/flavor_backend_registry.py +53 -0
  248. mlflow/models/model.py +1639 -0
  249. mlflow/models/model_config.py +150 -0
  250. mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
  251. mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
  252. mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
  253. mlflow/models/python_api.py +369 -0
  254. mlflow/models/rag_signatures.py +128 -0
  255. mlflow/models/resources.py +321 -0
  256. mlflow/models/signature.py +662 -0
  257. mlflow/models/utils.py +2054 -0
  258. mlflow/models/wheeled_model.py +280 -0
  259. mlflow/openai/__init__.py +57 -0
  260. mlflow/openai/_agent_tracer.py +364 -0
  261. mlflow/openai/api_request_parallel_processor.py +131 -0
  262. mlflow/openai/autolog.py +509 -0
  263. mlflow/openai/constant.py +1 -0
  264. mlflow/openai/model.py +824 -0
  265. mlflow/openai/utils/chat_schema.py +367 -0
  266. mlflow/optuna/__init__.py +3 -0
  267. mlflow/optuna/storage.py +646 -0
  268. mlflow/plugins/__init__.py +72 -0
  269. mlflow/plugins/base.py +358 -0
  270. mlflow/plugins/builtin/__init__.py +24 -0
  271. mlflow/plugins/builtin/pytorch_plugin.py +150 -0
  272. mlflow/plugins/builtin/sklearn_plugin.py +158 -0
  273. mlflow/plugins/builtin/transformers_plugin.py +187 -0
  274. mlflow/plugins/cli.py +321 -0
  275. mlflow/plugins/discovery.py +340 -0
  276. mlflow/plugins/manager.py +465 -0
  277. mlflow/plugins/registry.py +316 -0
  278. mlflow/plugins/templates/framework_plugin_template.py +329 -0
  279. mlflow/prompt/constants.py +20 -0
  280. mlflow/prompt/promptlab_model.py +197 -0
  281. mlflow/prompt/registry_utils.py +248 -0
  282. mlflow/promptflow/__init__.py +495 -0
  283. mlflow/protos/__init__.py +0 -0
  284. mlflow/protos/assessments_pb2.py +174 -0
  285. mlflow/protos/databricks_artifacts_pb2.py +489 -0
  286. mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
  287. mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
  288. mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
  289. mlflow/protos/databricks_pb2.py +267 -0
  290. mlflow/protos/databricks_trace_server_pb2.py +374 -0
  291. mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
  292. mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
  293. mlflow/protos/facet_feature_statistics_pb2.py +296 -0
  294. mlflow/protos/internal_pb2.py +77 -0
  295. mlflow/protos/mlflow_artifacts_pb2.py +336 -0
  296. mlflow/protos/model_registry_pb2.py +1073 -0
  297. mlflow/protos/scalapb/__init__.py +0 -0
  298. mlflow/protos/scalapb/scalapb_pb2.py +104 -0
  299. mlflow/protos/service_pb2.py +2600 -0
  300. mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
  301. mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
  302. mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
  303. mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
  304. mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
  305. mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
  306. mlflow/py.typed +0 -0
  307. mlflow/pydantic_ai/__init__.py +57 -0
  308. mlflow/pydantic_ai/autolog.py +173 -0
  309. mlflow/pyfunc/__init__.py +3844 -0
  310. mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
  311. mlflow/pyfunc/backend.py +523 -0
  312. mlflow/pyfunc/context.py +78 -0
  313. mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
  314. mlflow/pyfunc/loaders/__init__.py +7 -0
  315. mlflow/pyfunc/loaders/chat_agent.py +117 -0
  316. mlflow/pyfunc/loaders/chat_model.py +125 -0
  317. mlflow/pyfunc/loaders/code_model.py +31 -0
  318. mlflow/pyfunc/loaders/responses_agent.py +112 -0
  319. mlflow/pyfunc/mlserver.py +46 -0
  320. mlflow/pyfunc/model.py +1473 -0
  321. mlflow/pyfunc/scoring_server/__init__.py +604 -0
  322. mlflow/pyfunc/scoring_server/app.py +7 -0
  323. mlflow/pyfunc/scoring_server/client.py +146 -0
  324. mlflow/pyfunc/spark_model_cache.py +48 -0
  325. mlflow/pyfunc/stdin_server.py +44 -0
  326. mlflow/pyfunc/utils/__init__.py +3 -0
  327. mlflow/pyfunc/utils/data_validation.py +224 -0
  328. mlflow/pyfunc/utils/environment.py +22 -0
  329. mlflow/pyfunc/utils/input_converter.py +47 -0
  330. mlflow/pyfunc/utils/serving_data_parser.py +11 -0
  331. mlflow/pytorch/__init__.py +1171 -0
  332. mlflow/pytorch/_lightning_autolog.py +580 -0
  333. mlflow/pytorch/_pytorch_autolog.py +50 -0
  334. mlflow/pytorch/pickle_module.py +35 -0
  335. mlflow/rfunc/__init__.py +42 -0
  336. mlflow/rfunc/backend.py +134 -0
  337. mlflow/runs.py +89 -0
  338. mlflow/server/__init__.py +302 -0
  339. mlflow/server/auth/__init__.py +1224 -0
  340. mlflow/server/auth/__main__.py +4 -0
  341. mlflow/server/auth/basic_auth.ini +6 -0
  342. mlflow/server/auth/cli.py +11 -0
  343. mlflow/server/auth/client.py +537 -0
  344. mlflow/server/auth/config.py +34 -0
  345. mlflow/server/auth/db/__init__.py +0 -0
  346. mlflow/server/auth/db/cli.py +18 -0
  347. mlflow/server/auth/db/migrations/__init__.py +0 -0
  348. mlflow/server/auth/db/migrations/alembic.ini +110 -0
  349. mlflow/server/auth/db/migrations/env.py +76 -0
  350. mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
  351. mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
  352. mlflow/server/auth/db/models.py +67 -0
  353. mlflow/server/auth/db/utils.py +37 -0
  354. mlflow/server/auth/entities.py +165 -0
  355. mlflow/server/auth/logo.py +14 -0
  356. mlflow/server/auth/permissions.py +65 -0
  357. mlflow/server/auth/routes.py +18 -0
  358. mlflow/server/auth/sqlalchemy_store.py +263 -0
  359. mlflow/server/graphql/__init__.py +0 -0
  360. mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
  361. mlflow/server/graphql/graphql_custom_scalars.py +24 -0
  362. mlflow/server/graphql/graphql_errors.py +15 -0
  363. mlflow/server/graphql/graphql_no_batching.py +89 -0
  364. mlflow/server/graphql/graphql_schema_extensions.py +74 -0
  365. mlflow/server/handlers.py +3217 -0
  366. mlflow/server/prometheus_exporter.py +17 -0
  367. mlflow/server/validation.py +30 -0
  368. mlflow/shap/__init__.py +691 -0
  369. mlflow/sklearn/__init__.py +1994 -0
  370. mlflow/sklearn/utils.py +1041 -0
  371. mlflow/smolagents/__init__.py +66 -0
  372. mlflow/smolagents/autolog.py +139 -0
  373. mlflow/smolagents/chat.py +29 -0
  374. mlflow/store/__init__.py +10 -0
  375. mlflow/store/_unity_catalog/__init__.py +1 -0
  376. mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
  377. mlflow/store/_unity_catalog/lineage/constants.py +2 -0
  378. mlflow/store/_unity_catalog/registry/__init__.py +6 -0
  379. mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
  380. mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
  381. mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
  382. mlflow/store/_unity_catalog/registry/utils.py +121 -0
  383. mlflow/store/artifact/__init__.py +0 -0
  384. mlflow/store/artifact/artifact_repo.py +472 -0
  385. mlflow/store/artifact/artifact_repository_registry.py +154 -0
  386. mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
  387. mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
  388. mlflow/store/artifact/cli.py +141 -0
  389. mlflow/store/artifact/cloud_artifact_repo.py +332 -0
  390. mlflow/store/artifact/databricks_artifact_repo.py +729 -0
  391. mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
  392. mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
  393. mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
  394. mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
  395. mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
  396. mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
  397. mlflow/store/artifact/ftp_artifact_repo.py +132 -0
  398. mlflow/store/artifact/gcs_artifact_repo.py +296 -0
  399. mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
  400. mlflow/store/artifact/http_artifact_repo.py +218 -0
  401. mlflow/store/artifact/local_artifact_repo.py +142 -0
  402. mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
  403. mlflow/store/artifact/models_artifact_repo.py +259 -0
  404. mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
  405. mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
  406. mlflow/store/artifact/r2_artifact_repo.py +70 -0
  407. mlflow/store/artifact/runs_artifact_repo.py +265 -0
  408. mlflow/store/artifact/s3_artifact_repo.py +330 -0
  409. mlflow/store/artifact/sftp_artifact_repo.py +141 -0
  410. mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
  411. mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
  412. mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
  413. mlflow/store/artifact/utils/__init__.py +0 -0
  414. mlflow/store/artifact/utils/models.py +148 -0
  415. mlflow/store/db/__init__.py +0 -0
  416. mlflow/store/db/base_sql_model.py +3 -0
  417. mlflow/store/db/db_types.py +10 -0
  418. mlflow/store/db/utils.py +314 -0
  419. mlflow/store/db_migrations/__init__.py +0 -0
  420. mlflow/store/db_migrations/alembic.ini +74 -0
  421. mlflow/store/db_migrations/env.py +84 -0
  422. mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
  423. mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
  424. mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
  425. mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
  426. mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
  427. mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
  428. mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
  429. mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
  430. mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
  431. mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
  432. mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
  433. mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
  434. mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
  435. mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
  436. mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
  437. mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
  438. mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
  439. mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
  440. mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
  441. mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
  442. mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
  443. mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
  444. mlflow/store/db_migrations/versions/__init__.py +0 -0
  445. mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
  446. mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
  447. mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
  448. mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
  449. mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
  450. mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
  451. mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
  452. mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
  453. mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
  454. mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
  455. mlflow/store/entities/__init__.py +3 -0
  456. mlflow/store/entities/paged_list.py +18 -0
  457. mlflow/store/model_registry/__init__.py +10 -0
  458. mlflow/store/model_registry/abstract_store.py +1081 -0
  459. mlflow/store/model_registry/base_rest_store.py +44 -0
  460. mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
  461. mlflow/store/model_registry/dbmodels/__init__.py +0 -0
  462. mlflow/store/model_registry/dbmodels/models.py +206 -0
  463. mlflow/store/model_registry/file_store.py +1091 -0
  464. mlflow/store/model_registry/rest_store.py +481 -0
  465. mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
  466. mlflow/store/tracking/__init__.py +23 -0
  467. mlflow/store/tracking/abstract_store.py +816 -0
  468. mlflow/store/tracking/dbmodels/__init__.py +0 -0
  469. mlflow/store/tracking/dbmodels/initial_models.py +243 -0
  470. mlflow/store/tracking/dbmodels/models.py +1073 -0
  471. mlflow/store/tracking/file_store.py +2438 -0
  472. mlflow/store/tracking/postgres_managed_identity.py +146 -0
  473. mlflow/store/tracking/rest_store.py +1131 -0
  474. mlflow/store/tracking/sqlalchemy_store.py +2785 -0
  475. mlflow/system_metrics/__init__.py +61 -0
  476. mlflow/system_metrics/metrics/__init__.py +0 -0
  477. mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
  478. mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
  479. mlflow/system_metrics/metrics/disk_monitor.py +21 -0
  480. mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
  481. mlflow/system_metrics/metrics/network_monitor.py +34 -0
  482. mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
  483. mlflow/system_metrics/system_metrics_monitor.py +198 -0
  484. mlflow/tracing/__init__.py +16 -0
  485. mlflow/tracing/assessment.py +356 -0
  486. mlflow/tracing/client.py +531 -0
  487. mlflow/tracing/config.py +125 -0
  488. mlflow/tracing/constant.py +105 -0
  489. mlflow/tracing/destination.py +81 -0
  490. mlflow/tracing/display/__init__.py +40 -0
  491. mlflow/tracing/display/display_handler.py +196 -0
  492. mlflow/tracing/export/async_export_queue.py +186 -0
  493. mlflow/tracing/export/inference_table.py +138 -0
  494. mlflow/tracing/export/mlflow_v3.py +137 -0
  495. mlflow/tracing/export/utils.py +70 -0
  496. mlflow/tracing/fluent.py +1417 -0
  497. mlflow/tracing/processor/base_mlflow.py +199 -0
  498. mlflow/tracing/processor/inference_table.py +175 -0
  499. mlflow/tracing/processor/mlflow_v3.py +47 -0
  500. mlflow/tracing/processor/otel.py +73 -0
  501. mlflow/tracing/provider.py +487 -0
  502. mlflow/tracing/trace_manager.py +200 -0
  503. mlflow/tracing/utils/__init__.py +616 -0
  504. mlflow/tracing/utils/artifact_utils.py +28 -0
  505. mlflow/tracing/utils/copy.py +55 -0
  506. mlflow/tracing/utils/environment.py +55 -0
  507. mlflow/tracing/utils/exception.py +21 -0
  508. mlflow/tracing/utils/once.py +35 -0
  509. mlflow/tracing/utils/otlp.py +63 -0
  510. mlflow/tracing/utils/processor.py +54 -0
  511. mlflow/tracing/utils/search.py +292 -0
  512. mlflow/tracing/utils/timeout.py +250 -0
  513. mlflow/tracing/utils/token.py +19 -0
  514. mlflow/tracing/utils/truncation.py +124 -0
  515. mlflow/tracing/utils/warning.py +76 -0
  516. mlflow/tracking/__init__.py +39 -0
  517. mlflow/tracking/_model_registry/__init__.py +1 -0
  518. mlflow/tracking/_model_registry/client.py +764 -0
  519. mlflow/tracking/_model_registry/fluent.py +853 -0
  520. mlflow/tracking/_model_registry/registry.py +67 -0
  521. mlflow/tracking/_model_registry/utils.py +251 -0
  522. mlflow/tracking/_tracking_service/__init__.py +0 -0
  523. mlflow/tracking/_tracking_service/client.py +883 -0
  524. mlflow/tracking/_tracking_service/registry.py +56 -0
  525. mlflow/tracking/_tracking_service/utils.py +275 -0
  526. mlflow/tracking/artifact_utils.py +179 -0
  527. mlflow/tracking/client.py +5900 -0
  528. mlflow/tracking/context/__init__.py +0 -0
  529. mlflow/tracking/context/abstract_context.py +35 -0
  530. mlflow/tracking/context/databricks_cluster_context.py +15 -0
  531. mlflow/tracking/context/databricks_command_context.py +15 -0
  532. mlflow/tracking/context/databricks_job_context.py +49 -0
  533. mlflow/tracking/context/databricks_notebook_context.py +41 -0
  534. mlflow/tracking/context/databricks_repo_context.py +43 -0
  535. mlflow/tracking/context/default_context.py +51 -0
  536. mlflow/tracking/context/git_context.py +32 -0
  537. mlflow/tracking/context/registry.py +98 -0
  538. mlflow/tracking/context/system_environment_context.py +15 -0
  539. mlflow/tracking/default_experiment/__init__.py +1 -0
  540. mlflow/tracking/default_experiment/abstract_context.py +43 -0
  541. mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
  542. mlflow/tracking/default_experiment/registry.py +75 -0
  543. mlflow/tracking/fluent.py +3595 -0
  544. mlflow/tracking/metric_value_conversion_utils.py +93 -0
  545. mlflow/tracking/multimedia.py +206 -0
  546. mlflow/tracking/registry.py +86 -0
  547. mlflow/tracking/request_auth/__init__.py +0 -0
  548. mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
  549. mlflow/tracking/request_auth/registry.py +60 -0
  550. mlflow/tracking/request_header/__init__.py +0 -0
  551. mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
  552. mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
  553. mlflow/tracking/request_header/default_request_header_provider.py +17 -0
  554. mlflow/tracking/request_header/registry.py +79 -0
  555. mlflow/transformers/__init__.py +2982 -0
  556. mlflow/transformers/flavor_config.py +258 -0
  557. mlflow/transformers/hub_utils.py +83 -0
  558. mlflow/transformers/llm_inference_utils.py +468 -0
  559. mlflow/transformers/model_io.py +301 -0
  560. mlflow/transformers/peft.py +51 -0
  561. mlflow/transformers/signature.py +183 -0
  562. mlflow/transformers/torch_utils.py +55 -0
  563. mlflow/types/__init__.py +21 -0
  564. mlflow/types/agent.py +270 -0
  565. mlflow/types/chat.py +240 -0
  566. mlflow/types/llm.py +935 -0
  567. mlflow/types/responses.py +139 -0
  568. mlflow/types/responses_helpers.py +416 -0
  569. mlflow/types/schema.py +1505 -0
  570. mlflow/types/type_hints.py +647 -0
  571. mlflow/types/utils.py +753 -0
  572. mlflow/utils/__init__.py +283 -0
  573. mlflow/utils/_capture_modules.py +256 -0
  574. mlflow/utils/_capture_transformers_modules.py +75 -0
  575. mlflow/utils/_spark_utils.py +201 -0
  576. mlflow/utils/_unity_catalog_oss_utils.py +97 -0
  577. mlflow/utils/_unity_catalog_utils.py +479 -0
  578. mlflow/utils/annotations.py +218 -0
  579. mlflow/utils/arguments_utils.py +16 -0
  580. mlflow/utils/async_logging/__init__.py +1 -0
  581. mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
  582. mlflow/utils/async_logging/async_logging_queue.py +366 -0
  583. mlflow/utils/async_logging/run_artifact.py +38 -0
  584. mlflow/utils/async_logging/run_batch.py +58 -0
  585. mlflow/utils/async_logging/run_operations.py +49 -0
  586. mlflow/utils/autologging_utils/__init__.py +737 -0
  587. mlflow/utils/autologging_utils/client.py +432 -0
  588. mlflow/utils/autologging_utils/config.py +33 -0
  589. mlflow/utils/autologging_utils/events.py +294 -0
  590. mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
  591. mlflow/utils/autologging_utils/metrics_queue.py +71 -0
  592. mlflow/utils/autologging_utils/safety.py +1104 -0
  593. mlflow/utils/autologging_utils/versioning.py +95 -0
  594. mlflow/utils/checkpoint_utils.py +206 -0
  595. mlflow/utils/class_utils.py +6 -0
  596. mlflow/utils/cli_args.py +257 -0
  597. mlflow/utils/conda.py +354 -0
  598. mlflow/utils/credentials.py +231 -0
  599. mlflow/utils/data_utils.py +17 -0
  600. mlflow/utils/databricks_utils.py +1436 -0
  601. mlflow/utils/docstring_utils.py +477 -0
  602. mlflow/utils/doctor.py +133 -0
  603. mlflow/utils/download_cloud_file_chunk.py +43 -0
  604. mlflow/utils/env_manager.py +16 -0
  605. mlflow/utils/env_pack.py +131 -0
  606. mlflow/utils/environment.py +1009 -0
  607. mlflow/utils/exception_utils.py +14 -0
  608. mlflow/utils/file_utils.py +978 -0
  609. mlflow/utils/git_utils.py +77 -0
  610. mlflow/utils/gorilla.py +797 -0
  611. mlflow/utils/import_hooks/__init__.py +363 -0
  612. mlflow/utils/lazy_load.py +51 -0
  613. mlflow/utils/logging_utils.py +168 -0
  614. mlflow/utils/mime_type_utils.py +58 -0
  615. mlflow/utils/mlflow_tags.py +103 -0
  616. mlflow/utils/model_utils.py +486 -0
  617. mlflow/utils/name_utils.py +346 -0
  618. mlflow/utils/nfs_on_spark.py +62 -0
  619. mlflow/utils/openai_utils.py +164 -0
  620. mlflow/utils/os.py +12 -0
  621. mlflow/utils/oss_registry_utils.py +29 -0
  622. mlflow/utils/plugins.py +17 -0
  623. mlflow/utils/process.py +182 -0
  624. mlflow/utils/promptlab_utils.py +146 -0
  625. mlflow/utils/proto_json_utils.py +743 -0
  626. mlflow/utils/pydantic_utils.py +54 -0
  627. mlflow/utils/request_utils.py +279 -0
  628. mlflow/utils/requirements_utils.py +704 -0
  629. mlflow/utils/rest_utils.py +673 -0
  630. mlflow/utils/search_logged_model_utils.py +127 -0
  631. mlflow/utils/search_utils.py +2111 -0
  632. mlflow/utils/secure_loading.py +221 -0
  633. mlflow/utils/security_validation.py +384 -0
  634. mlflow/utils/server_cli_utils.py +61 -0
  635. mlflow/utils/spark_utils.py +15 -0
  636. mlflow/utils/string_utils.py +138 -0
  637. mlflow/utils/thread_utils.py +63 -0
  638. mlflow/utils/time.py +54 -0
  639. mlflow/utils/timeout.py +42 -0
  640. mlflow/utils/uri.py +572 -0
  641. mlflow/utils/validation.py +662 -0
  642. mlflow/utils/virtualenv.py +458 -0
  643. mlflow/utils/warnings_utils.py +25 -0
  644. mlflow/utils/yaml_utils.py +179 -0
  645. mlflow/version.py +24 -0
@@ -0,0 +1,978 @@
1
+ import atexit
2
+ import codecs
3
+ import errno
4
+ import fnmatch
5
+ import gzip
6
+ import importlib.util
7
+ import json
8
+ import logging
9
+ import math
10
+ import os
11
+ import pathlib
12
+ import posixpath
13
+ import shutil
14
+ import stat
15
+ import subprocess
16
+ import sys
17
+ import tarfile
18
+ import tempfile
19
+ import time
20
+ import urllib.parse
21
+ import urllib.request
22
+ import uuid
23
+ from concurrent.futures import as_completed
24
+ from contextlib import contextmanager
25
+ from dataclasses import dataclass
26
+ from subprocess import CalledProcessError, TimeoutExpired
27
+ from typing import Any, Optional, Union
28
+ from urllib.parse import unquote
29
+ from urllib.request import pathname2url
30
+
31
+ from mlflow.entities import FileInfo
32
+ from mlflow.environment_variables import (
33
+ _MLFLOW_MPD_NUM_RETRIES,
34
+ _MLFLOW_MPD_RETRY_INTERVAL_SECONDS,
35
+ MLFLOW_DOWNLOAD_CHUNK_TIMEOUT,
36
+ MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR,
37
+ )
38
+ from mlflow.exceptions import MlflowException
39
+ from mlflow.protos.databricks_artifacts_pb2 import ArtifactCredentialType
40
+ from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
41
+ from mlflow.utils import download_cloud_file_chunk
42
+ from mlflow.utils.databricks_utils import (
43
+ get_databricks_local_temp_dir,
44
+ get_databricks_nfs_temp_dir,
45
+ )
46
+ from mlflow.utils.os import is_windows
47
+ from mlflow.utils.process import cache_return_value_per_process
48
+ from mlflow.utils.request_utils import cloud_storage_http_request, download_chunk
49
+ from mlflow.utils.rest_utils import augmented_raise_for_status
50
+
51
# Text encoding used by the codecs-based read/write helpers in this module.
ENCODING = "utf-8"
# File size, in bytes, from which ArtifactProgressBar.chunks() actually displays a bar.
_PROGRESS_BAR_DISPLAY_THRESHOLD = 500_000_000  # 500 MB

_logger = logging.getLogger(__name__)

# This is for backward compatibility with databricks-feature-engineering<=0.10.2
# The dumper is only re-exported under the name ``YamlSafeDumper``; the C implementation is
# preferred and the pure-Python one is the fallback when it cannot be imported.
if importlib.util.find_spec("yaml") is not None:
    try:
        from yaml import CSafeDumper as YamlSafeDumper
    except ImportError:
        from yaml import SafeDumper as YamlSafeDumper  # noqa: F401
62
+
63
+
64
class ArtifactProgressBar:
    """Optional ``tqdm`` progress bar for artifact transfers.

    The underlying bar is only created by :meth:`set_pbar`, and only when
    ``MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR`` is enabled and ``tqdm`` can be imported. When no
    bar exists, :meth:`update` and the context-manager exit do nothing.
    """

    def __init__(self, desc, total, step, **kwargs) -> None:
        """
        Args:
            desc: Label forwarded to ``tqdm`` as ``desc``.
            total: Total amount of work forwarded to ``tqdm`` as ``total``.
            step: Amount of work reported by each call to :meth:`update`.
            kwargs: Extra keyword arguments forwarded to ``tqdm``.
        """
        self.desc = desc
        self.total = total
        self.step = step
        self.pbar = None
        self.progress = 0
        self.kwargs = kwargs

    def set_pbar(self):
        """Create the ``tqdm`` bar if progress bars are enabled and ``tqdm`` is installed."""
        if MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR.get():
            try:
                from tqdm.auto import tqdm

                self.pbar = tqdm(total=self.total, desc=self.desc, **self.kwargs)
            except ImportError:
                # tqdm is optional; silently run without a visible bar.
                pass

    @classmethod
    def chunks(cls, file_size, desc, chunk_size):
        """Build a byte-oriented bar that advances by ``chunk_size`` per update.

        The bar is only displayed for files of at least ``_PROGRESS_BAR_DISPLAY_THRESHOLD``
        bytes.
        """
        bar = cls(
            desc,
            total=file_size,
            step=chunk_size,
            unit="iB",
            unit_scale=True,
            unit_divisor=1024,
            miniters=1,
        )
        if file_size >= _PROGRESS_BAR_DISPLAY_THRESHOLD:
            bar.set_pbar()
        return bar

    @classmethod
    def files(cls, desc, total):
        """Build a bar that counts files, advancing by one per update."""
        bar = cls(desc, total=total, step=1)
        bar.set_pbar()
        return bar

    def update(self):
        """Advance the bar by one step, never going past ``total``."""
        # Compare against None explicitly: a tqdm bar defines its own truthiness (a bar
        # whose total is 0 is falsy), so ``if self.pbar:`` could skip a bar that exists.
        if self.pbar is not None:
            update_step = min(self.total - self.progress, self.step)
            self.pbar.update(update_step)
            self.pbar.refresh()
            self.progress += update_step

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Same explicit None check as in update(), so an existing bar is always closed.
        if self.pbar is not None:
            self.pbar.close()
116
+
117
+
118
def is_directory(name):
    """Return True when ``name`` is an existing directory, False otherwise."""
    result = os.path.isdir(name)
    return result
120
+
121
+
122
def is_file(name):
    """Return True when ``name`` is an existing regular file, False otherwise."""
    result = os.path.isfile(name)
    return result
124
+
125
+
126
def exists(name):
    """Return True when the path ``name`` exists, False otherwise."""
    result = os.path.exists(name)
    return result
128
+
129
+
130
def list_all(root, filter_func=lambda x: True, full_path=False):
    """List the entries directly under ``root`` that satisfy ``filter_func``.

    Args:
        root: Name of the directory to list.
        filter_func: Callable invoked with each entry's path joined onto ``root``; the
            entry is kept when it returns a truthy value.
        full_path: If True, results are paths that include ``root``; otherwise bare names.

    Returns:
        List of the files or directories that satisfy the criteria.

    Raises:
        Exception: If ``root`` is not an existing directory.
    """
    if not os.path.isdir(root):
        raise Exception(f"Invalid parent directory '{root}'")
    selected = []
    for entry in os.listdir(root):
        candidate = os.path.join(root, entry)
        if filter_func(candidate):
            selected.append(candidate if full_path else entry)
    return selected
146
+
147
+
148
def list_subdirs(dir_name, full_path=False):
    """List the directories located directly under ``dir_name``.

    Equivalent to UNIX command:
    ``find $dir_name -depth 1 -type d``

    Args:
        dir_name: Name of the directory to list.
        full_path: If True, results are paths that include ``dir_name``.

    Returns:
        List of all directories directly under ``dir_name``.
    """
    return list_all(dir_name, filter_func=os.path.isdir, full_path=full_path)
161
+
162
+
163
def list_files(dir_name, full_path=False):
    """List the regular files located directly under ``dir_name``.

    Equivalent to UNIX command:
    ``find $dir_name -depth 1 -type f``

    Args:
        dir_name: Name of the directory to list.
        full_path: If True, results are paths that include ``dir_name``.

    Returns:
        List of all files directly under ``dir_name``.
    """
    return list_all(dir_name, filter_func=os.path.isfile, full_path=full_path)
176
+
177
+
178
def find(root, name, full_path=False):
    """Look for an entry called ``name`` directly under ``root``. Equivalent to:
    ``find $root -name "$name" -depth 1``

    Args:
        root: Name of the directory to search.
        name: Name of the file or directory to find directly under ``root``.
        full_path: If True, results are paths that include ``root``.

    Returns:
        List of matching files or directories.
    """
    wanted = os.path.join(root, name)

    def _is_wanted(candidate):
        # list_all hands over each entry already joined onto ``root``.
        return candidate == wanted

    return list_all(root, _is_wanted, full_path)
192
+
193
+
194
def mkdir(root, name=None):
    """Create the directory ``root/name``, or just ``root`` when ``name`` is None.

    Missing parent directories are created as well, and an already existing directory is
    not an error.

    Args:
        root: Name of the parent directory.
        name: Optional name of the leaf directory.

    Returns:
        Path to the created directory.
    """
    target = root if name is None else os.path.join(root, name)
    try:
        os.makedirs(target, exist_ok=True)
    except OSError as err:
        # Tolerate only "already exists" errors where the path really is a directory.
        already_a_directory = err.errno == errno.EEXIST and os.path.isdir(target)
        if not already_a_directory:
            raise err
    return target
211
+
212
+
213
def make_containing_dirs(path):
    """
    Create the base directory for a given file path if it does not exist; also creates parent
    directories.

    Args:
        path: Path of a file whose containing directory should exist afterwards. A path
            without a directory component (a bare file name) needs nothing created.
    """
    dir_name = os.path.dirname(path)
    # A bare file name yields an empty dirname; os.makedirs("") would raise, and the
    # containing directory is then the current working directory anyway.
    if dir_name and not os.path.exists(dir_name):
        # exist_ok covers another process creating the directory between the existence
        # check above and this call.
        os.makedirs(dir_name, exist_ok=True)
221
+
222
+
223
def read_parquet_as_pandas_df(data_parquet_path: str):
    """Load the specified parquet data as a Pandas DataFrame using the pyarrow engine.

    Args:
        data_parquet_path: String, path object (implementing os.PathLike[str]),
            or file-like object implementing a binary read() function. The string
            could be a URL. Valid URL schemes include http, ftp, s3, gs, and file.
            For file URLs, a host is expected. A local file could
            be: file://localhost/path/to/table.parquet. A file URL can also be a path to a
            directory that contains multiple partitioned parquet files. Pyarrow
            supports paths to directories as well as file URLs. A directory
            path could be: file://localhost/path/to/tables or s3://bucket/partition_dir.

    Returns:
        pandas dataframe
    """
    # pandas is imported lazily so that importing this module does not require it.
    from pandas import read_parquet

    return read_parquet(data_parquet_path, engine="pyarrow")
242
+
243
+
244
def write_pandas_df_as_parquet(df, data_parquet_path: str):
    """Write a DataFrame to the binary parquet format using the pyarrow engine.

    Args:
        df: pandas data frame.
        data_parquet_path: String, path object (implementing os.PathLike[str]),
            or file-like object implementing a binary write() function.
    """
    to_parquet = df.to_parquet
    to_parquet(data_parquet_path, engine="pyarrow")
254
+
255
+
256
class TempDir:
    """Context manager that provides a fresh temporary directory.

    Args:
        chdr: If True, the process changes into the temporary directory on entry and back
            to the previous working directory on exit.
        remove_on_exit: If True, the temporary directory is deleted on exit.
    """

    def __init__(self, chdr=False, remove_on_exit=True):
        self._chdr = chdr
        self._remove = remove_on_exit
        # Absolute path of the temporary directory; set on entry.
        self._path = None
        # Working directory to restore on exit; only set while ``chdr`` is in effect.
        self._dir = None

    def __enter__(self):
        self._path = os.path.abspath(create_tmp_dir())
        assert os.path.exists(self._path)
        if self._chdr:
            self._dir = os.path.abspath(os.getcwd())
            os.chdir(self._path)
        return self

    def __exit__(self, tp, val, traceback):
        # Leave the temporary directory before it is deleted.
        if self._chdr and self._dir:
            os.chdir(self._dir)
            self._dir = None
        if self._remove:
            if os.path.exists(self._path):
                shutil.rmtree(self._path)
            assert not os.path.exists(self._path)

    def path(self, *path):
        """Join ``path`` components onto the temporary directory.

        While ``chdr`` is enabled the result is relative to the current directory (``./``).
        """
        base = "./" if self._chdr else self._path
        return os.path.join(base, *path)
282
+
283
+
284
def read_file_lines(parent_path, file_name):
    """Read a file decoded with ``ENCODING`` and return its lines.

    Args:
        parent_path: Full path to the directory that contains the file.
        file_name: Leaf file name.

    Returns:
        All lines in the file as a list, one element per line.
    """
    target = os.path.join(parent_path, file_name)
    with codecs.open(target, mode="r", encoding=ENCODING) as reader:
        lines = reader.readlines()
    return lines
298
+
299
+
300
def read_file(parent_path, file_name):
    """Read a file decoded with ``ENCODING`` and return its whole contents.

    Args:
        parent_path: Full path to the directory that contains the file.
        file_name: Leaf file name.

    Returns:
        The contents of the file.
    """
    target = os.path.join(parent_path, file_name)
    with codecs.open(target, mode="r", encoding=ENCODING) as reader:
        contents = reader.read()
    return contents
314
+
315
+
316
def get_file_info(path, rel_path):
    """Build the ``FileInfo`` describing the artifact stored at ``path``.

    Args:
        path: Path to artifact.
        rel_path: Relative path recorded in the returned ``FileInfo``.

    Returns:
        `FileInfo` object; directories are flagged as such and carry no size, files carry
        their size as reported by ``os.path.getsize``.
    """
    if not is_directory(path):
        return FileInfo(rel_path, False, os.path.getsize(path))
    return FileInfo(rel_path, True, None)
330
+
331
+
332
def get_relative_path(root_path, target_path):
    """Strip the prefix shared with ``root_path`` and return the rest of ``target_path``.

    Args:
        root_path: Root path.
        target_path: Desired path for common prefix removal.

    Returns:
        ``target_path`` expressed relative to the common prefix of both paths.

    Raises:
        Exception: If ``root_path`` is longer than ``target_path``.
    """
    if len(target_path) < len(root_path):
        raise Exception(f"Root path '{root_path}' longer than target path '{target_path}'")
    # NOTE: os.path.commonprefix compares character by character, not by path component.
    shared = os.path.commonprefix([root_path, target_path])
    return os.path.relpath(target_path, shared)
346
+
347
+
348
def mv(target, new_parent):
    """Move ``target`` to ``new_parent`` using ``shutil.move``."""
    shutil.move(src=target, dst=new_parent)
350
+
351
+
352
def write_to(filename, data):
    """Write ``data`` to ``filename`` encoded with ``ENCODING``, replacing prior content."""
    with codecs.open(filename, mode="w", encoding=ENCODING) as writer:
        writer.write(data)
355
+
356
+
357
def append_to(filename, data):
    """Append ``data`` to ``filename``, creating the file if it does not exist.

    Args:
        filename: Path of the file to append to.
        data: Text to append.
    """
    # Use the module-wide encoding explicitly. Without it the locale default applies,
    # which can disagree with the ENCODING used by write_to() and read_file() on the
    # same file.
    with open(filename, "a", encoding=ENCODING) as handle:
        handle.write(data)
360
+
361
+
362
def make_tarfile(output_filename, source_dir, archive_name, custom_filter=None):
    """Create a gzip-compressed tar archive of ``source_dir`` with timestamps zeroed.

    Member modification times are set to 0 and the gzip header carries neither a file name
    nor a modification time, so archiving the same content twice yields the same bytes.

    Args:
        output_filename: Path of the ``.tar.gz`` file to write.
        source_dir: Directory (or file) to add to the archive.
        archive_name: Name given to ``source_dir`` inside the archive.
        custom_filter: Optional callable applied to each ``TarInfo`` after its timestamp
            has been reset; it returns the ``TarInfo`` to store, or None to skip the member.
    """

    # Helper for filtering out modification timestamps
    def _filter_timestamps(tar_info):
        tar_info.mtime = 0
        return tar_info if custom_filter is None else custom_filter(tar_info)

    unzipped_file_handle, unzipped_filename = tempfile.mkstemp()
    # The temporary file is reopened by name below, so the raw descriptor is not needed.
    os.close(unzipped_file_handle)
    try:
        with tarfile.open(unzipped_filename, "w") as tar:
            tar.add(source_dir, arcname=archive_name, filter=_filter_timestamps)
        # When gzipping the tar, don't include the tar's filename or modification time in the
        # zipped archive (see https://docs.python.org/3/library/gzip.html#gzip.GzipFile)
        with (
            # GzipFile does not close a file object handed to it, so manage it here.
            open(output_filename, "wb") as raw_output,
            gzip.GzipFile(filename="", fileobj=raw_output, mode="wb", mtime=0) as gzipped_tar,
            open(unzipped_filename, "rb") as tar,
        ):
            # Stream the tar through gzip instead of loading it into memory at once.
            shutil.copyfileobj(tar, gzipped_tar)
    finally:
        # mkstemp leaves deletion to the caller; do not leak the intermediate tar.
        os.remove(unzipped_filename)
383
+
384
+
385
+ def _copy_project(src_path, dst_path=""):
386
+ """Internal function used to copy MLflow project during development.
387
+
388
+ Copies the content of the whole directory tree except patterns defined in .dockerignore.
389
+ The MLflow is assumed to be accessible as a local directory in this case.
390
+
391
+ Args:
392
+ src_path: Path to the original MLflow project
393
+ dst_path: MLflow will be copied here
394
+
395
+ Returns:
396
+ Name of the MLflow project directory.
397
+ """
398
+
399
+ def _docker_ignore(mlflow_root):
400
+ docker_ignore = os.path.join(mlflow_root, ".dockerignore")
401
+ patterns = []
402
+ if os.path.exists(docker_ignore):
403
+ with open(docker_ignore) as f:
404
+ patterns = [x.strip() for x in f.readlines()]
405
+
406
+ def ignore(_, names):
407
+ res = set()
408
+ for p in patterns:
409
+ res.update(set(fnmatch.filter(names, p)))
410
+ return list(res)
411
+
412
+ return ignore if patterns else None
413
+
414
+ mlflow_dir = "mlflow-project"
415
+ # check if we have project root
416
+ assert os.path.isfile(os.path.join(src_path, "pyproject.toml")), "file not found " + str(
417
+ os.path.abspath(os.path.join(src_path, "pyproject.toml"))
418
+ )
419
+ shutil.copytree(src_path, os.path.join(dst_path, mlflow_dir), ignore=_docker_ignore(src_path))
420
+ return mlflow_dir
421
+
422
+
423
+ def _copy_file_or_tree(src, dst, dst_dir=None):
424
+ """
425
+ Returns:
426
+ The path to the copied artifacts, relative to `dst`.
427
+ """
428
+ dst_subpath = os.path.basename(os.path.abspath(src))
429
+ if dst_dir is not None:
430
+ dst_subpath = os.path.join(dst_dir, dst_subpath)
431
+ dst_path = os.path.join(dst, dst_subpath)
432
+ if os.path.isfile(src):
433
+ dst_dirpath = os.path.dirname(dst_path)
434
+ if not os.path.exists(dst_dirpath):
435
+ os.makedirs(dst_dirpath)
436
+ shutil.copy(src=src, dst=dst_path)
437
+ else:
438
+ shutil.copytree(src=src, dst=dst_path, ignore=shutil.ignore_patterns("__pycache__"))
439
+ return dst_subpath
440
+
441
+
442
+ def _get_local_project_dir_size(project_path):
443
+ """Internal function for reporting the size of a local project directory before copying to
444
+ destination for cli logging reporting to stdout.
445
+
446
+ Args:
447
+ project_path: local path of the project directory
448
+
449
+ Returns:
450
+ directory file sizes in KB, rounded to single decimal point for legibility
451
+ """
452
+
453
+ total_size = 0
454
+ for root, _, files in os.walk(project_path):
455
+ for f in files:
456
+ path = os.path.join(root, f)
457
+ total_size += os.path.getsize(path)
458
+ return round(total_size / 1024.0, 1)
459
+
460
+
461
+ def _get_local_file_size(file):
462
+ """
463
+ Get the size of a local file in KB
464
+ """
465
+ return round(os.path.getsize(file) / 1024.0, 1)
466
+
467
+
468
def get_parent_dir(path):
    """Return the absolute path of the parent directory of ``path``."""
    one_level_up = os.path.join(path, os.pardir)
    return os.path.abspath(one_level_up)
470
+
471
+
472
def relative_path_to_artifact_path(path):
    """Convert a relative local path into a POSIX-style artifact path.

    When ``os.path`` is ``posixpath`` the path is returned unchanged. Otherwise the path
    is converted with ``pathname2url`` and unquoted again.

    Raises:
        Exception: If ``os.path`` is not ``posixpath`` and ``path`` is absolute.
    """
    if os.path == posixpath:
        return path
    is_absolute = os.path.abspath(path) == path
    if is_absolute:
        raise Exception("This method only works with relative paths.")
    url_form = pathname2url(path)
    return unquote(url_form)
478
+
479
+
480
def path_to_local_file_uri(path):
    """
    Convert a local filesystem path to a ``file:`` URI of its absolute form.
    """
    absolute = pathlib.Path(os.path.abspath(path))
    return absolute.as_uri()
485
+
486
+
487
def path_to_local_sqlite_uri(path):
    """
    Convert a local filesystem path to a sqlite URI of its absolute form.
    """
    url_path = pathname2url(os.path.abspath(path))
    normalized = posixpath.abspath(url_path)
    if sys.platform == "win32":
        prefix = "sqlite://"
    else:
        prefix = "sqlite:///"
    return prefix + normalized
494
+
495
+
496
def local_file_uri_to_path(uri):
    """
    Convert a ``file:`` URI to a local filesystem path.

    A value that does not start with ``file:`` is passed to ``url2pathname`` as it is.
    """
    if not uri.startswith("file:"):
        return urllib.request.url2pathname(uri)
    parsed = urllib.parse.urlparse(uri)
    # Fix for retaining server name in UNC path.
    if is_windows() and parsed.netloc:
        return urllib.request.url2pathname(rf"\\{parsed.netloc}{parsed.path}")
    return urllib.request.url2pathname(parsed.path)
509
+
510
+
511
def get_local_path_or_none(path_or_uri):
    """Return the local path for a local argument, None otherwise.

    An argument counts as local when it has no scheme, or when its scheme is ``file``
    and it names no host (``file:///...``).
    """
    parsed_uri = urllib.parse.urlparse(path_or_uri)
    has_no_scheme = len(parsed_uri.scheme) == 0
    is_hostless_file_uri = parsed_uri.scheme == "file" and len(parsed_uri.netloc) == 0
    if has_no_scheme or is_hostless_file_uri:
        return local_file_uri_to_path(path_or_uri)
    return None
520
+
521
+
522
def yield_file_in_chunks(file, chunk_size=100000000):
    """
    Generator that reads ``file`` in binary mode and yields it in pieces of at most
    ``chunk_size`` bytes, stopping at the end of the file.
    """
    with open(file, "rb") as stream:
        while piece := stream.read(chunk_size):
            yield piece
533
+
534
+
535
def download_file_using_http_uri(http_uri, download_path, chunk_size=100000000, headers=None):
    """
    Downloads a file specified using the `http_uri` to a local `download_path`. The response
    is streamed and written in pieces of `chunk_size` so that a large file does not have to
    be held in memory at once.

    Note : This function is meant to download files using presigned urls from various cloud
    providers.
    """
    request_headers = {} if headers is None else headers
    with cloud_storage_http_request(
        "get", http_uri, stream=True, headers=request_headers
    ) as response:
        augmented_raise_for_status(response)
        with open(download_path, "wb") as output_file:
            for piece in response.iter_content(chunk_size=chunk_size):
                # An empty piece ends the download.
                if not piece:
                    break
                output_file.write(piece)
552
+
553
+
554
+ @dataclass(frozen=True)
555
+ class _Chunk:
556
+ index: int
557
+ start: int
558
+ end: int
559
+ path: str
560
+
561
+
562
def _yield_chunks(path, file_size, chunk_size):
    """Yield the ``_Chunk`` byte ranges that cover a file of ``file_size`` bytes.

    Every chunk spans ``chunk_size`` bytes except possibly the last one, which ends at the
    final byte of the file. Range ends are inclusive.
    """
    num_requests = int(math.ceil(file_size / float(chunk_size)))
    last_byte = file_size - 1
    for index in range(num_requests):
        first_byte = index * chunk_size
        yield _Chunk(index, first_byte, min(first_byte + chunk_size - 1, last_byte), path)
568
+
569
+
570
def parallelized_download_file_using_http_uri(
    thread_pool_executor,
    http_uri,
    download_path,
    remote_file_path,
    file_size,
    uri_type,
    chunk_size,
    env,
    headers=None,
):
    """
    Downloads a file specified using the `http_uri` to a local `download_path`. This function
    sends multiple requests in parallel each specifying its own desired byte range as a header,
    then reconstructs the file from the downloaded chunks. This allows for downloads of large files
    without OOM risk.

    Note : This function is meant to download files using presigned urls from various cloud
    providers.

    Args:
        thread_pool_executor: Executor on which one download task per chunk is submitted.
        http_uri: URI to download from.
        download_path: Local path of the file to create and fill.
        remote_file_path: Path of the remote file; recorded on each chunk for log messages.
        file_size: Size of the remote file in bytes.
        uri_type: Credential type of the URI; for ``GCP_SIGNED_URL`` or None the first chunk
            is downloaded in-process to detect a response that ignores the range header.
        chunk_size: Size in bytes of each requested range.
        env: Environment passed to the download subprocesses.
        headers: Optional HTTP headers sent with every request.

    Returns:
        A dict mapping each chunk (``_Chunk``) whose download failed to the exception that
        was raised for it. The dict is empty when nothing failed.
    """

    def _captured_text(stream):
        # On a timeout the captured output may be None, or bytes even though text=True was
        # requested; normalize so that building the error message cannot itself fail.
        if stream is None:
            return ""
        if isinstance(stream, bytes):
            stream = stream.decode(ENCODING, errors="replace")
        return stream.strip()

    def run_download(chunk: _Chunk):
        # Each chunk is fetched by a separate Python process running the
        # download_cloud_file_chunk module as a script.
        try:
            subprocess.run(
                [
                    sys.executable,
                    download_cloud_file_chunk.__file__,
                    "--range-start",
                    str(chunk.start),
                    "--range-end",
                    str(chunk.end),
                    "--headers",
                    json.dumps(headers or {}),
                    "--download-path",
                    download_path,
                    "--http-uri",
                    http_uri,
                ],
                text=True,
                check=True,
                capture_output=True,
                timeout=MLFLOW_DOWNLOAD_CHUNK_TIMEOUT.get(),
                env=env,
            )
        except (TimeoutExpired, CalledProcessError) as e:
            raise MlflowException(
                f"""
----- stdout -----
{_captured_text(e.stdout)}

----- stderr -----
{_captured_text(e.stderr)}
"""
            ) from e

    chunks = _yield_chunks(remote_file_path, file_size, chunk_size)
    # Create file if it doesn't exist or erase the contents if it does. We should do this here
    # before sending to the workers so they can each individually seek to their respective positions
    # and write chunks without overwriting.
    with open(download_path, "w"):
        pass
    if uri_type == ArtifactCredentialType.GCP_SIGNED_URL or uri_type is None:
        chunk = next(chunks)
        # GCP files could be transcoded, in which case the range header is ignored.
        # Test if this is the case by downloading one chunk and seeing if it's larger than the
        # requested size. If yes, let that be the file; if not, continue downloading more chunks.
        download_chunk(
            range_start=chunk.start,
            range_end=chunk.end,
            headers=headers,
            download_path=download_path,
            http_uri=http_uri,
        )
        downloaded_size = os.path.getsize(download_path)
        # If downloaded size was equal to the chunk size it would have been downloaded serially,
        # so we don't need to consider this here
        if downloaded_size > chunk_size:
            return {}

    # ``chunks`` is a generator, so a chunk already consumed above is not submitted again.
    futures = {thread_pool_executor.submit(run_download, chunk): chunk for chunk in chunks}
    failed_downloads = {}
    with ArtifactProgressBar.chunks(file_size, f"Downloading {download_path}", chunk_size) as pbar:
        for future in as_completed(futures):
            chunk = futures[future]
            try:
                future.result()
            except Exception as e:
                _logger.debug(
                    f"Failed to download chunk {chunk.index} for {chunk.path}: {e}. "
                    f"The download of this chunk will be retried later."
                )
                failed_downloads[chunk] = e
            else:
                pbar.update()

    return failed_downloads
667
+
668
+
669
def download_chunk_retries(*, chunks, http_uri, headers, download_path):
    """Download each of ``chunks`` again, making up to a configured number of attempts.

    The number of attempts comes from ``_MLFLOW_MPD_NUM_RETRIES`` and the pause between
    attempts from ``_MLFLOW_MPD_RETRY_INTERVAL_SECONDS``. The error of the last attempt
    for a chunk is re-raised.

    Args:
        chunks: Chunks to download; each provides ``index``, ``path``, ``start``, ``end``.
        http_uri: URI to download from.
        headers: HTTP headers passed to ``download_chunk``.
        download_path: Local file the chunks are written into.
    """
    num_retries = _MLFLOW_MPD_NUM_RETRIES.get()
    interval = _MLFLOW_MPD_RETRY_INTERVAL_SECONDS.get()
    for chunk in chunks:
        _logger.info(f"Retrying download of chunk {chunk.index} for {chunk.path}")
        for attempt in range(1, num_retries + 1):
            try:
                download_chunk(
                    range_start=chunk.start,
                    range_end=chunk.end,
                    headers=headers,
                    download_path=download_path,
                    http_uri=http_uri,
                )
                _logger.info(f"Successfully downloaded chunk {chunk.index} for {chunk.path}")
                break
            except Exception:
                # Out of attempts: surface the error of the final try.
                if attempt == num_retries:
                    raise
                time.sleep(interval)
689
+
690
+
691
def _handle_readonly_on_windows(func, path, exc_info):
    """
    This function should not be called directly but should be passed to `onerror` of
    `shutil.rmtree` in order to reattempt the removal of a read-only file after making
    it writable on Windows.

    Any error that is not a Windows ``PermissionError`` with ``winerror == 5`` raised by
    ``os.unlink`` or ``os.rmdir`` is re-raised unchanged.

    References:
    - https://bugs.python.org/issue19643
    - https://bugs.python.org/issue43657
    """
    exc_type, exc_value = exc_info[:2]
    retryable = (
        is_windows()
        and func in (os.unlink, os.rmdir)
        and issubclass(exc_type, PermissionError)
        and exc_value.winerror == 5
    )
    if retryable:
        # Make the entry writable, then repeat the operation that failed.
        os.chmod(path, stat.S_IWRITE)
        func(path)
    else:
        raise exc_value
712
+
713
+
714
def _get_tmp_dir():
    """Return the parent directory to use for temporary directories, or None.

    Outside of the Databricks runtime the result is None. Inside it, the Databricks local
    temp dir is used; if that lookup fails, a directory derived from the REPL id is used
    when a REPL id is available.
    """
    from mlflow.utils.databricks_utils import get_repl_id, is_in_databricks_runtime

    if not is_in_databricks_runtime():
        return None

    try:
        return get_databricks_local_temp_dir()
    except Exception:
        # Fall back to the REPL-specific directory below.
        pass

    repl_id = get_repl_id()
    if repl_id:
        return os.path.join("/tmp", "repl_tmp_data", repl_id)

    return None
727
+
728
+
729
def create_tmp_dir():
    """Create and return a new temporary directory.

    The directory is created below the location reported by ``_get_tmp_dir`` when there is
    one (creating that location first if needed), otherwise in the system default location.
    """
    directory = _get_tmp_dir()
    if not directory:
        return tempfile.mkdtemp()

    os.makedirs(directory, exist_ok=True)
    return tempfile.mkdtemp(dir=directory)
735
+
736
+
737
@cache_return_value_per_process
def get_or_create_tmp_dir():
    """
    Get or create a temporary directory which will be removed once python process exit.
    """
    from mlflow.utils.databricks_utils import get_repl_id, is_in_databricks_runtime

    in_databricks_repl = is_in_databricks_runtime() and get_repl_id() is not None
    if not in_databricks_repl:
        tmp_dir = tempfile.mkdtemp()
        # mkdtemp creates a directory with permission 0o700
        # change it to be 0o777 to ensure it can be seen in spark UDF
        os.chmod(tmp_dir, 0o777)
        atexit.register(shutil.rmtree, tmp_dir, ignore_errors=True)
        return tmp_dir

    # Note: For python process attached to databricks notebook, atexit does not work.
    # The directory returned by `get_databricks_local_tmp_dir`
    # will be removed once databricks notebook detaches.
    # The temp directory is designed to be used by all kinds of applications,
    # so create a child directory "mlflow" for storing mlflow temp data.
    try:
        repl_local_tmp_dir = get_databricks_local_temp_dir()
    except Exception:
        repl_local_tmp_dir = os.path.join("/tmp", "repl_tmp_data", get_repl_id())

    tmp_dir = os.path.join(repl_local_tmp_dir, "mlflow")
    os.makedirs(tmp_dir, exist_ok=True)
    return tmp_dir
765
+
766
+
767
@cache_return_value_per_process
def get_or_create_nfs_tmp_dir():
    """
    Get or create a temporary NFS directory which will be removed once python process exit.
    """
    from mlflow.utils.databricks_utils import get_repl_id, is_in_databricks_runtime
    from mlflow.utils.nfs_on_spark import get_nfs_cache_root_dir

    nfs_root_dir = get_nfs_cache_root_dir()

    in_databricks_repl = is_in_databricks_runtime() and get_repl_id() is not None
    if not in_databricks_repl:
        tmp_nfs_dir = tempfile.mkdtemp(dir=nfs_root_dir)
        # mkdtemp creates a directory with permission 0o700
        # change it to be 0o777 to ensure it can be seen in spark UDF
        os.chmod(tmp_nfs_dir, 0o777)
        atexit.register(shutil.rmtree, tmp_nfs_dir, ignore_errors=True)
        return tmp_nfs_dir

    # Note: In databricks, atexit hook does not work.
    # The directory returned by `get_databricks_nfs_tmp_dir`
    # will be removed once databricks notebook detaches.
    # The temp directory is designed to be used by all kinds of applications,
    # so create a child directory "mlflow" for storing mlflow temp data.
    try:
        repl_nfs_tmp_dir = get_databricks_nfs_temp_dir()
    except Exception:
        repl_nfs_tmp_dir = os.path.join(nfs_root_dir, "repl_tmp_data", get_repl_id())

    tmp_nfs_dir = os.path.join(repl_nfs_tmp_dir, "mlflow")
    os.makedirs(tmp_nfs_dir, exist_ok=True)
    return tmp_nfs_dir
798
+
799
+
800
def write_spark_dataframe_to_parquet_on_local_disk(spark_df, output_path):
    """Write spark dataframe in parquet format to local disk.

    The dataframe is coalesced into a single partition before writing. In the Databricks
    runtime the data is first saved under a unique ``.mlflow/cache`` path and then copied
    from the matching ``/dbfs/`` location to ``output_path``, after which the intermediate
    copy is deleted.

    Args:
        spark_df: Spark dataframe.
        output_path: Path to write the data to.
    """
    from mlflow.utils.databricks_utils import is_in_databricks_runtime

    if not is_in_databricks_runtime():
        spark_df.coalesce(1).write.format("parquet").save(output_path)
        return

    dbfs_path = os.path.join(".mlflow", "cache", str(uuid.uuid4()))
    spark_df.coalesce(1).write.format("parquet").save(dbfs_path)
    # NOTE(review): presumably "/dbfs/" is the local mount of the location Spark wrote to.
    mounted_path = "/dbfs/" + dbfs_path
    shutil.copytree(mounted_path, output_path)
    shutil.rmtree(mounted_path)
817
+
818
+
819
def shutil_copytree_without_file_permissions(src_dir, dst_dir):
    """
    Copies the directory src_dir into dst_dir, without preserving filesystem permissions

    Args:
        src_dir: Directory whose contents are copied.
        dst_dir: Existing directory that receives the contents. Sub-directories are created
            with ``os.mkdir``, so they must not exist yet.
    """
    for dirpath, dirnames, filenames in os.walk(src_dir):
        for dirname in dirnames:
            relative_dir_path = os.path.relpath(os.path.join(dirpath, dirname), src_dir)
            # For each directory <dirname> immediately under <dirpath>, create an equivalently-named
            # directory under the destination directory
            abs_dir_path = os.path.join(dst_dir, relative_dir_path)
            os.mkdir(abs_dir_path)
        for filename in filenames:
            # For each file with name <filename> immediately under <dirpath>, copy that file to
            # the appropriate location in the destination directory
            file_path = os.path.join(dirpath, filename)
            relative_file_path = os.path.relpath(file_path, src_dir)
            abs_file_path = os.path.join(dst_dir, relative_file_path)
            # copyfile transfers only the contents. copy2 would also carry over the
            # permission bits and other metadata, which this function must not preserve.
            shutil.copyfile(file_path, abs_file_path)
837
+
838
+
839
def contains_path_separator(path):
    """
    Returns True if a path contains a path separator (``os.path.sep`` or, where defined,
    ``os.path.altsep``), False otherwise.
    """
    for separator in (os.path.sep, os.path.altsep):
        if separator is not None and separator in path:
            return True
    return False
844
+
845
+
846
def contains_percent(path):
    """
    Returns True if a path contains a percent character, False otherwise.
    """
    has_percent = "%" in path
    return has_percent
851
+
852
+
853
def read_chunk(path: os.PathLike, size: int, start_byte: int = 0) -> bytes:
    """Read up to ``size`` bytes from a file, starting at ``start_byte``.

    Args:
        path: Path to the file.
        size: The size of the chunk.
        start_byte: The start byte of the chunk.

    Returns:
        The chunk of bytes.
    """
    with open(path, "rb") as stream:
        if start_byte > 0:
            stream.seek(start_byte)
        data = stream.read(size)
    return data
869
+
870
+
871
@contextmanager
def remove_on_error(path: os.PathLike, onerror=None):
    """A context manager that removes a file or directory if an exception is raised during
    execution. The exception is re-raised after the cleanup.

    Args:
        path: Path to the file or directory.
        onerror: A callback function that will be called with the captured exception before
            the file or directory is removed. For example, you can use this callback to
            log the exception.
    """
    try:
        yield
    except Exception as exc:
        if onerror:
            onerror(exc)
        if os.path.exists(path):
            if os.path.isfile(path):
                os.remove(path)
            elif os.path.isdir(path):
                shutil.rmtree(path)
        # Report the outcome based on whether the path is still present afterwards.
        if os.path.exists(path):
            outcome = f"Failed to remove {path}"
        else:
            outcome = f"Successfully removed {path}"
        _logger.warning(outcome)
        raise
897
+
898
+
899
@contextmanager
def chdir(path: str) -> None:
    """Temporarily change the current working directory to the specified path.

    The previous working directory is restored on exit, also when the body raises.

    Args:
        path: The path to use as the temporary working directory.
    """
    previous_dir = os.getcwd()
    try:
        os.chdir(path)
        yield
    finally:
        os.chdir(previous_dir)
912
+
913
+
914
def get_total_file_size(path: Union[str, pathlib.Path]) -> Optional[int]:
    """Return the size of all files under given path, including files in subdirectories.

    Any error, including a missing path or a path that is not a directory, is logged and
    turned into a None result instead of being raised.

    Args:
        path: The absolute path of a local directory.

    Returns:
        size in bytes, or None if the size could not be determined.
    """
    try:
        if isinstance(path, pathlib.Path):
            path = str(path)
        if not os.path.exists(path):
            raise MlflowException(
                message=f"The given {path} does not exist.", error_code=INVALID_PARAMETER_VALUE
            )
        if not os.path.isdir(path):
            raise MlflowException(
                message=f"The given {path} is not a directory.", error_code=INVALID_PARAMETER_VALUE
            )

        total_size = 0
        for cur_path, _, file_names in os.walk(path):
            for file_name in file_names:
                total_size += os.path.getsize(os.path.join(cur_path, file_name))
        return total_size
    except Exception as e:
        _logger.info(f"Failed to get the total size of {path} because of error :{e}")
        return None
944
+
945
+
946
def write_yaml(
    root: str,
    file_name: str,
    data: dict[str, Any],
    overwrite: bool = False,
    sort_keys: bool = True,
    ensure_yaml_extension: bool = True,
) -> None:
    """
    NEVER TOUCH THIS FUNCTION. KEPT FOR BACKWARD COMPATIBILITY with
    databricks-feature-engineering<=0.10.2

    Dump ``data`` with ``yaml.safe_dump`` into the file ``file_name`` inside ``root``.

    Args:
        root: Directory that contains the file.
        file_name: Name of the file to write; it is used exactly as given.
        data: Data to serialize.
        overwrite: Accepted for signature compatibility; not used by the body. The file is
            opened in ``"w"`` mode, so existing content is replaced.
        sort_keys: Forwarded to ``yaml.safe_dump``.
        ensure_yaml_extension: Accepted for signature compatibility; not used by the body.
    """
    import yaml

    with open(os.path.join(root, file_name), "w") as f:
        yaml.safe_dump(
            data,
            f,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=sort_keys,
        )
968
+
969
+
970
def read_yaml(root: str, file_name: str) -> dict[str, Any]:
    """
    NEVER TOUCH THIS FUNCTION. KEPT FOR BACKWARD COMPATIBILITY with
    databricks-feature-engineering<=0.10.2

    Load the file ``file_name`` inside ``root`` with ``yaml.safe_load``.

    Args:
        root: Directory that contains the file.
        file_name: Name of the file to read; it is used exactly as given.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's content.
    """
    import yaml

    with open(os.path.join(root, file_name)) as f:
        return yaml.safe_load(f)