genesis-flow 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (645)
  1. genesis_flow-1.0.0.dist-info/METADATA +822 -0
  2. genesis_flow-1.0.0.dist-info/RECORD +645 -0
  3. genesis_flow-1.0.0.dist-info/WHEEL +5 -0
  4. genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
  5. genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
  6. genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
  7. mlflow/__init__.py +367 -0
  8. mlflow/__main__.py +3 -0
  9. mlflow/ag2/__init__.py +56 -0
  10. mlflow/ag2/ag2_logger.py +294 -0
  11. mlflow/anthropic/__init__.py +40 -0
  12. mlflow/anthropic/autolog.py +129 -0
  13. mlflow/anthropic/chat.py +144 -0
  14. mlflow/artifacts/__init__.py +268 -0
  15. mlflow/autogen/__init__.py +144 -0
  16. mlflow/autogen/chat.py +142 -0
  17. mlflow/azure/__init__.py +26 -0
  18. mlflow/azure/auth_handler.py +257 -0
  19. mlflow/azure/client.py +319 -0
  20. mlflow/azure/config.py +120 -0
  21. mlflow/azure/connection_factory.py +340 -0
  22. mlflow/azure/exceptions.py +27 -0
  23. mlflow/azure/stores.py +327 -0
  24. mlflow/azure/utils.py +183 -0
  25. mlflow/bedrock/__init__.py +45 -0
  26. mlflow/bedrock/_autolog.py +202 -0
  27. mlflow/bedrock/chat.py +122 -0
  28. mlflow/bedrock/stream.py +160 -0
  29. mlflow/bedrock/utils.py +43 -0
  30. mlflow/cli.py +707 -0
  31. mlflow/client.py +12 -0
  32. mlflow/config/__init__.py +56 -0
  33. mlflow/crewai/__init__.py +79 -0
  34. mlflow/crewai/autolog.py +253 -0
  35. mlflow/crewai/chat.py +29 -0
  36. mlflow/data/__init__.py +75 -0
  37. mlflow/data/artifact_dataset_sources.py +170 -0
  38. mlflow/data/code_dataset_source.py +40 -0
  39. mlflow/data/dataset.py +123 -0
  40. mlflow/data/dataset_registry.py +168 -0
  41. mlflow/data/dataset_source.py +110 -0
  42. mlflow/data/dataset_source_registry.py +219 -0
  43. mlflow/data/delta_dataset_source.py +167 -0
  44. mlflow/data/digest_utils.py +108 -0
  45. mlflow/data/evaluation_dataset.py +562 -0
  46. mlflow/data/filesystem_dataset_source.py +81 -0
  47. mlflow/data/http_dataset_source.py +145 -0
  48. mlflow/data/huggingface_dataset.py +258 -0
  49. mlflow/data/huggingface_dataset_source.py +118 -0
  50. mlflow/data/meta_dataset.py +104 -0
  51. mlflow/data/numpy_dataset.py +223 -0
  52. mlflow/data/pandas_dataset.py +231 -0
  53. mlflow/data/polars_dataset.py +352 -0
  54. mlflow/data/pyfunc_dataset_mixin.py +31 -0
  55. mlflow/data/schema.py +76 -0
  56. mlflow/data/sources.py +1 -0
  57. mlflow/data/spark_dataset.py +406 -0
  58. mlflow/data/spark_dataset_source.py +74 -0
  59. mlflow/data/spark_delta_utils.py +118 -0
  60. mlflow/data/tensorflow_dataset.py +350 -0
  61. mlflow/data/uc_volume_dataset_source.py +81 -0
  62. mlflow/db.py +27 -0
  63. mlflow/dspy/__init__.py +17 -0
  64. mlflow/dspy/autolog.py +197 -0
  65. mlflow/dspy/callback.py +398 -0
  66. mlflow/dspy/constant.py +1 -0
  67. mlflow/dspy/load.py +93 -0
  68. mlflow/dspy/save.py +393 -0
  69. mlflow/dspy/util.py +109 -0
  70. mlflow/dspy/wrapper.py +226 -0
  71. mlflow/entities/__init__.py +104 -0
  72. mlflow/entities/_mlflow_object.py +52 -0
  73. mlflow/entities/assessment.py +545 -0
  74. mlflow/entities/assessment_error.py +80 -0
  75. mlflow/entities/assessment_source.py +141 -0
  76. mlflow/entities/dataset.py +92 -0
  77. mlflow/entities/dataset_input.py +51 -0
  78. mlflow/entities/dataset_summary.py +62 -0
  79. mlflow/entities/document.py +48 -0
  80. mlflow/entities/experiment.py +109 -0
  81. mlflow/entities/experiment_tag.py +35 -0
  82. mlflow/entities/file_info.py +45 -0
  83. mlflow/entities/input_tag.py +35 -0
  84. mlflow/entities/lifecycle_stage.py +35 -0
  85. mlflow/entities/logged_model.py +228 -0
  86. mlflow/entities/logged_model_input.py +26 -0
  87. mlflow/entities/logged_model_output.py +32 -0
  88. mlflow/entities/logged_model_parameter.py +46 -0
  89. mlflow/entities/logged_model_status.py +74 -0
  90. mlflow/entities/logged_model_tag.py +33 -0
  91. mlflow/entities/metric.py +200 -0
  92. mlflow/entities/model_registry/__init__.py +29 -0
  93. mlflow/entities/model_registry/_model_registry_entity.py +13 -0
  94. mlflow/entities/model_registry/model_version.py +243 -0
  95. mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
  96. mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
  97. mlflow/entities/model_registry/model_version_search.py +25 -0
  98. mlflow/entities/model_registry/model_version_stages.py +25 -0
  99. mlflow/entities/model_registry/model_version_status.py +35 -0
  100. mlflow/entities/model_registry/model_version_tag.py +35 -0
  101. mlflow/entities/model_registry/prompt.py +73 -0
  102. mlflow/entities/model_registry/prompt_version.py +244 -0
  103. mlflow/entities/model_registry/registered_model.py +175 -0
  104. mlflow/entities/model_registry/registered_model_alias.py +35 -0
  105. mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
  106. mlflow/entities/model_registry/registered_model_search.py +25 -0
  107. mlflow/entities/model_registry/registered_model_tag.py +35 -0
  108. mlflow/entities/multipart_upload.py +74 -0
  109. mlflow/entities/param.py +49 -0
  110. mlflow/entities/run.py +97 -0
  111. mlflow/entities/run_data.py +84 -0
  112. mlflow/entities/run_info.py +188 -0
  113. mlflow/entities/run_inputs.py +59 -0
  114. mlflow/entities/run_outputs.py +43 -0
  115. mlflow/entities/run_status.py +41 -0
  116. mlflow/entities/run_tag.py +36 -0
  117. mlflow/entities/source_type.py +31 -0
  118. mlflow/entities/span.py +774 -0
  119. mlflow/entities/span_event.py +96 -0
  120. mlflow/entities/span_status.py +102 -0
  121. mlflow/entities/trace.py +317 -0
  122. mlflow/entities/trace_data.py +71 -0
  123. mlflow/entities/trace_info.py +220 -0
  124. mlflow/entities/trace_info_v2.py +162 -0
  125. mlflow/entities/trace_location.py +173 -0
  126. mlflow/entities/trace_state.py +39 -0
  127. mlflow/entities/trace_status.py +68 -0
  128. mlflow/entities/view_type.py +51 -0
  129. mlflow/environment_variables.py +866 -0
  130. mlflow/evaluation/__init__.py +16 -0
  131. mlflow/evaluation/assessment.py +369 -0
  132. mlflow/evaluation/evaluation.py +411 -0
  133. mlflow/evaluation/evaluation_tag.py +61 -0
  134. mlflow/evaluation/fluent.py +48 -0
  135. mlflow/evaluation/utils.py +201 -0
  136. mlflow/exceptions.py +213 -0
  137. mlflow/experiments.py +140 -0
  138. mlflow/gemini/__init__.py +81 -0
  139. mlflow/gemini/autolog.py +186 -0
  140. mlflow/gemini/chat.py +261 -0
  141. mlflow/genai/__init__.py +71 -0
  142. mlflow/genai/datasets/__init__.py +67 -0
  143. mlflow/genai/datasets/evaluation_dataset.py +131 -0
  144. mlflow/genai/evaluation/__init__.py +3 -0
  145. mlflow/genai/evaluation/base.py +411 -0
  146. mlflow/genai/evaluation/constant.py +23 -0
  147. mlflow/genai/evaluation/utils.py +244 -0
  148. mlflow/genai/judges/__init__.py +21 -0
  149. mlflow/genai/judges/databricks.py +404 -0
  150. mlflow/genai/label_schemas/__init__.py +153 -0
  151. mlflow/genai/label_schemas/label_schemas.py +209 -0
  152. mlflow/genai/labeling/__init__.py +159 -0
  153. mlflow/genai/labeling/labeling.py +250 -0
  154. mlflow/genai/optimize/__init__.py +13 -0
  155. mlflow/genai/optimize/base.py +198 -0
  156. mlflow/genai/optimize/optimizers/__init__.py +4 -0
  157. mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
  158. mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
  159. mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
  160. mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
  161. mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
  162. mlflow/genai/optimize/types.py +75 -0
  163. mlflow/genai/optimize/util.py +30 -0
  164. mlflow/genai/prompts/__init__.py +206 -0
  165. mlflow/genai/scheduled_scorers.py +431 -0
  166. mlflow/genai/scorers/__init__.py +26 -0
  167. mlflow/genai/scorers/base.py +492 -0
  168. mlflow/genai/scorers/builtin_scorers.py +765 -0
  169. mlflow/genai/scorers/scorer_utils.py +138 -0
  170. mlflow/genai/scorers/validation.py +165 -0
  171. mlflow/genai/utils/data_validation.py +146 -0
  172. mlflow/genai/utils/enum_utils.py +23 -0
  173. mlflow/genai/utils/trace_utils.py +211 -0
  174. mlflow/groq/__init__.py +42 -0
  175. mlflow/groq/_groq_autolog.py +74 -0
  176. mlflow/johnsnowlabs/__init__.py +888 -0
  177. mlflow/langchain/__init__.py +24 -0
  178. mlflow/langchain/api_request_parallel_processor.py +330 -0
  179. mlflow/langchain/autolog.py +147 -0
  180. mlflow/langchain/chat_agent_langgraph.py +340 -0
  181. mlflow/langchain/constant.py +1 -0
  182. mlflow/langchain/constants.py +1 -0
  183. mlflow/langchain/databricks_dependencies.py +444 -0
  184. mlflow/langchain/langchain_tracer.py +597 -0
  185. mlflow/langchain/model.py +919 -0
  186. mlflow/langchain/output_parsers.py +142 -0
  187. mlflow/langchain/retriever_chain.py +153 -0
  188. mlflow/langchain/runnables.py +527 -0
  189. mlflow/langchain/utils/chat.py +402 -0
  190. mlflow/langchain/utils/logging.py +671 -0
  191. mlflow/langchain/utils/serialization.py +36 -0
  192. mlflow/legacy_databricks_cli/__init__.py +0 -0
  193. mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
  194. mlflow/legacy_databricks_cli/configure/provider.py +482 -0
  195. mlflow/litellm/__init__.py +175 -0
  196. mlflow/llama_index/__init__.py +22 -0
  197. mlflow/llama_index/autolog.py +55 -0
  198. mlflow/llama_index/chat.py +43 -0
  199. mlflow/llama_index/constant.py +1 -0
  200. mlflow/llama_index/model.py +577 -0
  201. mlflow/llama_index/pyfunc_wrapper.py +332 -0
  202. mlflow/llama_index/serialize_objects.py +188 -0
  203. mlflow/llama_index/tracer.py +561 -0
  204. mlflow/metrics/__init__.py +479 -0
  205. mlflow/metrics/base.py +39 -0
  206. mlflow/metrics/genai/__init__.py +25 -0
  207. mlflow/metrics/genai/base.py +101 -0
  208. mlflow/metrics/genai/genai_metric.py +771 -0
  209. mlflow/metrics/genai/metric_definitions.py +450 -0
  210. mlflow/metrics/genai/model_utils.py +371 -0
  211. mlflow/metrics/genai/prompt_template.py +68 -0
  212. mlflow/metrics/genai/prompts/__init__.py +0 -0
  213. mlflow/metrics/genai/prompts/v1.py +422 -0
  214. mlflow/metrics/genai/utils.py +6 -0
  215. mlflow/metrics/metric_definitions.py +619 -0
  216. mlflow/mismatch.py +34 -0
  217. mlflow/mistral/__init__.py +34 -0
  218. mlflow/mistral/autolog.py +71 -0
  219. mlflow/mistral/chat.py +135 -0
  220. mlflow/ml_package_versions.py +452 -0
  221. mlflow/models/__init__.py +97 -0
  222. mlflow/models/auth_policy.py +83 -0
  223. mlflow/models/cli.py +354 -0
  224. mlflow/models/container/__init__.py +294 -0
  225. mlflow/models/container/scoring_server/__init__.py +0 -0
  226. mlflow/models/container/scoring_server/nginx.conf +39 -0
  227. mlflow/models/dependencies_schemas.py +287 -0
  228. mlflow/models/display_utils.py +158 -0
  229. mlflow/models/docker_utils.py +211 -0
  230. mlflow/models/evaluation/__init__.py +23 -0
  231. mlflow/models/evaluation/_shap_patch.py +64 -0
  232. mlflow/models/evaluation/artifacts.py +194 -0
  233. mlflow/models/evaluation/base.py +1811 -0
  234. mlflow/models/evaluation/calibration_curve.py +109 -0
  235. mlflow/models/evaluation/default_evaluator.py +996 -0
  236. mlflow/models/evaluation/deprecated.py +23 -0
  237. mlflow/models/evaluation/evaluator_registry.py +80 -0
  238. mlflow/models/evaluation/evaluators/classifier.py +704 -0
  239. mlflow/models/evaluation/evaluators/default.py +233 -0
  240. mlflow/models/evaluation/evaluators/regressor.py +96 -0
  241. mlflow/models/evaluation/evaluators/shap.py +296 -0
  242. mlflow/models/evaluation/lift_curve.py +178 -0
  243. mlflow/models/evaluation/utils/metric.py +123 -0
  244. mlflow/models/evaluation/utils/trace.py +179 -0
  245. mlflow/models/evaluation/validation.py +434 -0
  246. mlflow/models/flavor_backend.py +93 -0
  247. mlflow/models/flavor_backend_registry.py +53 -0
  248. mlflow/models/model.py +1639 -0
  249. mlflow/models/model_config.py +150 -0
  250. mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
  251. mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
  252. mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
  253. mlflow/models/python_api.py +369 -0
  254. mlflow/models/rag_signatures.py +128 -0
  255. mlflow/models/resources.py +321 -0
  256. mlflow/models/signature.py +662 -0
  257. mlflow/models/utils.py +2054 -0
  258. mlflow/models/wheeled_model.py +280 -0
  259. mlflow/openai/__init__.py +57 -0
  260. mlflow/openai/_agent_tracer.py +364 -0
  261. mlflow/openai/api_request_parallel_processor.py +131 -0
  262. mlflow/openai/autolog.py +509 -0
  263. mlflow/openai/constant.py +1 -0
  264. mlflow/openai/model.py +824 -0
  265. mlflow/openai/utils/chat_schema.py +367 -0
  266. mlflow/optuna/__init__.py +3 -0
  267. mlflow/optuna/storage.py +646 -0
  268. mlflow/plugins/__init__.py +72 -0
  269. mlflow/plugins/base.py +358 -0
  270. mlflow/plugins/builtin/__init__.py +24 -0
  271. mlflow/plugins/builtin/pytorch_plugin.py +150 -0
  272. mlflow/plugins/builtin/sklearn_plugin.py +158 -0
  273. mlflow/plugins/builtin/transformers_plugin.py +187 -0
  274. mlflow/plugins/cli.py +321 -0
  275. mlflow/plugins/discovery.py +340 -0
  276. mlflow/plugins/manager.py +465 -0
  277. mlflow/plugins/registry.py +316 -0
  278. mlflow/plugins/templates/framework_plugin_template.py +329 -0
  279. mlflow/prompt/constants.py +20 -0
  280. mlflow/prompt/promptlab_model.py +197 -0
  281. mlflow/prompt/registry_utils.py +248 -0
  282. mlflow/promptflow/__init__.py +495 -0
  283. mlflow/protos/__init__.py +0 -0
  284. mlflow/protos/assessments_pb2.py +174 -0
  285. mlflow/protos/databricks_artifacts_pb2.py +489 -0
  286. mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
  287. mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
  288. mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
  289. mlflow/protos/databricks_pb2.py +267 -0
  290. mlflow/protos/databricks_trace_server_pb2.py +374 -0
  291. mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
  292. mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
  293. mlflow/protos/facet_feature_statistics_pb2.py +296 -0
  294. mlflow/protos/internal_pb2.py +77 -0
  295. mlflow/protos/mlflow_artifacts_pb2.py +336 -0
  296. mlflow/protos/model_registry_pb2.py +1073 -0
  297. mlflow/protos/scalapb/__init__.py +0 -0
  298. mlflow/protos/scalapb/scalapb_pb2.py +104 -0
  299. mlflow/protos/service_pb2.py +2600 -0
  300. mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
  301. mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
  302. mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
  303. mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
  304. mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
  305. mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
  306. mlflow/py.typed +0 -0
  307. mlflow/pydantic_ai/__init__.py +57 -0
  308. mlflow/pydantic_ai/autolog.py +173 -0
  309. mlflow/pyfunc/__init__.py +3844 -0
  310. mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
  311. mlflow/pyfunc/backend.py +523 -0
  312. mlflow/pyfunc/context.py +78 -0
  313. mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
  314. mlflow/pyfunc/loaders/__init__.py +7 -0
  315. mlflow/pyfunc/loaders/chat_agent.py +117 -0
  316. mlflow/pyfunc/loaders/chat_model.py +125 -0
  317. mlflow/pyfunc/loaders/code_model.py +31 -0
  318. mlflow/pyfunc/loaders/responses_agent.py +112 -0
  319. mlflow/pyfunc/mlserver.py +46 -0
  320. mlflow/pyfunc/model.py +1473 -0
  321. mlflow/pyfunc/scoring_server/__init__.py +604 -0
  322. mlflow/pyfunc/scoring_server/app.py +7 -0
  323. mlflow/pyfunc/scoring_server/client.py +146 -0
  324. mlflow/pyfunc/spark_model_cache.py +48 -0
  325. mlflow/pyfunc/stdin_server.py +44 -0
  326. mlflow/pyfunc/utils/__init__.py +3 -0
  327. mlflow/pyfunc/utils/data_validation.py +224 -0
  328. mlflow/pyfunc/utils/environment.py +22 -0
  329. mlflow/pyfunc/utils/input_converter.py +47 -0
  330. mlflow/pyfunc/utils/serving_data_parser.py +11 -0
  331. mlflow/pytorch/__init__.py +1171 -0
  332. mlflow/pytorch/_lightning_autolog.py +580 -0
  333. mlflow/pytorch/_pytorch_autolog.py +50 -0
  334. mlflow/pytorch/pickle_module.py +35 -0
  335. mlflow/rfunc/__init__.py +42 -0
  336. mlflow/rfunc/backend.py +134 -0
  337. mlflow/runs.py +89 -0
  338. mlflow/server/__init__.py +302 -0
  339. mlflow/server/auth/__init__.py +1224 -0
  340. mlflow/server/auth/__main__.py +4 -0
  341. mlflow/server/auth/basic_auth.ini +6 -0
  342. mlflow/server/auth/cli.py +11 -0
  343. mlflow/server/auth/client.py +537 -0
  344. mlflow/server/auth/config.py +34 -0
  345. mlflow/server/auth/db/__init__.py +0 -0
  346. mlflow/server/auth/db/cli.py +18 -0
  347. mlflow/server/auth/db/migrations/__init__.py +0 -0
  348. mlflow/server/auth/db/migrations/alembic.ini +110 -0
  349. mlflow/server/auth/db/migrations/env.py +76 -0
  350. mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
  351. mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
  352. mlflow/server/auth/db/models.py +67 -0
  353. mlflow/server/auth/db/utils.py +37 -0
  354. mlflow/server/auth/entities.py +165 -0
  355. mlflow/server/auth/logo.py +14 -0
  356. mlflow/server/auth/permissions.py +65 -0
  357. mlflow/server/auth/routes.py +18 -0
  358. mlflow/server/auth/sqlalchemy_store.py +263 -0
  359. mlflow/server/graphql/__init__.py +0 -0
  360. mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
  361. mlflow/server/graphql/graphql_custom_scalars.py +24 -0
  362. mlflow/server/graphql/graphql_errors.py +15 -0
  363. mlflow/server/graphql/graphql_no_batching.py +89 -0
  364. mlflow/server/graphql/graphql_schema_extensions.py +74 -0
  365. mlflow/server/handlers.py +3217 -0
  366. mlflow/server/prometheus_exporter.py +17 -0
  367. mlflow/server/validation.py +30 -0
  368. mlflow/shap/__init__.py +691 -0
  369. mlflow/sklearn/__init__.py +1994 -0
  370. mlflow/sklearn/utils.py +1041 -0
  371. mlflow/smolagents/__init__.py +66 -0
  372. mlflow/smolagents/autolog.py +139 -0
  373. mlflow/smolagents/chat.py +29 -0
  374. mlflow/store/__init__.py +10 -0
  375. mlflow/store/_unity_catalog/__init__.py +1 -0
  376. mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
  377. mlflow/store/_unity_catalog/lineage/constants.py +2 -0
  378. mlflow/store/_unity_catalog/registry/__init__.py +6 -0
  379. mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
  380. mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
  381. mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
  382. mlflow/store/_unity_catalog/registry/utils.py +121 -0
  383. mlflow/store/artifact/__init__.py +0 -0
  384. mlflow/store/artifact/artifact_repo.py +472 -0
  385. mlflow/store/artifact/artifact_repository_registry.py +154 -0
  386. mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
  387. mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
  388. mlflow/store/artifact/cli.py +141 -0
  389. mlflow/store/artifact/cloud_artifact_repo.py +332 -0
  390. mlflow/store/artifact/databricks_artifact_repo.py +729 -0
  391. mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
  392. mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
  393. mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
  394. mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
  395. mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
  396. mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
  397. mlflow/store/artifact/ftp_artifact_repo.py +132 -0
  398. mlflow/store/artifact/gcs_artifact_repo.py +296 -0
  399. mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
  400. mlflow/store/artifact/http_artifact_repo.py +218 -0
  401. mlflow/store/artifact/local_artifact_repo.py +142 -0
  402. mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
  403. mlflow/store/artifact/models_artifact_repo.py +259 -0
  404. mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
  405. mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
  406. mlflow/store/artifact/r2_artifact_repo.py +70 -0
  407. mlflow/store/artifact/runs_artifact_repo.py +265 -0
  408. mlflow/store/artifact/s3_artifact_repo.py +330 -0
  409. mlflow/store/artifact/sftp_artifact_repo.py +141 -0
  410. mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
  411. mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
  412. mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
  413. mlflow/store/artifact/utils/__init__.py +0 -0
  414. mlflow/store/artifact/utils/models.py +148 -0
  415. mlflow/store/db/__init__.py +0 -0
  416. mlflow/store/db/base_sql_model.py +3 -0
  417. mlflow/store/db/db_types.py +10 -0
  418. mlflow/store/db/utils.py +314 -0
  419. mlflow/store/db_migrations/__init__.py +0 -0
  420. mlflow/store/db_migrations/alembic.ini +74 -0
  421. mlflow/store/db_migrations/env.py +84 -0
  422. mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
  423. mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
  424. mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
  425. mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
  426. mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
  427. mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
  428. mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
  429. mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
  430. mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
  431. mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
  432. mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
  433. mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
  434. mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
  435. mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
  436. mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
  437. mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
  438. mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
  439. mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
  440. mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
  441. mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
  442. mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
  443. mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
  444. mlflow/store/db_migrations/versions/__init__.py +0 -0
  445. mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
  446. mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
  447. mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
  448. mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
  449. mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
  450. mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
  451. mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
  452. mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
  453. mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
  454. mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
  455. mlflow/store/entities/__init__.py +3 -0
  456. mlflow/store/entities/paged_list.py +18 -0
  457. mlflow/store/model_registry/__init__.py +10 -0
  458. mlflow/store/model_registry/abstract_store.py +1081 -0
  459. mlflow/store/model_registry/base_rest_store.py +44 -0
  460. mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
  461. mlflow/store/model_registry/dbmodels/__init__.py +0 -0
  462. mlflow/store/model_registry/dbmodels/models.py +206 -0
  463. mlflow/store/model_registry/file_store.py +1091 -0
  464. mlflow/store/model_registry/rest_store.py +481 -0
  465. mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
  466. mlflow/store/tracking/__init__.py +23 -0
  467. mlflow/store/tracking/abstract_store.py +816 -0
  468. mlflow/store/tracking/dbmodels/__init__.py +0 -0
  469. mlflow/store/tracking/dbmodels/initial_models.py +243 -0
  470. mlflow/store/tracking/dbmodels/models.py +1073 -0
  471. mlflow/store/tracking/file_store.py +2438 -0
  472. mlflow/store/tracking/postgres_managed_identity.py +146 -0
  473. mlflow/store/tracking/rest_store.py +1131 -0
  474. mlflow/store/tracking/sqlalchemy_store.py +2785 -0
  475. mlflow/system_metrics/__init__.py +61 -0
  476. mlflow/system_metrics/metrics/__init__.py +0 -0
  477. mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
  478. mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
  479. mlflow/system_metrics/metrics/disk_monitor.py +21 -0
  480. mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
  481. mlflow/system_metrics/metrics/network_monitor.py +34 -0
  482. mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
  483. mlflow/system_metrics/system_metrics_monitor.py +198 -0
  484. mlflow/tracing/__init__.py +16 -0
  485. mlflow/tracing/assessment.py +356 -0
  486. mlflow/tracing/client.py +531 -0
  487. mlflow/tracing/config.py +125 -0
  488. mlflow/tracing/constant.py +105 -0
  489. mlflow/tracing/destination.py +81 -0
  490. mlflow/tracing/display/__init__.py +40 -0
  491. mlflow/tracing/display/display_handler.py +196 -0
  492. mlflow/tracing/export/async_export_queue.py +186 -0
  493. mlflow/tracing/export/inference_table.py +138 -0
  494. mlflow/tracing/export/mlflow_v3.py +137 -0
  495. mlflow/tracing/export/utils.py +70 -0
  496. mlflow/tracing/fluent.py +1417 -0
  497. mlflow/tracing/processor/base_mlflow.py +199 -0
  498. mlflow/tracing/processor/inference_table.py +175 -0
  499. mlflow/tracing/processor/mlflow_v3.py +47 -0
  500. mlflow/tracing/processor/otel.py +73 -0
  501. mlflow/tracing/provider.py +487 -0
  502. mlflow/tracing/trace_manager.py +200 -0
  503. mlflow/tracing/utils/__init__.py +616 -0
  504. mlflow/tracing/utils/artifact_utils.py +28 -0
  505. mlflow/tracing/utils/copy.py +55 -0
  506. mlflow/tracing/utils/environment.py +55 -0
  507. mlflow/tracing/utils/exception.py +21 -0
  508. mlflow/tracing/utils/once.py +35 -0
  509. mlflow/tracing/utils/otlp.py +63 -0
  510. mlflow/tracing/utils/processor.py +54 -0
  511. mlflow/tracing/utils/search.py +292 -0
  512. mlflow/tracing/utils/timeout.py +250 -0
  513. mlflow/tracing/utils/token.py +19 -0
  514. mlflow/tracing/utils/truncation.py +124 -0
  515. mlflow/tracing/utils/warning.py +76 -0
  516. mlflow/tracking/__init__.py +39 -0
  517. mlflow/tracking/_model_registry/__init__.py +1 -0
  518. mlflow/tracking/_model_registry/client.py +764 -0
  519. mlflow/tracking/_model_registry/fluent.py +853 -0
  520. mlflow/tracking/_model_registry/registry.py +67 -0
  521. mlflow/tracking/_model_registry/utils.py +251 -0
  522. mlflow/tracking/_tracking_service/__init__.py +0 -0
  523. mlflow/tracking/_tracking_service/client.py +883 -0
  524. mlflow/tracking/_tracking_service/registry.py +56 -0
  525. mlflow/tracking/_tracking_service/utils.py +275 -0
  526. mlflow/tracking/artifact_utils.py +179 -0
  527. mlflow/tracking/client.py +5900 -0
  528. mlflow/tracking/context/__init__.py +0 -0
  529. mlflow/tracking/context/abstract_context.py +35 -0
  530. mlflow/tracking/context/databricks_cluster_context.py +15 -0
  531. mlflow/tracking/context/databricks_command_context.py +15 -0
  532. mlflow/tracking/context/databricks_job_context.py +49 -0
  533. mlflow/tracking/context/databricks_notebook_context.py +41 -0
  534. mlflow/tracking/context/databricks_repo_context.py +43 -0
  535. mlflow/tracking/context/default_context.py +51 -0
  536. mlflow/tracking/context/git_context.py +32 -0
  537. mlflow/tracking/context/registry.py +98 -0
  538. mlflow/tracking/context/system_environment_context.py +15 -0
  539. mlflow/tracking/default_experiment/__init__.py +1 -0
  540. mlflow/tracking/default_experiment/abstract_context.py +43 -0
  541. mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
  542. mlflow/tracking/default_experiment/registry.py +75 -0
  543. mlflow/tracking/fluent.py +3595 -0
  544. mlflow/tracking/metric_value_conversion_utils.py +93 -0
  545. mlflow/tracking/multimedia.py +206 -0
  546. mlflow/tracking/registry.py +86 -0
  547. mlflow/tracking/request_auth/__init__.py +0 -0
  548. mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
  549. mlflow/tracking/request_auth/registry.py +60 -0
  550. mlflow/tracking/request_header/__init__.py +0 -0
  551. mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
  552. mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
  553. mlflow/tracking/request_header/default_request_header_provider.py +17 -0
  554. mlflow/tracking/request_header/registry.py +79 -0
  555. mlflow/transformers/__init__.py +2982 -0
  556. mlflow/transformers/flavor_config.py +258 -0
  557. mlflow/transformers/hub_utils.py +83 -0
  558. mlflow/transformers/llm_inference_utils.py +468 -0
  559. mlflow/transformers/model_io.py +301 -0
  560. mlflow/transformers/peft.py +51 -0
  561. mlflow/transformers/signature.py +183 -0
  562. mlflow/transformers/torch_utils.py +55 -0
  563. mlflow/types/__init__.py +21 -0
  564. mlflow/types/agent.py +270 -0
  565. mlflow/types/chat.py +240 -0
  566. mlflow/types/llm.py +935 -0
  567. mlflow/types/responses.py +139 -0
  568. mlflow/types/responses_helpers.py +416 -0
  569. mlflow/types/schema.py +1505 -0
  570. mlflow/types/type_hints.py +647 -0
  571. mlflow/types/utils.py +753 -0
  572. mlflow/utils/__init__.py +283 -0
  573. mlflow/utils/_capture_modules.py +256 -0
  574. mlflow/utils/_capture_transformers_modules.py +75 -0
  575. mlflow/utils/_spark_utils.py +201 -0
  576. mlflow/utils/_unity_catalog_oss_utils.py +97 -0
  577. mlflow/utils/_unity_catalog_utils.py +479 -0
  578. mlflow/utils/annotations.py +218 -0
  579. mlflow/utils/arguments_utils.py +16 -0
  580. mlflow/utils/async_logging/__init__.py +1 -0
  581. mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
  582. mlflow/utils/async_logging/async_logging_queue.py +366 -0
  583. mlflow/utils/async_logging/run_artifact.py +38 -0
  584. mlflow/utils/async_logging/run_batch.py +58 -0
  585. mlflow/utils/async_logging/run_operations.py +49 -0
  586. mlflow/utils/autologging_utils/__init__.py +737 -0
  587. mlflow/utils/autologging_utils/client.py +432 -0
  588. mlflow/utils/autologging_utils/config.py +33 -0
  589. mlflow/utils/autologging_utils/events.py +294 -0
  590. mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
  591. mlflow/utils/autologging_utils/metrics_queue.py +71 -0
  592. mlflow/utils/autologging_utils/safety.py +1104 -0
  593. mlflow/utils/autologging_utils/versioning.py +95 -0
  594. mlflow/utils/checkpoint_utils.py +206 -0
  595. mlflow/utils/class_utils.py +6 -0
  596. mlflow/utils/cli_args.py +257 -0
  597. mlflow/utils/conda.py +354 -0
  598. mlflow/utils/credentials.py +231 -0
  599. mlflow/utils/data_utils.py +17 -0
  600. mlflow/utils/databricks_utils.py +1436 -0
  601. mlflow/utils/docstring_utils.py +477 -0
  602. mlflow/utils/doctor.py +133 -0
  603. mlflow/utils/download_cloud_file_chunk.py +43 -0
  604. mlflow/utils/env_manager.py +16 -0
  605. mlflow/utils/env_pack.py +131 -0
  606. mlflow/utils/environment.py +1009 -0
  607. mlflow/utils/exception_utils.py +14 -0
  608. mlflow/utils/file_utils.py +978 -0
  609. mlflow/utils/git_utils.py +77 -0
  610. mlflow/utils/gorilla.py +797 -0
  611. mlflow/utils/import_hooks/__init__.py +363 -0
  612. mlflow/utils/lazy_load.py +51 -0
  613. mlflow/utils/logging_utils.py +168 -0
  614. mlflow/utils/mime_type_utils.py +58 -0
  615. mlflow/utils/mlflow_tags.py +103 -0
  616. mlflow/utils/model_utils.py +486 -0
  617. mlflow/utils/name_utils.py +346 -0
  618. mlflow/utils/nfs_on_spark.py +62 -0
  619. mlflow/utils/openai_utils.py +164 -0
  620. mlflow/utils/os.py +12 -0
  621. mlflow/utils/oss_registry_utils.py +29 -0
  622. mlflow/utils/plugins.py +17 -0
  623. mlflow/utils/process.py +182 -0
  624. mlflow/utils/promptlab_utils.py +146 -0
  625. mlflow/utils/proto_json_utils.py +743 -0
  626. mlflow/utils/pydantic_utils.py +54 -0
  627. mlflow/utils/request_utils.py +279 -0
  628. mlflow/utils/requirements_utils.py +704 -0
  629. mlflow/utils/rest_utils.py +673 -0
  630. mlflow/utils/search_logged_model_utils.py +127 -0
  631. mlflow/utils/search_utils.py +2111 -0
  632. mlflow/utils/secure_loading.py +221 -0
  633. mlflow/utils/security_validation.py +384 -0
  634. mlflow/utils/server_cli_utils.py +61 -0
  635. mlflow/utils/spark_utils.py +15 -0
  636. mlflow/utils/string_utils.py +138 -0
  637. mlflow/utils/thread_utils.py +63 -0
  638. mlflow/utils/time.py +54 -0
  639. mlflow/utils/timeout.py +42 -0
  640. mlflow/utils/uri.py +572 -0
  641. mlflow/utils/validation.py +662 -0
  642. mlflow/utils/virtualenv.py +458 -0
  643. mlflow/utils/warnings_utils.py +25 -0
  644. mlflow/utils/yaml_utils.py +179 -0
  645. mlflow/version.py +24 -0
@@ -0,0 +1,296 @@
1
+ import datetime
2
+ import importlib.metadata
3
+ import os
4
+ import posixpath
5
+ import urllib.parse
6
+ from collections import namedtuple
7
+ from typing import Optional
8
+
9
+ from packaging.version import Version
10
+
11
+ from mlflow.entities import FileInfo
12
+ from mlflow.entities.multipart_upload import (
13
+ CreateMultipartUploadResponse,
14
+ MultipartUploadCredential,
15
+ )
16
+ from mlflow.environment_variables import (
17
+ MLFLOW_ARTIFACT_UPLOAD_DOWNLOAD_TIMEOUT,
18
+ MLFLOW_GCS_DOWNLOAD_CHUNK_SIZE,
19
+ MLFLOW_GCS_UPLOAD_CHUNK_SIZE,
20
+ )
21
+ from mlflow.exceptions import _UnsupportedMultipartUploadException
22
+ from mlflow.store.artifact.artifact_repo import (
23
+ ArtifactRepository,
24
+ MultipartUploadMixin,
25
+ _retry_with_new_creds,
26
+ )
27
+ from mlflow.utils.file_utils import relative_path_to_artifact_path
28
+
29
# Bundle of values needed to drive a GCS XML multipart upload for one blob:
# the authorized transport, the object URL, the request headers and the content type.
GCSMPUArguments = namedtuple(
    "GCSMPUArguments",
    "transport url headers content_type",
)
30
+
31
+
32
class GCSArtifactRepository(ArtifactRepository, MultipartUploadMixin):
    """
    Stores artifacts on Google Cloud Storage.

    Args:
        artifact_uri: URI of GCS bucket
        tracking_uri: Optional tracking URI, forwarded unchanged to the base class.
        client: Optional. The client to use for GCS operations; a default
            client object will be created if unspecified, using default
            credentials as described in https://google-cloud.readthedocs.io/en/latest/core/auth.html
        credential_refresh_def: Optional zero-argument callable used by
            ``_refresh_credentials``; its return value is indexed with ``"oauth_token"``.
    """

    def __init__(
        self,
        artifact_uri: str,
        tracking_uri: Optional[str] = None,
        client=None,
        credential_refresh_def=None,
    ) -> None:
        super().__init__(artifact_uri, tracking_uri)
        from google.auth.exceptions import DefaultCredentialsError
        from google.cloud import storage as gcs_storage
        from google.cloud.storage.constants import _DEFAULT_TIMEOUT

        self._GCS_DOWNLOAD_CHUNK_SIZE = MLFLOW_GCS_DOWNLOAD_CHUNK_SIZE.get()
        self._GCS_UPLOAD_CHUNK_SIZE = MLFLOW_GCS_UPLOAD_CHUNK_SIZE.get()
        self._GCS_DEFAULT_TIMEOUT = (
            MLFLOW_ARTIFACT_UPLOAD_DOWNLOAD_TIMEOUT.get() or _DEFAULT_TIMEOUT
        )
        # Method to use for refresh
        self.credential_refresh_def = credential_refresh_def
        # If the user-supplied timeout environment variable value is -1,
        # use `None` for `self._GCS_DEFAULT_TIMEOUT`
        # to use indefinite timeout
        self._GCS_DEFAULT_TIMEOUT = (
            None if self._GCS_DEFAULT_TIMEOUT == -1 else self._GCS_DEFAULT_TIMEOUT
        )
        if client is not None:
            self.client = client
        else:
            try:
                self.client = gcs_storage.Client()
            except DefaultCredentialsError:
                # No default credentials available: fall back to an anonymous client.
                self.client = gcs_storage.Client.create_anonymous_client()

    @staticmethod
    def parse_gcs_uri(uri):
        """Parse an GCS URI, returning (bucket, path)

        Raises:
            Exception: If the URI scheme is not ``gs``.
        """
        parsed = urllib.parse.urlparse(uri)
        if parsed.scheme != "gs":
            raise Exception(f"Not a GCS URI: {uri}")
        path = parsed.path
        if path.startswith("/"):
            path = path[1:]
        return parsed.netloc, path

    @staticmethod
    def _relative_offset(artifact_path):
        """Return how many leading characters of a blob name belong to the artifact root.

        The count covers the root path plus exactly one ``/`` separator. When the root
        already ends with ``/`` (artifact URI written with a trailing slash) the separator
        is part of ``artifact_path`` and must not be counted a second time, otherwise the
        first character of every listed relative path would be cut off.
        """
        if artifact_path.endswith("/"):
            return len(artifact_path)
        return len(artifact_path) + 1

    def _get_bucket(self, bucket):
        """Return the bucket object named ``bucket`` from the current client."""
        return self.client.bucket(bucket)

    def _refresh_credentials(self):
        """Rebuild ``self.client`` from a freshly obtained OAuth token and return the bucket.

        When no ``credential_refresh_def`` was supplied the existing client is kept and
        the bucket of ``self.artifact_uri`` is returned as is.
        """
        from google.cloud.storage import Client
        from google.oauth2.credentials import Credentials

        (bucket, _) = self.parse_gcs_uri(self.artifact_uri)
        if not self.credential_refresh_def:
            return self._get_bucket(bucket)
        new_token = self.credential_refresh_def()
        credentials = Credentials(new_token["oauth_token"])
        self.client = Client(project="mlflow", credentials=credentials)
        return self._get_bucket(bucket)

    def log_artifact(self, local_file, artifact_path=None):
        """Upload ``local_file`` under the artifact root, optionally below ``artifact_path``.

        The destination blob keeps the base name of ``local_file``.
        """
        (bucket, dest_path) = self.parse_gcs_uri(self.artifact_uri)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)
        dest_path = posixpath.join(dest_path, os.path.basename(local_file))

        gcs_bucket = self._get_bucket(bucket)
        blob = gcs_bucket.blob(dest_path, chunk_size=self._GCS_UPLOAD_CHUNK_SIZE)
        blob.upload_from_filename(local_file, timeout=self._GCS_DEFAULT_TIMEOUT)

    def log_artifacts(self, local_dir, artifact_path=None):
        """Recursively upload the contents of ``local_dir``, preserving its sub-directories.

        Args:
            local_dir: Local directory to walk.
            artifact_path: Optional sub-path of the artifact root to upload into.
        """
        (bucket, dest_path) = self.parse_gcs_uri(self.artifact_uri)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)

        local_dir = os.path.abspath(local_dir)

        for root, _, filenames in os.walk(local_dir):
            upload_path = dest_path
            if root != local_dir:
                rel_path = os.path.relpath(root, local_dir)
                rel_path = relative_path_to_artifact_path(rel_path)
                upload_path = posixpath.join(dest_path, rel_path)
            for f in filenames:
                # Fetched per file so that a client replaced by a credential refresh is used.
                gcs_bucket = self._get_bucket(bucket)
                path = posixpath.join(upload_path, f)
                # For large models, we need to speculatively retry a credential refresh
                # and throw if it still fails. We cannot use the built-in refresh because UC
                # does not return a refresh token with the oauth token
                file_name = os.path.join(root, f)

                # The closure is invoked within this same iteration, so capturing the
                # loop variables `path` and `file_name` is safe.
                def try_func(gcs_bucket):
                    gcs_bucket.blob(
                        path, chunk_size=self._GCS_UPLOAD_CHUNK_SIZE
                    ).upload_from_filename(file_name, timeout=self._GCS_DEFAULT_TIMEOUT)

                _retry_with_new_creds(
                    try_func=try_func, creds_func=self._refresh_credentials, orig_creds=gcs_bucket
                )

    def list_artifacts(self, path=None):
        """List the files and folders directly under ``path``.

        Args:
            path: Optional path relative to the artifact root; the root itself when omitted.

        Returns:
            ``FileInfo`` entries with paths relative to the artifact root, sorted by path.
        """
        (bucket, artifact_path) = self.parse_gcs_uri(self.artifact_uri)
        dest_path = artifact_path
        if path:
            dest_path = posixpath.join(dest_path, path)
        prefix = dest_path if dest_path.endswith("/") else dest_path + "/"

        bkt = self._get_bucket(bucket)

        infos = self._list_folders(bkt, prefix, artifact_path)

        # Characters to drop from each blob name to make it relative to the artifact root.
        offset = self._relative_offset(artifact_path)

        results = bkt.list_blobs(prefix=prefix, delimiter="/")
        for result in results:
            # skip blobs matching current directory path as list_blobs api
            # returns subdirectories as well
            if result.name == prefix:
                continue
            blob_path = result.name[offset:]
            infos.append(FileInfo(blob_path, False, result.size))

        return sorted(infos, key=lambda f: f.path)

    def _list_folders(self, bkt, prefix, artifact_path):
        """Return directory ``FileInfo`` entries for the sub-prefixes found under ``prefix``."""
        results = bkt.list_blobs(prefix=prefix, delimiter="/")
        dir_paths = set()
        for page in results.pages:
            dir_paths.update(page.prefixes)

        offset = self._relative_offset(artifact_path)
        # Each prefix ends with the "/" delimiter; the `-1` drops it.
        return [FileInfo(path[offset:-1], True, None) for path in dir_paths]

    def _download_file(self, remote_file_path, local_path):
        """Download the blob at ``remote_file_path`` (relative to the root) to ``local_path``."""
        (bucket, remote_root_path) = self.parse_gcs_uri(self.artifact_uri)
        remote_full_path = posixpath.join(remote_root_path, remote_file_path)
        gcs_bucket = self._get_bucket(bucket)
        gcs_bucket.blob(
            remote_full_path, chunk_size=self._GCS_DOWNLOAD_CHUNK_SIZE
        ).download_to_filename(local_path, timeout=self._GCS_DEFAULT_TIMEOUT)

    def delete_artifacts(self, artifact_path=None):
        """Delete every blob whose name starts with the resolved destination path.

        NOTE(review): the listing prefix carries no trailing "/", so blobs of sibling
        paths sharing the same name prefix also match — confirm this is intended.
        """
        (bucket_name, dest_path) = self.parse_gcs_uri(self.artifact_uri)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)

        gcs_bucket = self._get_bucket(bucket_name)
        blobs = gcs_bucket.list_blobs(prefix=f"{dest_path}")
        for blob in blobs:
            blob.delete()

    @staticmethod
    def _validate_support_mpu():
        """Raise if the installed Google libraries are older than the versions checked here.

        Raises:
            _UnsupportedMultipartUploadException: If ``google-cloud-storage`` < 2.12.0 or
                ``google-resumable-media`` < 2.6.0.
        """
        if Version(importlib.metadata.version("google-cloud-storage")) < Version(
            "2.12.0"
        ) or Version(importlib.metadata.version("google-resumable-media")) < Version("2.6.0"):
            raise _UnsupportedMultipartUploadException()

    @staticmethod
    def _gcs_mpu_arguments(filename: str, blob) -> GCSMPUArguments:
        """See :py:func:`google.cloud.storage.transfer_manager.upload_chunks_concurrently`"""
        from google.cloud.storage.transfer_manager import _headers_from_metadata

        bucket = blob.bucket
        client = blob.client
        transport = blob._get_transport(client)

        hostname = client._connection.get_api_base_url_for_mtls()
        url = f"{hostname}/{bucket.name}/{blob.name}"

        base_headers, object_metadata, content_type = blob._get_upload_arguments(
            client, None, filename=filename, command="tm.upload_sharded"
        )
        headers = {**base_headers, **_headers_from_metadata(object_metadata)}

        if blob.user_project is not None:
            headers["x-goog-user-project"] = blob.user_project

        if blob.kms_key_name is not None and "cryptoKeyVersions" not in blob.kms_key_name:
            headers["x-goog-encryption-kms-key-name"] = blob.kms_key_name

        return GCSMPUArguments(
            transport=transport, url=url, headers=headers, content_type=content_type
        )

    def create_multipart_upload(self, local_file, num_parts=1, artifact_path=None):
        """Initiate a multipart upload and return one signed PUT URL per part.

        Args:
            local_file: Local file whose base name becomes the blob name.
            num_parts: Number of parts to create credentials for.
            artifact_path: Optional sub-path of the artifact root.

        Returns:
            A ``CreateMultipartUploadResponse`` holding the upload id and the credentials.
        """
        self._validate_support_mpu()
        from google.resumable_media.requests import XMLMPUContainer

        (bucket, dest_path) = self.parse_gcs_uri(self.artifact_uri)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)
        dest_path = posixpath.join(dest_path, os.path.basename(local_file))

        gcs_bucket = self._get_bucket(bucket)
        blob = gcs_bucket.blob(dest_path)
        args = self._gcs_mpu_arguments(local_file, blob)
        container = XMLMPUContainer(args.url, local_file, headers=args.headers)
        container.initiate(transport=args.transport, content_type=args.content_type)
        upload_id = container.upload_id

        credentials = []
        for i in range(1, num_parts + 1):  # part number must be in [1, 10000]
            signed_url = blob.generate_signed_url(
                method="PUT",
                version="v4",
                expiration=datetime.timedelta(minutes=60),
                query_parameters={
                    "partNumber": i,
                    "uploadId": upload_id,
                },
            )
            credentials.append(
                MultipartUploadCredential(
                    url=signed_url,
                    part_number=i,
                    headers={},
                )
            )
        return CreateMultipartUploadResponse(
            credentials=credentials,
            upload_id=upload_id,
        )

    def complete_multipart_upload(self, local_file, upload_id, parts=None, artifact_path=None):
        """Register the uploaded parts and finalize the multipart upload ``upload_id``.

        Args:
            local_file: Local file whose base name is the blob name.
            upload_id: Identifier of the upload to finalize.
            parts: Iterable of objects exposing ``part_number`` and ``etag``.
            artifact_path: Optional sub-path of the artifact root.
        """
        self._validate_support_mpu()
        from google.resumable_media.requests import XMLMPUContainer

        (bucket, dest_path) = self.parse_gcs_uri(self.artifact_uri)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)
        dest_path = posixpath.join(dest_path, os.path.basename(local_file))

        gcs_bucket = self._get_bucket(bucket)
        blob = gcs_bucket.blob(dest_path)
        args = self._gcs_mpu_arguments(local_file, blob)
        container = XMLMPUContainer(args.url, local_file, headers=args.headers)
        # Attach to the already-initiated upload instead of initiating a new one.
        container._upload_id = upload_id
        for part in parts:
            container.register_part(part.part_number, part.etag)

        container.finalize(transport=args.transport)

    def abort_multipart_upload(self, local_file, upload_id, artifact_path=None):
        """Cancel the multipart upload ``upload_id`` for the blob derived from ``local_file``."""
        self._validate_support_mpu()
        from google.resumable_media.requests import XMLMPUContainer

        (bucket, dest_path) = self.parse_gcs_uri(self.artifact_uri)
        if artifact_path:
            dest_path = posixpath.join(dest_path, artifact_path)
        dest_path = posixpath.join(dest_path, os.path.basename(local_file))

        gcs_bucket = self._get_bucket(bucket)
        blob = gcs_bucket.blob(dest_path)
        args = self._gcs_mpu_arguments(local_file, blob)
        container = XMLMPUContainer(args.url, local_file, headers=args.headers)
        # Attach to the already-initiated upload so that it can be cancelled.
        container._upload_id = upload_id
        container.cancel(transport=args.transport)
@@ -0,0 +1,209 @@
1
+ import os
2
+ import posixpath
3
+ import urllib.parse
4
+ from contextlib import contextmanager
5
+ from typing import Optional
6
+
7
+ try:
8
+ from pyarrow.fs import FileSelector, FileType, HadoopFileSystem
9
+ except ImportError:
10
+ pass
11
+
12
+ from mlflow.entities import FileInfo
13
+ from mlflow.environment_variables import (
14
+ MLFLOW_KERBEROS_TICKET_CACHE,
15
+ MLFLOW_KERBEROS_USER,
16
+ MLFLOW_PYARROW_EXTRA_CONF,
17
+ )
18
+ from mlflow.store.artifact.artifact_repo import ArtifactRepository
19
+ from mlflow.utils.file_utils import relative_path_to_artifact_path
20
+
21
+
22
class HdfsArtifactRepository(ArtifactRepository):
    """
    Stores artifacts on HDFS.

    This repository is used with URIs of the form ``hdfs:/<path>``. The repository can only be used
    together with the RestStore.
    """

    def __init__(self, artifact_uri: str, tracking_uri: Optional[str] = None) -> None:
        super().__init__(artifact_uri, tracking_uri)
        self.scheme, self.host, self.port, self.path = _resolve_connection_params(artifact_uri)

    def log_artifact(self, local_file, artifact_path=None):
        """
        Log artifact in hdfs.

        Args:
            local_file: Source file path.
            artifact_path: When specified will attempt to write under artifact_uri/artifact_path.
        """
        hdfs_base_path = _resolve_base_path(self.path, artifact_path)

        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            _, file_name = os.path.split(local_file)
            destination_path = posixpath.join(hdfs_base_path, file_name)
            with open(local_file, "rb") as source:
                with hdfs.open_output_stream(destination_path) as destination:
                    destination.write(source.read())

    def log_artifacts(self, local_dir, artifact_path=None):
        """
        Log artifacts in hdfs.
        Missing remote sub-directories will be created if needed.

        Args:
            local_dir: Source dir path.
            artifact_path: When specified will attempt to write under artifact_uri/artifact_path.
        """
        hdfs_base_path = _resolve_base_path(self.path, artifact_path)

        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            if not hdfs.get_file_info(hdfs_base_path).type == FileType.Directory:
                hdfs.create_dir(hdfs_base_path, recursive=True)

            for subdir_path, _, files in os.walk(local_dir):
                # None for the walk root itself, otherwise the sub-directory relative path.
                relative_path = _relative_path_local(local_dir, subdir_path)

                hdfs_subdir_path = (
                    posixpath.join(hdfs_base_path, relative_path)
                    if relative_path
                    else hdfs_base_path
                )

                if not hdfs.get_file_info(hdfs_subdir_path).type == FileType.Directory:
                    hdfs.create_dir(hdfs_subdir_path, recursive=True)

                for each_file in files:
                    source_path = os.path.join(subdir_path, each_file)
                    destination_path = posixpath.join(hdfs_subdir_path, each_file)
                    with open(source_path, "rb") as source:
                        with hdfs.open_output_stream(destination_path) as destination:
                            destination.write(source.read())

    def list_artifacts(self, path=None):
        """
        Lists files and directories under artifacts directory for the current run_id.
        (self.path contains the base path - hdfs:/some/path/run_id/artifacts)

        Args:
            path: Relative source path. Possible subdirectory existing under
                hdfs:/some/path/run_id/artifacts

        Returns:
            List of FileInfos under given path, sorted by their relative path
        """
        hdfs_base_path = _resolve_base_path(self.path, path)

        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            paths = []
            base_info = hdfs.get_file_info(hdfs_base_path)
            if base_info.type == FileType.Directory:
                selector = FileSelector(hdfs_base_path)
            elif base_info.type == FileType.File:
                selector = [hdfs_base_path]
            else:
                # Neither a file nor a directory: nothing to list.
                return []

            for file_detail in hdfs.get_file_info(selector):
                file_name = file_detail.path

                # file_name is hdfs_base_path and not a child of that path
                if file_name == hdfs_base_path:
                    continue

                # Strip off anything that comes before the artifact root e.g. hdfs://name
                offset = file_name.index(self.path)
                rel_path = _relative_path_remote(self.path, file_name[offset:])
                is_dir = file_detail.type == FileType.Directory
                size = file_detail.size
                paths.append(FileInfo(rel_path, is_dir=is_dir, file_size=size))
            # Sort on each entry's own path; keying on the whole list left the
            # result in listing order.
            return sorted(paths, key=lambda f: f.path)

    def _is_directory(self, artifact_path):
        """Return True when ``artifact_path`` (relative to the root) is an HDFS directory."""
        hdfs_base_path = _resolve_base_path(self.path, artifact_path)
        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            return hdfs.get_file_info(hdfs_base_path).type == FileType.Directory

    def _download_file(self, remote_file_path, local_path):
        """Copy the HDFS file at ``remote_file_path`` (relative to the root) to ``local_path``."""
        hdfs_base_path = _resolve_base_path(self.path, remote_file_path)
        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            with hdfs.open_input_stream(hdfs_base_path) as source:
                with open(local_path, "wb") as destination:
                    destination.write(source.read())

    def delete_artifacts(self, artifact_path=None):
        """Delete the file at ``artifact_path``, or the contents of the directory there.

        With no ``artifact_path`` the artifact root itself is targeted. Paths that are
        neither a file nor a directory are left untouched.
        """
        path = posixpath.join(self.path, artifact_path) if artifact_path else self.path
        with hdfs_system(scheme=self.scheme, host=self.host, port=self.port) as hdfs:
            file_info = hdfs.get_file_info(path)
            if file_info.type == FileType.File:
                hdfs.delete_file(path)
            elif file_info.type == FileType.Directory:
                hdfs.delete_dir_contents(path)
144
+
145
+
146
@contextmanager
def hdfs_system(scheme, host, port):
    """
    Context manager that builds a HadoopFileSystem connection and yields it.

    Kerberos ticket cache, Kerberos user and extra pyarrow configuration are read from
    the corresponding MLflow environment variables.

    Args:
        scheme: URI scheme placed in front of the host, e.g. ``hdfs``.
        host: hostname; when empty, 'default' is used so the core-site.xml config applies.
        port: port; when empty, 0 is used so the core-site.xml config applies.
    """
    ticket_cache = MLFLOW_KERBEROS_TICKET_CACHE.get()
    user = MLFLOW_KERBEROS_USER.get()
    conf = _parse_extra_conf(MLFLOW_PYARROW_EXTRA_CONF.get())

    if host:
        namenode = "://".join((scheme, host))
    else:
        namenode = "default"

    filesystem = HadoopFileSystem(
        host=namenode,
        port=port or 0,
        user=user,
        kerb_ticket=ticket_cache,
        extra_conf=conf,
    )
    yield filesystem
170
+
171
+
172
+ def _resolve_connection_params(artifact_uri):
173
+ parsed = urllib.parse.urlparse(artifact_uri)
174
+
175
+ return parsed.scheme, parsed.hostname, parsed.port, parsed.path
176
+
177
+
178
+ def _resolve_base_path(path, artifact_path):
179
+ if path == artifact_path:
180
+ return path
181
+ if artifact_path:
182
+ return posixpath.join(path, artifact_path)
183
+ return path
184
+
185
+
186
+ def _relative_path(base_dir, subdir_path, path_module):
187
+ relative_path = path_module.relpath(subdir_path, base_dir)
188
+ return relative_path if relative_path != "." else None
189
+
190
+
191
def _relative_path_local(base_dir, subdir_path):
    """Return the artifact-style relative path of local ``subdir_path`` under ``base_dir``.

    Returns None when ``subdir_path`` is ``base_dir`` itself.
    """
    local_rel = _relative_path(base_dir, subdir_path, os.path)
    if local_rel is None:
        return None
    return relative_path_to_artifact_path(local_rel)
194
+
195
+
196
def _relative_path_remote(base_dir, subdir_path):
    """Return remote ``subdir_path`` relative to ``base_dir`` using POSIX path rules.

    Returns None when ``subdir_path`` is ``base_dir`` itself.
    """
    return _relative_path(base_dir, subdir_path, path_module=posixpath)
198
+
199
+
200
+ def _parse_extra_conf(extra_conf):
201
+ if extra_conf:
202
+
203
+ def as_pair(config):
204
+ key, val = config.split("=")
205
+ return key, val
206
+
207
+ list_of_key_val = [as_pair(conf) for conf in extra_conf.split(",")]
208
+ return dict(list_of_key_val)
209
+ return None