genesis-flow 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (645) hide show
  1. genesis_flow-1.0.0.dist-info/METADATA +822 -0
  2. genesis_flow-1.0.0.dist-info/RECORD +645 -0
  3. genesis_flow-1.0.0.dist-info/WHEEL +5 -0
  4. genesis_flow-1.0.0.dist-info/entry_points.txt +19 -0
  5. genesis_flow-1.0.0.dist-info/licenses/LICENSE.txt +202 -0
  6. genesis_flow-1.0.0.dist-info/top_level.txt +1 -0
  7. mlflow/__init__.py +367 -0
  8. mlflow/__main__.py +3 -0
  9. mlflow/ag2/__init__.py +56 -0
  10. mlflow/ag2/ag2_logger.py +294 -0
  11. mlflow/anthropic/__init__.py +40 -0
  12. mlflow/anthropic/autolog.py +129 -0
  13. mlflow/anthropic/chat.py +144 -0
  14. mlflow/artifacts/__init__.py +268 -0
  15. mlflow/autogen/__init__.py +144 -0
  16. mlflow/autogen/chat.py +142 -0
  17. mlflow/azure/__init__.py +26 -0
  18. mlflow/azure/auth_handler.py +257 -0
  19. mlflow/azure/client.py +319 -0
  20. mlflow/azure/config.py +120 -0
  21. mlflow/azure/connection_factory.py +340 -0
  22. mlflow/azure/exceptions.py +27 -0
  23. mlflow/azure/stores.py +327 -0
  24. mlflow/azure/utils.py +183 -0
  25. mlflow/bedrock/__init__.py +45 -0
  26. mlflow/bedrock/_autolog.py +202 -0
  27. mlflow/bedrock/chat.py +122 -0
  28. mlflow/bedrock/stream.py +160 -0
  29. mlflow/bedrock/utils.py +43 -0
  30. mlflow/cli.py +707 -0
  31. mlflow/client.py +12 -0
  32. mlflow/config/__init__.py +56 -0
  33. mlflow/crewai/__init__.py +79 -0
  34. mlflow/crewai/autolog.py +253 -0
  35. mlflow/crewai/chat.py +29 -0
  36. mlflow/data/__init__.py +75 -0
  37. mlflow/data/artifact_dataset_sources.py +170 -0
  38. mlflow/data/code_dataset_source.py +40 -0
  39. mlflow/data/dataset.py +123 -0
  40. mlflow/data/dataset_registry.py +168 -0
  41. mlflow/data/dataset_source.py +110 -0
  42. mlflow/data/dataset_source_registry.py +219 -0
  43. mlflow/data/delta_dataset_source.py +167 -0
  44. mlflow/data/digest_utils.py +108 -0
  45. mlflow/data/evaluation_dataset.py +562 -0
  46. mlflow/data/filesystem_dataset_source.py +81 -0
  47. mlflow/data/http_dataset_source.py +145 -0
  48. mlflow/data/huggingface_dataset.py +258 -0
  49. mlflow/data/huggingface_dataset_source.py +118 -0
  50. mlflow/data/meta_dataset.py +104 -0
  51. mlflow/data/numpy_dataset.py +223 -0
  52. mlflow/data/pandas_dataset.py +231 -0
  53. mlflow/data/polars_dataset.py +352 -0
  54. mlflow/data/pyfunc_dataset_mixin.py +31 -0
  55. mlflow/data/schema.py +76 -0
  56. mlflow/data/sources.py +1 -0
  57. mlflow/data/spark_dataset.py +406 -0
  58. mlflow/data/spark_dataset_source.py +74 -0
  59. mlflow/data/spark_delta_utils.py +118 -0
  60. mlflow/data/tensorflow_dataset.py +350 -0
  61. mlflow/data/uc_volume_dataset_source.py +81 -0
  62. mlflow/db.py +27 -0
  63. mlflow/dspy/__init__.py +17 -0
  64. mlflow/dspy/autolog.py +197 -0
  65. mlflow/dspy/callback.py +398 -0
  66. mlflow/dspy/constant.py +1 -0
  67. mlflow/dspy/load.py +93 -0
  68. mlflow/dspy/save.py +393 -0
  69. mlflow/dspy/util.py +109 -0
  70. mlflow/dspy/wrapper.py +226 -0
  71. mlflow/entities/__init__.py +104 -0
  72. mlflow/entities/_mlflow_object.py +52 -0
  73. mlflow/entities/assessment.py +545 -0
  74. mlflow/entities/assessment_error.py +80 -0
  75. mlflow/entities/assessment_source.py +141 -0
  76. mlflow/entities/dataset.py +92 -0
  77. mlflow/entities/dataset_input.py +51 -0
  78. mlflow/entities/dataset_summary.py +62 -0
  79. mlflow/entities/document.py +48 -0
  80. mlflow/entities/experiment.py +109 -0
  81. mlflow/entities/experiment_tag.py +35 -0
  82. mlflow/entities/file_info.py +45 -0
  83. mlflow/entities/input_tag.py +35 -0
  84. mlflow/entities/lifecycle_stage.py +35 -0
  85. mlflow/entities/logged_model.py +228 -0
  86. mlflow/entities/logged_model_input.py +26 -0
  87. mlflow/entities/logged_model_output.py +32 -0
  88. mlflow/entities/logged_model_parameter.py +46 -0
  89. mlflow/entities/logged_model_status.py +74 -0
  90. mlflow/entities/logged_model_tag.py +33 -0
  91. mlflow/entities/metric.py +200 -0
  92. mlflow/entities/model_registry/__init__.py +29 -0
  93. mlflow/entities/model_registry/_model_registry_entity.py +13 -0
  94. mlflow/entities/model_registry/model_version.py +243 -0
  95. mlflow/entities/model_registry/model_version_deployment_job_run_state.py +44 -0
  96. mlflow/entities/model_registry/model_version_deployment_job_state.py +70 -0
  97. mlflow/entities/model_registry/model_version_search.py +25 -0
  98. mlflow/entities/model_registry/model_version_stages.py +25 -0
  99. mlflow/entities/model_registry/model_version_status.py +35 -0
  100. mlflow/entities/model_registry/model_version_tag.py +35 -0
  101. mlflow/entities/model_registry/prompt.py +73 -0
  102. mlflow/entities/model_registry/prompt_version.py +244 -0
  103. mlflow/entities/model_registry/registered_model.py +175 -0
  104. mlflow/entities/model_registry/registered_model_alias.py +35 -0
  105. mlflow/entities/model_registry/registered_model_deployment_job_state.py +39 -0
  106. mlflow/entities/model_registry/registered_model_search.py +25 -0
  107. mlflow/entities/model_registry/registered_model_tag.py +35 -0
  108. mlflow/entities/multipart_upload.py +74 -0
  109. mlflow/entities/param.py +49 -0
  110. mlflow/entities/run.py +97 -0
  111. mlflow/entities/run_data.py +84 -0
  112. mlflow/entities/run_info.py +188 -0
  113. mlflow/entities/run_inputs.py +59 -0
  114. mlflow/entities/run_outputs.py +43 -0
  115. mlflow/entities/run_status.py +41 -0
  116. mlflow/entities/run_tag.py +36 -0
  117. mlflow/entities/source_type.py +31 -0
  118. mlflow/entities/span.py +774 -0
  119. mlflow/entities/span_event.py +96 -0
  120. mlflow/entities/span_status.py +102 -0
  121. mlflow/entities/trace.py +317 -0
  122. mlflow/entities/trace_data.py +71 -0
  123. mlflow/entities/trace_info.py +220 -0
  124. mlflow/entities/trace_info_v2.py +162 -0
  125. mlflow/entities/trace_location.py +173 -0
  126. mlflow/entities/trace_state.py +39 -0
  127. mlflow/entities/trace_status.py +68 -0
  128. mlflow/entities/view_type.py +51 -0
  129. mlflow/environment_variables.py +866 -0
  130. mlflow/evaluation/__init__.py +16 -0
  131. mlflow/evaluation/assessment.py +369 -0
  132. mlflow/evaluation/evaluation.py +411 -0
  133. mlflow/evaluation/evaluation_tag.py +61 -0
  134. mlflow/evaluation/fluent.py +48 -0
  135. mlflow/evaluation/utils.py +201 -0
  136. mlflow/exceptions.py +213 -0
  137. mlflow/experiments.py +140 -0
  138. mlflow/gemini/__init__.py +81 -0
  139. mlflow/gemini/autolog.py +186 -0
  140. mlflow/gemini/chat.py +261 -0
  141. mlflow/genai/__init__.py +71 -0
  142. mlflow/genai/datasets/__init__.py +67 -0
  143. mlflow/genai/datasets/evaluation_dataset.py +131 -0
  144. mlflow/genai/evaluation/__init__.py +3 -0
  145. mlflow/genai/evaluation/base.py +411 -0
  146. mlflow/genai/evaluation/constant.py +23 -0
  147. mlflow/genai/evaluation/utils.py +244 -0
  148. mlflow/genai/judges/__init__.py +21 -0
  149. mlflow/genai/judges/databricks.py +404 -0
  150. mlflow/genai/label_schemas/__init__.py +153 -0
  151. mlflow/genai/label_schemas/label_schemas.py +209 -0
  152. mlflow/genai/labeling/__init__.py +159 -0
  153. mlflow/genai/labeling/labeling.py +250 -0
  154. mlflow/genai/optimize/__init__.py +13 -0
  155. mlflow/genai/optimize/base.py +198 -0
  156. mlflow/genai/optimize/optimizers/__init__.py +4 -0
  157. mlflow/genai/optimize/optimizers/base_optimizer.py +38 -0
  158. mlflow/genai/optimize/optimizers/dspy_mipro_optimizer.py +221 -0
  159. mlflow/genai/optimize/optimizers/dspy_optimizer.py +91 -0
  160. mlflow/genai/optimize/optimizers/utils/dspy_mipro_callback.py +76 -0
  161. mlflow/genai/optimize/optimizers/utils/dspy_mipro_utils.py +18 -0
  162. mlflow/genai/optimize/types.py +75 -0
  163. mlflow/genai/optimize/util.py +30 -0
  164. mlflow/genai/prompts/__init__.py +206 -0
  165. mlflow/genai/scheduled_scorers.py +431 -0
  166. mlflow/genai/scorers/__init__.py +26 -0
  167. mlflow/genai/scorers/base.py +492 -0
  168. mlflow/genai/scorers/builtin_scorers.py +765 -0
  169. mlflow/genai/scorers/scorer_utils.py +138 -0
  170. mlflow/genai/scorers/validation.py +165 -0
  171. mlflow/genai/utils/data_validation.py +146 -0
  172. mlflow/genai/utils/enum_utils.py +23 -0
  173. mlflow/genai/utils/trace_utils.py +211 -0
  174. mlflow/groq/__init__.py +42 -0
  175. mlflow/groq/_groq_autolog.py +74 -0
  176. mlflow/johnsnowlabs/__init__.py +888 -0
  177. mlflow/langchain/__init__.py +24 -0
  178. mlflow/langchain/api_request_parallel_processor.py +330 -0
  179. mlflow/langchain/autolog.py +147 -0
  180. mlflow/langchain/chat_agent_langgraph.py +340 -0
  181. mlflow/langchain/constant.py +1 -0
  182. mlflow/langchain/constants.py +1 -0
  183. mlflow/langchain/databricks_dependencies.py +444 -0
  184. mlflow/langchain/langchain_tracer.py +597 -0
  185. mlflow/langchain/model.py +919 -0
  186. mlflow/langchain/output_parsers.py +142 -0
  187. mlflow/langchain/retriever_chain.py +153 -0
  188. mlflow/langchain/runnables.py +527 -0
  189. mlflow/langchain/utils/chat.py +402 -0
  190. mlflow/langchain/utils/logging.py +671 -0
  191. mlflow/langchain/utils/serialization.py +36 -0
  192. mlflow/legacy_databricks_cli/__init__.py +0 -0
  193. mlflow/legacy_databricks_cli/configure/__init__.py +0 -0
  194. mlflow/legacy_databricks_cli/configure/provider.py +482 -0
  195. mlflow/litellm/__init__.py +175 -0
  196. mlflow/llama_index/__init__.py +22 -0
  197. mlflow/llama_index/autolog.py +55 -0
  198. mlflow/llama_index/chat.py +43 -0
  199. mlflow/llama_index/constant.py +1 -0
  200. mlflow/llama_index/model.py +577 -0
  201. mlflow/llama_index/pyfunc_wrapper.py +332 -0
  202. mlflow/llama_index/serialize_objects.py +188 -0
  203. mlflow/llama_index/tracer.py +561 -0
  204. mlflow/metrics/__init__.py +479 -0
  205. mlflow/metrics/base.py +39 -0
  206. mlflow/metrics/genai/__init__.py +25 -0
  207. mlflow/metrics/genai/base.py +101 -0
  208. mlflow/metrics/genai/genai_metric.py +771 -0
  209. mlflow/metrics/genai/metric_definitions.py +450 -0
  210. mlflow/metrics/genai/model_utils.py +371 -0
  211. mlflow/metrics/genai/prompt_template.py +68 -0
  212. mlflow/metrics/genai/prompts/__init__.py +0 -0
  213. mlflow/metrics/genai/prompts/v1.py +422 -0
  214. mlflow/metrics/genai/utils.py +6 -0
  215. mlflow/metrics/metric_definitions.py +619 -0
  216. mlflow/mismatch.py +34 -0
  217. mlflow/mistral/__init__.py +34 -0
  218. mlflow/mistral/autolog.py +71 -0
  219. mlflow/mistral/chat.py +135 -0
  220. mlflow/ml_package_versions.py +452 -0
  221. mlflow/models/__init__.py +97 -0
  222. mlflow/models/auth_policy.py +83 -0
  223. mlflow/models/cli.py +354 -0
  224. mlflow/models/container/__init__.py +294 -0
  225. mlflow/models/container/scoring_server/__init__.py +0 -0
  226. mlflow/models/container/scoring_server/nginx.conf +39 -0
  227. mlflow/models/dependencies_schemas.py +287 -0
  228. mlflow/models/display_utils.py +158 -0
  229. mlflow/models/docker_utils.py +211 -0
  230. mlflow/models/evaluation/__init__.py +23 -0
  231. mlflow/models/evaluation/_shap_patch.py +64 -0
  232. mlflow/models/evaluation/artifacts.py +194 -0
  233. mlflow/models/evaluation/base.py +1811 -0
  234. mlflow/models/evaluation/calibration_curve.py +109 -0
  235. mlflow/models/evaluation/default_evaluator.py +996 -0
  236. mlflow/models/evaluation/deprecated.py +23 -0
  237. mlflow/models/evaluation/evaluator_registry.py +80 -0
  238. mlflow/models/evaluation/evaluators/classifier.py +704 -0
  239. mlflow/models/evaluation/evaluators/default.py +233 -0
  240. mlflow/models/evaluation/evaluators/regressor.py +96 -0
  241. mlflow/models/evaluation/evaluators/shap.py +296 -0
  242. mlflow/models/evaluation/lift_curve.py +178 -0
  243. mlflow/models/evaluation/utils/metric.py +123 -0
  244. mlflow/models/evaluation/utils/trace.py +179 -0
  245. mlflow/models/evaluation/validation.py +434 -0
  246. mlflow/models/flavor_backend.py +93 -0
  247. mlflow/models/flavor_backend_registry.py +53 -0
  248. mlflow/models/model.py +1639 -0
  249. mlflow/models/model_config.py +150 -0
  250. mlflow/models/notebook_resources/agent_evaluation_template.html +235 -0
  251. mlflow/models/notebook_resources/eval_with_dataset_example.py +22 -0
  252. mlflow/models/notebook_resources/eval_with_synthetic_example.py +22 -0
  253. mlflow/models/python_api.py +369 -0
  254. mlflow/models/rag_signatures.py +128 -0
  255. mlflow/models/resources.py +321 -0
  256. mlflow/models/signature.py +662 -0
  257. mlflow/models/utils.py +2054 -0
  258. mlflow/models/wheeled_model.py +280 -0
  259. mlflow/openai/__init__.py +57 -0
  260. mlflow/openai/_agent_tracer.py +364 -0
  261. mlflow/openai/api_request_parallel_processor.py +131 -0
  262. mlflow/openai/autolog.py +509 -0
  263. mlflow/openai/constant.py +1 -0
  264. mlflow/openai/model.py +824 -0
  265. mlflow/openai/utils/chat_schema.py +367 -0
  266. mlflow/optuna/__init__.py +3 -0
  267. mlflow/optuna/storage.py +646 -0
  268. mlflow/plugins/__init__.py +72 -0
  269. mlflow/plugins/base.py +358 -0
  270. mlflow/plugins/builtin/__init__.py +24 -0
  271. mlflow/plugins/builtin/pytorch_plugin.py +150 -0
  272. mlflow/plugins/builtin/sklearn_plugin.py +158 -0
  273. mlflow/plugins/builtin/transformers_plugin.py +187 -0
  274. mlflow/plugins/cli.py +321 -0
  275. mlflow/plugins/discovery.py +340 -0
  276. mlflow/plugins/manager.py +465 -0
  277. mlflow/plugins/registry.py +316 -0
  278. mlflow/plugins/templates/framework_plugin_template.py +329 -0
  279. mlflow/prompt/constants.py +20 -0
  280. mlflow/prompt/promptlab_model.py +197 -0
  281. mlflow/prompt/registry_utils.py +248 -0
  282. mlflow/promptflow/__init__.py +495 -0
  283. mlflow/protos/__init__.py +0 -0
  284. mlflow/protos/assessments_pb2.py +174 -0
  285. mlflow/protos/databricks_artifacts_pb2.py +489 -0
  286. mlflow/protos/databricks_filesystem_service_pb2.py +196 -0
  287. mlflow/protos/databricks_managed_catalog_messages_pb2.py +95 -0
  288. mlflow/protos/databricks_managed_catalog_service_pb2.py +86 -0
  289. mlflow/protos/databricks_pb2.py +267 -0
  290. mlflow/protos/databricks_trace_server_pb2.py +374 -0
  291. mlflow/protos/databricks_uc_registry_messages_pb2.py +1249 -0
  292. mlflow/protos/databricks_uc_registry_service_pb2.py +170 -0
  293. mlflow/protos/facet_feature_statistics_pb2.py +296 -0
  294. mlflow/protos/internal_pb2.py +77 -0
  295. mlflow/protos/mlflow_artifacts_pb2.py +336 -0
  296. mlflow/protos/model_registry_pb2.py +1073 -0
  297. mlflow/protos/scalapb/__init__.py +0 -0
  298. mlflow/protos/scalapb/scalapb_pb2.py +104 -0
  299. mlflow/protos/service_pb2.py +2600 -0
  300. mlflow/protos/unity_catalog_oss_messages_pb2.py +457 -0
  301. mlflow/protos/unity_catalog_oss_service_pb2.py +130 -0
  302. mlflow/protos/unity_catalog_prompt_messages_pb2.py +447 -0
  303. mlflow/protos/unity_catalog_prompt_messages_pb2_grpc.py +24 -0
  304. mlflow/protos/unity_catalog_prompt_service_pb2.py +164 -0
  305. mlflow/protos/unity_catalog_prompt_service_pb2_grpc.py +785 -0
  306. mlflow/py.typed +0 -0
  307. mlflow/pydantic_ai/__init__.py +57 -0
  308. mlflow/pydantic_ai/autolog.py +173 -0
  309. mlflow/pyfunc/__init__.py +3844 -0
  310. mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py +61 -0
  311. mlflow/pyfunc/backend.py +523 -0
  312. mlflow/pyfunc/context.py +78 -0
  313. mlflow/pyfunc/dbconnect_artifact_cache.py +144 -0
  314. mlflow/pyfunc/loaders/__init__.py +7 -0
  315. mlflow/pyfunc/loaders/chat_agent.py +117 -0
  316. mlflow/pyfunc/loaders/chat_model.py +125 -0
  317. mlflow/pyfunc/loaders/code_model.py +31 -0
  318. mlflow/pyfunc/loaders/responses_agent.py +112 -0
  319. mlflow/pyfunc/mlserver.py +46 -0
  320. mlflow/pyfunc/model.py +1473 -0
  321. mlflow/pyfunc/scoring_server/__init__.py +604 -0
  322. mlflow/pyfunc/scoring_server/app.py +7 -0
  323. mlflow/pyfunc/scoring_server/client.py +146 -0
  324. mlflow/pyfunc/spark_model_cache.py +48 -0
  325. mlflow/pyfunc/stdin_server.py +44 -0
  326. mlflow/pyfunc/utils/__init__.py +3 -0
  327. mlflow/pyfunc/utils/data_validation.py +224 -0
  328. mlflow/pyfunc/utils/environment.py +22 -0
  329. mlflow/pyfunc/utils/input_converter.py +47 -0
  330. mlflow/pyfunc/utils/serving_data_parser.py +11 -0
  331. mlflow/pytorch/__init__.py +1171 -0
  332. mlflow/pytorch/_lightning_autolog.py +580 -0
  333. mlflow/pytorch/_pytorch_autolog.py +50 -0
  334. mlflow/pytorch/pickle_module.py +35 -0
  335. mlflow/rfunc/__init__.py +42 -0
  336. mlflow/rfunc/backend.py +134 -0
  337. mlflow/runs.py +89 -0
  338. mlflow/server/__init__.py +302 -0
  339. mlflow/server/auth/__init__.py +1224 -0
  340. mlflow/server/auth/__main__.py +4 -0
  341. mlflow/server/auth/basic_auth.ini +6 -0
  342. mlflow/server/auth/cli.py +11 -0
  343. mlflow/server/auth/client.py +537 -0
  344. mlflow/server/auth/config.py +34 -0
  345. mlflow/server/auth/db/__init__.py +0 -0
  346. mlflow/server/auth/db/cli.py +18 -0
  347. mlflow/server/auth/db/migrations/__init__.py +0 -0
  348. mlflow/server/auth/db/migrations/alembic.ini +110 -0
  349. mlflow/server/auth/db/migrations/env.py +76 -0
  350. mlflow/server/auth/db/migrations/versions/8606fa83a998_initial_migration.py +51 -0
  351. mlflow/server/auth/db/migrations/versions/__init__.py +0 -0
  352. mlflow/server/auth/db/models.py +67 -0
  353. mlflow/server/auth/db/utils.py +37 -0
  354. mlflow/server/auth/entities.py +165 -0
  355. mlflow/server/auth/logo.py +14 -0
  356. mlflow/server/auth/permissions.py +65 -0
  357. mlflow/server/auth/routes.py +18 -0
  358. mlflow/server/auth/sqlalchemy_store.py +263 -0
  359. mlflow/server/graphql/__init__.py +0 -0
  360. mlflow/server/graphql/autogenerated_graphql_schema.py +353 -0
  361. mlflow/server/graphql/graphql_custom_scalars.py +24 -0
  362. mlflow/server/graphql/graphql_errors.py +15 -0
  363. mlflow/server/graphql/graphql_no_batching.py +89 -0
  364. mlflow/server/graphql/graphql_schema_extensions.py +74 -0
  365. mlflow/server/handlers.py +3217 -0
  366. mlflow/server/prometheus_exporter.py +17 -0
  367. mlflow/server/validation.py +30 -0
  368. mlflow/shap/__init__.py +691 -0
  369. mlflow/sklearn/__init__.py +1994 -0
  370. mlflow/sklearn/utils.py +1041 -0
  371. mlflow/smolagents/__init__.py +66 -0
  372. mlflow/smolagents/autolog.py +139 -0
  373. mlflow/smolagents/chat.py +29 -0
  374. mlflow/store/__init__.py +10 -0
  375. mlflow/store/_unity_catalog/__init__.py +1 -0
  376. mlflow/store/_unity_catalog/lineage/__init__.py +1 -0
  377. mlflow/store/_unity_catalog/lineage/constants.py +2 -0
  378. mlflow/store/_unity_catalog/registry/__init__.py +6 -0
  379. mlflow/store/_unity_catalog/registry/prompt_info.py +75 -0
  380. mlflow/store/_unity_catalog/registry/rest_store.py +1740 -0
  381. mlflow/store/_unity_catalog/registry/uc_oss_rest_store.py +507 -0
  382. mlflow/store/_unity_catalog/registry/utils.py +121 -0
  383. mlflow/store/artifact/__init__.py +0 -0
  384. mlflow/store/artifact/artifact_repo.py +472 -0
  385. mlflow/store/artifact/artifact_repository_registry.py +154 -0
  386. mlflow/store/artifact/azure_blob_artifact_repo.py +275 -0
  387. mlflow/store/artifact/azure_data_lake_artifact_repo.py +295 -0
  388. mlflow/store/artifact/cli.py +141 -0
  389. mlflow/store/artifact/cloud_artifact_repo.py +332 -0
  390. mlflow/store/artifact/databricks_artifact_repo.py +729 -0
  391. mlflow/store/artifact/databricks_artifact_repo_resources.py +301 -0
  392. mlflow/store/artifact/databricks_logged_model_artifact_repo.py +93 -0
  393. mlflow/store/artifact/databricks_models_artifact_repo.py +216 -0
  394. mlflow/store/artifact/databricks_sdk_artifact_repo.py +134 -0
  395. mlflow/store/artifact/databricks_sdk_models_artifact_repo.py +97 -0
  396. mlflow/store/artifact/dbfs_artifact_repo.py +240 -0
  397. mlflow/store/artifact/ftp_artifact_repo.py +132 -0
  398. mlflow/store/artifact/gcs_artifact_repo.py +296 -0
  399. mlflow/store/artifact/hdfs_artifact_repo.py +209 -0
  400. mlflow/store/artifact/http_artifact_repo.py +218 -0
  401. mlflow/store/artifact/local_artifact_repo.py +142 -0
  402. mlflow/store/artifact/mlflow_artifacts_repo.py +94 -0
  403. mlflow/store/artifact/models_artifact_repo.py +259 -0
  404. mlflow/store/artifact/optimized_s3_artifact_repo.py +356 -0
  405. mlflow/store/artifact/presigned_url_artifact_repo.py +173 -0
  406. mlflow/store/artifact/r2_artifact_repo.py +70 -0
  407. mlflow/store/artifact/runs_artifact_repo.py +265 -0
  408. mlflow/store/artifact/s3_artifact_repo.py +330 -0
  409. mlflow/store/artifact/sftp_artifact_repo.py +141 -0
  410. mlflow/store/artifact/uc_volume_artifact_repo.py +76 -0
  411. mlflow/store/artifact/unity_catalog_models_artifact_repo.py +168 -0
  412. mlflow/store/artifact/unity_catalog_oss_models_artifact_repo.py +168 -0
  413. mlflow/store/artifact/utils/__init__.py +0 -0
  414. mlflow/store/artifact/utils/models.py +148 -0
  415. mlflow/store/db/__init__.py +0 -0
  416. mlflow/store/db/base_sql_model.py +3 -0
  417. mlflow/store/db/db_types.py +10 -0
  418. mlflow/store/db/utils.py +314 -0
  419. mlflow/store/db_migrations/__init__.py +0 -0
  420. mlflow/store/db_migrations/alembic.ini +74 -0
  421. mlflow/store/db_migrations/env.py +84 -0
  422. mlflow/store/db_migrations/versions/0584bdc529eb_add_cascading_deletion_to_datasets_from_experiments.py +88 -0
  423. mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +49 -0
  424. mlflow/store/db_migrations/versions/0c779009ac13_add_deleted_time_field_to_runs_table.py +24 -0
  425. mlflow/store/db_migrations/versions/181f10493468_allow_nulls_for_metric_values.py +35 -0
  426. mlflow/store/db_migrations/versions/27a6a02d2cf1_add_model_version_tags_table.py +38 -0
  427. mlflow/store/db_migrations/versions/2b4d017a5e9b_add_model_registry_tables_to_db.py +77 -0
  428. mlflow/store/db_migrations/versions/2d6e25af4d3e_increase_max_param_val_length.py +33 -0
  429. mlflow/store/db_migrations/versions/3500859a5d39_add_model_aliases_table.py +50 -0
  430. mlflow/store/db_migrations/versions/39d1c3be5f05_add_is_nan_constraint_for_metrics_tables_if_necessary.py +41 -0
  431. mlflow/store/db_migrations/versions/400f98739977_add_logged_model_tables.py +123 -0
  432. mlflow/store/db_migrations/versions/4465047574b1_increase_max_dataset_schema_size.py +38 -0
  433. mlflow/store/db_migrations/versions/451aebb31d03_add_metric_step.py +35 -0
  434. mlflow/store/db_migrations/versions/5b0e9adcef9c_add_cascade_deletion_to_trace_tables_fk.py +40 -0
  435. mlflow/store/db_migrations/versions/6953534de441_add_step_to_inputs_table.py +25 -0
  436. mlflow/store/db_migrations/versions/728d730b5ebd_add_registered_model_tags_table.py +38 -0
  437. mlflow/store/db_migrations/versions/7ac759974ad8_update_run_tags_with_larger_limit.py +36 -0
  438. mlflow/store/db_migrations/versions/7f2a7d5fae7d_add_datasets_inputs_input_tags_tables.py +82 -0
  439. mlflow/store/db_migrations/versions/84291f40a231_add_run_link_to_model_version.py +26 -0
  440. mlflow/store/db_migrations/versions/867495a8f9d4_add_trace_tables.py +90 -0
  441. mlflow/store/db_migrations/versions/89d4b8295536_create_latest_metrics_table.py +169 -0
  442. mlflow/store/db_migrations/versions/90e64c465722_migrate_user_column_to_tags.py +64 -0
  443. mlflow/store/db_migrations/versions/97727af70f4d_creation_time_last_update_time_experiments.py +25 -0
  444. mlflow/store/db_migrations/versions/__init__.py +0 -0
  445. mlflow/store/db_migrations/versions/a8c4a736bde6_allow_nulls_for_run_id.py +27 -0
  446. mlflow/store/db_migrations/versions/acf3f17fdcc7_add_storage_location_field_to_model_.py +29 -0
  447. mlflow/store/db_migrations/versions/bd07f7e963c5_create_index_on_run_uuid.py +26 -0
  448. mlflow/store/db_migrations/versions/bda7b8c39065_increase_model_version_tag_value_limit.py +38 -0
  449. mlflow/store/db_migrations/versions/c48cb773bb87_reset_default_value_for_is_nan_in_metrics_table_for_mysql.py +41 -0
  450. mlflow/store/db_migrations/versions/cbc13b556ace_add_v3_trace_schema_columns.py +31 -0
  451. mlflow/store/db_migrations/versions/cc1f77228345_change_param_value_length_to_500.py +34 -0
  452. mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +78 -0
  453. mlflow/store/db_migrations/versions/df50e92ffc5e_add_experiment_tags_table.py +38 -0
  454. mlflow/store/db_migrations/versions/f5a4f2784254_increase_run_tag_value_limit.py +36 -0
  455. mlflow/store/entities/__init__.py +3 -0
  456. mlflow/store/entities/paged_list.py +18 -0
  457. mlflow/store/model_registry/__init__.py +10 -0
  458. mlflow/store/model_registry/abstract_store.py +1081 -0
  459. mlflow/store/model_registry/base_rest_store.py +44 -0
  460. mlflow/store/model_registry/databricks_workspace_model_registry_rest_store.py +37 -0
  461. mlflow/store/model_registry/dbmodels/__init__.py +0 -0
  462. mlflow/store/model_registry/dbmodels/models.py +206 -0
  463. mlflow/store/model_registry/file_store.py +1091 -0
  464. mlflow/store/model_registry/rest_store.py +481 -0
  465. mlflow/store/model_registry/sqlalchemy_store.py +1286 -0
  466. mlflow/store/tracking/__init__.py +23 -0
  467. mlflow/store/tracking/abstract_store.py +816 -0
  468. mlflow/store/tracking/dbmodels/__init__.py +0 -0
  469. mlflow/store/tracking/dbmodels/initial_models.py +243 -0
  470. mlflow/store/tracking/dbmodels/models.py +1073 -0
  471. mlflow/store/tracking/file_store.py +2438 -0
  472. mlflow/store/tracking/postgres_managed_identity.py +146 -0
  473. mlflow/store/tracking/rest_store.py +1131 -0
  474. mlflow/store/tracking/sqlalchemy_store.py +2785 -0
  475. mlflow/system_metrics/__init__.py +61 -0
  476. mlflow/system_metrics/metrics/__init__.py +0 -0
  477. mlflow/system_metrics/metrics/base_metrics_monitor.py +32 -0
  478. mlflow/system_metrics/metrics/cpu_monitor.py +23 -0
  479. mlflow/system_metrics/metrics/disk_monitor.py +21 -0
  480. mlflow/system_metrics/metrics/gpu_monitor.py +71 -0
  481. mlflow/system_metrics/metrics/network_monitor.py +34 -0
  482. mlflow/system_metrics/metrics/rocm_monitor.py +123 -0
  483. mlflow/system_metrics/system_metrics_monitor.py +198 -0
  484. mlflow/tracing/__init__.py +16 -0
  485. mlflow/tracing/assessment.py +356 -0
  486. mlflow/tracing/client.py +531 -0
  487. mlflow/tracing/config.py +125 -0
  488. mlflow/tracing/constant.py +105 -0
  489. mlflow/tracing/destination.py +81 -0
  490. mlflow/tracing/display/__init__.py +40 -0
  491. mlflow/tracing/display/display_handler.py +196 -0
  492. mlflow/tracing/export/async_export_queue.py +186 -0
  493. mlflow/tracing/export/inference_table.py +138 -0
  494. mlflow/tracing/export/mlflow_v3.py +137 -0
  495. mlflow/tracing/export/utils.py +70 -0
  496. mlflow/tracing/fluent.py +1417 -0
  497. mlflow/tracing/processor/base_mlflow.py +199 -0
  498. mlflow/tracing/processor/inference_table.py +175 -0
  499. mlflow/tracing/processor/mlflow_v3.py +47 -0
  500. mlflow/tracing/processor/otel.py +73 -0
  501. mlflow/tracing/provider.py +487 -0
  502. mlflow/tracing/trace_manager.py +200 -0
  503. mlflow/tracing/utils/__init__.py +616 -0
  504. mlflow/tracing/utils/artifact_utils.py +28 -0
  505. mlflow/tracing/utils/copy.py +55 -0
  506. mlflow/tracing/utils/environment.py +55 -0
  507. mlflow/tracing/utils/exception.py +21 -0
  508. mlflow/tracing/utils/once.py +35 -0
  509. mlflow/tracing/utils/otlp.py +63 -0
  510. mlflow/tracing/utils/processor.py +54 -0
  511. mlflow/tracing/utils/search.py +292 -0
  512. mlflow/tracing/utils/timeout.py +250 -0
  513. mlflow/tracing/utils/token.py +19 -0
  514. mlflow/tracing/utils/truncation.py +124 -0
  515. mlflow/tracing/utils/warning.py +76 -0
  516. mlflow/tracking/__init__.py +39 -0
  517. mlflow/tracking/_model_registry/__init__.py +1 -0
  518. mlflow/tracking/_model_registry/client.py +764 -0
  519. mlflow/tracking/_model_registry/fluent.py +853 -0
  520. mlflow/tracking/_model_registry/registry.py +67 -0
  521. mlflow/tracking/_model_registry/utils.py +251 -0
  522. mlflow/tracking/_tracking_service/__init__.py +0 -0
  523. mlflow/tracking/_tracking_service/client.py +883 -0
  524. mlflow/tracking/_tracking_service/registry.py +56 -0
  525. mlflow/tracking/_tracking_service/utils.py +275 -0
  526. mlflow/tracking/artifact_utils.py +179 -0
  527. mlflow/tracking/client.py +5900 -0
  528. mlflow/tracking/context/__init__.py +0 -0
  529. mlflow/tracking/context/abstract_context.py +35 -0
  530. mlflow/tracking/context/databricks_cluster_context.py +15 -0
  531. mlflow/tracking/context/databricks_command_context.py +15 -0
  532. mlflow/tracking/context/databricks_job_context.py +49 -0
  533. mlflow/tracking/context/databricks_notebook_context.py +41 -0
  534. mlflow/tracking/context/databricks_repo_context.py +43 -0
  535. mlflow/tracking/context/default_context.py +51 -0
  536. mlflow/tracking/context/git_context.py +32 -0
  537. mlflow/tracking/context/registry.py +98 -0
  538. mlflow/tracking/context/system_environment_context.py +15 -0
  539. mlflow/tracking/default_experiment/__init__.py +1 -0
  540. mlflow/tracking/default_experiment/abstract_context.py +43 -0
  541. mlflow/tracking/default_experiment/databricks_notebook_experiment_provider.py +44 -0
  542. mlflow/tracking/default_experiment/registry.py +75 -0
  543. mlflow/tracking/fluent.py +3595 -0
  544. mlflow/tracking/metric_value_conversion_utils.py +93 -0
  545. mlflow/tracking/multimedia.py +206 -0
  546. mlflow/tracking/registry.py +86 -0
  547. mlflow/tracking/request_auth/__init__.py +0 -0
  548. mlflow/tracking/request_auth/abstract_request_auth_provider.py +34 -0
  549. mlflow/tracking/request_auth/registry.py +60 -0
  550. mlflow/tracking/request_header/__init__.py +0 -0
  551. mlflow/tracking/request_header/abstract_request_header_provider.py +36 -0
  552. mlflow/tracking/request_header/databricks_request_header_provider.py +38 -0
  553. mlflow/tracking/request_header/default_request_header_provider.py +17 -0
  554. mlflow/tracking/request_header/registry.py +79 -0
  555. mlflow/transformers/__init__.py +2982 -0
  556. mlflow/transformers/flavor_config.py +258 -0
  557. mlflow/transformers/hub_utils.py +83 -0
  558. mlflow/transformers/llm_inference_utils.py +468 -0
  559. mlflow/transformers/model_io.py +301 -0
  560. mlflow/transformers/peft.py +51 -0
  561. mlflow/transformers/signature.py +183 -0
  562. mlflow/transformers/torch_utils.py +55 -0
  563. mlflow/types/__init__.py +21 -0
  564. mlflow/types/agent.py +270 -0
  565. mlflow/types/chat.py +240 -0
  566. mlflow/types/llm.py +935 -0
  567. mlflow/types/responses.py +139 -0
  568. mlflow/types/responses_helpers.py +416 -0
  569. mlflow/types/schema.py +1505 -0
  570. mlflow/types/type_hints.py +647 -0
  571. mlflow/types/utils.py +753 -0
  572. mlflow/utils/__init__.py +283 -0
  573. mlflow/utils/_capture_modules.py +256 -0
  574. mlflow/utils/_capture_transformers_modules.py +75 -0
  575. mlflow/utils/_spark_utils.py +201 -0
  576. mlflow/utils/_unity_catalog_oss_utils.py +97 -0
  577. mlflow/utils/_unity_catalog_utils.py +479 -0
  578. mlflow/utils/annotations.py +218 -0
  579. mlflow/utils/arguments_utils.py +16 -0
  580. mlflow/utils/async_logging/__init__.py +1 -0
  581. mlflow/utils/async_logging/async_artifacts_logging_queue.py +258 -0
  582. mlflow/utils/async_logging/async_logging_queue.py +366 -0
  583. mlflow/utils/async_logging/run_artifact.py +38 -0
  584. mlflow/utils/async_logging/run_batch.py +58 -0
  585. mlflow/utils/async_logging/run_operations.py +49 -0
  586. mlflow/utils/autologging_utils/__init__.py +737 -0
  587. mlflow/utils/autologging_utils/client.py +432 -0
  588. mlflow/utils/autologging_utils/config.py +33 -0
  589. mlflow/utils/autologging_utils/events.py +294 -0
  590. mlflow/utils/autologging_utils/logging_and_warnings.py +328 -0
  591. mlflow/utils/autologging_utils/metrics_queue.py +71 -0
  592. mlflow/utils/autologging_utils/safety.py +1104 -0
  593. mlflow/utils/autologging_utils/versioning.py +95 -0
  594. mlflow/utils/checkpoint_utils.py +206 -0
  595. mlflow/utils/class_utils.py +6 -0
  596. mlflow/utils/cli_args.py +257 -0
  597. mlflow/utils/conda.py +354 -0
  598. mlflow/utils/credentials.py +231 -0
  599. mlflow/utils/data_utils.py +17 -0
  600. mlflow/utils/databricks_utils.py +1436 -0
  601. mlflow/utils/docstring_utils.py +477 -0
  602. mlflow/utils/doctor.py +133 -0
  603. mlflow/utils/download_cloud_file_chunk.py +43 -0
  604. mlflow/utils/env_manager.py +16 -0
  605. mlflow/utils/env_pack.py +131 -0
  606. mlflow/utils/environment.py +1009 -0
  607. mlflow/utils/exception_utils.py +14 -0
  608. mlflow/utils/file_utils.py +978 -0
  609. mlflow/utils/git_utils.py +77 -0
  610. mlflow/utils/gorilla.py +797 -0
  611. mlflow/utils/import_hooks/__init__.py +363 -0
  612. mlflow/utils/lazy_load.py +51 -0
  613. mlflow/utils/logging_utils.py +168 -0
  614. mlflow/utils/mime_type_utils.py +58 -0
  615. mlflow/utils/mlflow_tags.py +103 -0
  616. mlflow/utils/model_utils.py +486 -0
  617. mlflow/utils/name_utils.py +346 -0
  618. mlflow/utils/nfs_on_spark.py +62 -0
  619. mlflow/utils/openai_utils.py +164 -0
  620. mlflow/utils/os.py +12 -0
  621. mlflow/utils/oss_registry_utils.py +29 -0
  622. mlflow/utils/plugins.py +17 -0
  623. mlflow/utils/process.py +182 -0
  624. mlflow/utils/promptlab_utils.py +146 -0
  625. mlflow/utils/proto_json_utils.py +743 -0
  626. mlflow/utils/pydantic_utils.py +54 -0
  627. mlflow/utils/request_utils.py +279 -0
  628. mlflow/utils/requirements_utils.py +704 -0
  629. mlflow/utils/rest_utils.py +673 -0
  630. mlflow/utils/search_logged_model_utils.py +127 -0
  631. mlflow/utils/search_utils.py +2111 -0
  632. mlflow/utils/secure_loading.py +221 -0
  633. mlflow/utils/security_validation.py +384 -0
  634. mlflow/utils/server_cli_utils.py +61 -0
  635. mlflow/utils/spark_utils.py +15 -0
  636. mlflow/utils/string_utils.py +138 -0
  637. mlflow/utils/thread_utils.py +63 -0
  638. mlflow/utils/time.py +54 -0
  639. mlflow/utils/timeout.py +42 -0
  640. mlflow/utils/uri.py +572 -0
  641. mlflow/utils/validation.py +662 -0
  642. mlflow/utils/virtualenv.py +458 -0
  643. mlflow/utils/warnings_utils.py +25 -0
  644. mlflow/utils/yaml_utils.py +179 -0
  645. mlflow/version.py +24 -0
@@ -0,0 +1,468 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ import uuid
5
+ from typing import TYPE_CHECKING, Any, Optional, Union
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
+ from mlflow.exceptions import MlflowException
11
+ from mlflow.models import ModelSignature
12
+ from mlflow.protos.databricks_pb2 import BAD_REQUEST, INVALID_PARAMETER_VALUE
13
+ from mlflow.transformers.flavor_config import FlavorKey
14
+ from mlflow.types.llm import (
15
+ CHAT_MODEL_INPUT_SCHEMA,
16
+ CHAT_MODEL_OUTPUT_SCHEMA,
17
+ COMPLETIONS_MODEL_INPUT_SCHEMA,
18
+ COMPLETIONS_MODEL_OUTPUT_SCHEMA,
19
+ EMBEDDING_MODEL_INPUT_SCHEMA,
20
+ EMBEDDING_MODEL_OUTPUT_SCHEMA,
21
+ )
22
+
23
+ if TYPE_CHECKING:
24
+ import torch
25
+
26
+ _LLM_INFERENCE_TASK_KEY = "inference_task"
27
+ # The LLM inference task is saved as "task" in the metadata for forward compatibility with
28
+ # future Databricks Provisioned Throughput support of more model architectures for inference.
29
+ _METADATA_LLM_INFERENCE_TASK_KEY = "task"
30
+
31
+ _LLM_INFERENCE_TASK_PREFIX = "llm/v1"
32
+ _LLM_INFERENCE_TASK_COMPLETIONS = f"{_LLM_INFERENCE_TASK_PREFIX}/completions"
33
+ _LLM_INFERENCE_TASK_CHAT = f"{_LLM_INFERENCE_TASK_PREFIX}/chat"
34
+ _LLM_INFERENCE_TASK_EMBEDDING = f"{_LLM_INFERENCE_TASK_PREFIX}/embeddings"
35
+
36
+ _LLM_V1_EMBEDDING_INPUT_KEY = "input"
37
+
38
+
39
+ _LLM_INFERENCE_OBJECT_NAME = {
40
+ _LLM_INFERENCE_TASK_COMPLETIONS: "text_completion",
41
+ _LLM_INFERENCE_TASK_CHAT: "chat.completion",
42
+ }
43
+
44
+ _SUPPORTED_LLM_INFERENCE_TASK_TYPES_BY_PIPELINE_TASK = {
45
+ "text-generation": [_LLM_INFERENCE_TASK_COMPLETIONS, _LLM_INFERENCE_TASK_CHAT],
46
+ "feature-extraction": [_LLM_INFERENCE_TASK_EMBEDDING],
47
+ }
48
+
49
+ _SIGNATURE_FOR_LLM_INFERENCE_TASK = {
50
+ _LLM_INFERENCE_TASK_CHAT: ModelSignature(
51
+ inputs=CHAT_MODEL_INPUT_SCHEMA, outputs=CHAT_MODEL_OUTPUT_SCHEMA
52
+ ),
53
+ _LLM_INFERENCE_TASK_COMPLETIONS: ModelSignature(
54
+ inputs=COMPLETIONS_MODEL_INPUT_SCHEMA, outputs=COMPLETIONS_MODEL_OUTPUT_SCHEMA
55
+ ),
56
+ _LLM_INFERENCE_TASK_EMBEDDING: ModelSignature(
57
+ inputs=EMBEDDING_MODEL_INPUT_SCHEMA, outputs=EMBEDDING_MODEL_OUTPUT_SCHEMA
58
+ ),
59
+ }
60
+
61
+ _LLM_INFERENCE_TASK_TO_DATA_FIELD = {
62
+ _LLM_INFERENCE_TASK_CHAT: "messages",
63
+ _LLM_INFERENCE_TASK_COMPLETIONS: "prompt",
64
+ }
65
+
66
+
67
+ def infer_signature_from_llm_inference_task(
68
+ inference_task: str, signature: Optional[ModelSignature] = None
69
+ ) -> ModelSignature:
70
+ """
71
+ Infers the signature according to the MLflow inference task.
72
+ Raises exception if a signature is given.
73
+ """
74
+ inferred_signature = _SIGNATURE_FOR_LLM_INFERENCE_TASK[inference_task]
75
+
76
+ if signature is not None and signature != inferred_signature:
77
+ raise MlflowException(
78
+ f"When `task` is specified as `{inference_task}`, the signature would "
79
+ "be set by MLflow. Please do not set the signature."
80
+ )
81
+ return inferred_signature
82
+
83
+
84
+ def convert_messages_to_prompt(messages: list[dict[str, Any]], tokenizer) -> str:
85
+ """For the Chat inference task, apply chat template to messages to create prompt.
86
+
87
+ Args:
88
+ messages: List of message e.g. [{"role": user, "content": xxx}, ...]
89
+ tokenizer: The tokenizer object used for inference.
90
+
91
+ Returns:
92
+ The prompt string contains the messages.
93
+ """
94
+ if not (isinstance(messages, list) and all(isinstance(msg, dict) for msg in messages)):
95
+ raise MlflowException(
96
+ f"Input messages should be list of dictionaries, but got: {type(messages)}.",
97
+ error_code=INVALID_PARAMETER_VALUE,
98
+ )
99
+
100
+ try:
101
+ return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
102
+ except Exception as e:
103
+ raise MlflowException(f"Failed to apply chat template: {e}")
104
+
105
+
106
+ def preprocess_llm_inference_input(
107
+ data: Union[pd.DataFrame, dict[str, Any]],
108
+ params: Optional[dict[str, Any]] = None,
109
+ flavor_config: Optional[dict[str, Any]] = None,
110
+ ) -> tuple[list[Any], dict[str, Any]]:
111
+ """
112
+ When a MLflow inference task is given, return updated `data` and `params` that
113
+ - Extract the parameters from the input data (from the first row if passed multiple rows)
114
+ - Replace OpenAI specific parameters with Hugging Face specific parameters, in particular
115
+ - `max_tokens` with `max_new_tokens`
116
+ - `stop` with `stopping_criteria`
117
+
118
+ Args:
119
+ data: Input data for the LLM inference task. Either a pandas DataFrame (after signature
120
+ enforcement) or a raw dictionary payload.
121
+ params: Optional dictionary of parameters.
122
+ flavor_config: Optional dictionary of flavor configuration.
123
+ """
124
+ if isinstance(data, pd.DataFrame):
125
+ # Pandas convert None to np.nan internally, which is not preferred
126
+ data = data.replace(np.nan, None).to_dict(orient="list")
127
+ elif isinstance(data, dict):
128
+ # Convert single value to list for consistency with DataFrame
129
+ data = {k: [v] for k, v in data.items()}
130
+ else:
131
+ raise MlflowException(
132
+ "Input data for a Transformer model logged with `llm/v1/chat` or `llm/v1/completions`"
133
+ f"task is expected to be a pandas DataFrame or a dictionary, but got: {type(data)}.",
134
+ error_code=BAD_REQUEST,
135
+ )
136
+
137
+ flavor_config = flavor_config or {}
138
+ params = params or {}
139
+
140
+ # Extract list of input data (prompt, messages) to LLM
141
+ task = flavor_config[_LLM_INFERENCE_TASK_KEY]
142
+ input_col = _LLM_INFERENCE_TASK_TO_DATA_FIELD.get(task)
143
+ if input_col not in data:
144
+ raise MlflowException(
145
+ f"Transformer model saved with `{task}` task excepts `{input_col}`"
146
+ "to be passed as input data.",
147
+ error_code=BAD_REQUEST,
148
+ )
149
+ update_data = data.pop(input_col)
150
+
151
+ # The rest of fields in input payload should goes to params and override default ones
152
+ params_in_data = {k: v[0] for k, v in data.items() if v[0] is not None}
153
+ params = {**params, **params_in_data}
154
+
155
+ if max_tokens := params.pop("max_tokens", None):
156
+ params["max_new_tokens"] = max_tokens
157
+ if stop := params.pop("stop", None):
158
+ params["stopping_criteria"] = _get_stopping_criteria(
159
+ stop,
160
+ flavor_config.get(FlavorKey.MODEL_NAME),
161
+ )
162
+ return update_data, params
163
+
164
+
165
+ def _get_stopping_criteria(stop: Optional[Union[str, list[str]]], model_name: Optional[str] = None):
166
+ """Return a list of Hugging Face stopping criteria objects for the given stop sequences."""
167
+ from transformers import AutoTokenizer, StoppingCriteria
168
+
169
+ if stop is None or model_name is None:
170
+ return None
171
+
172
+ if isinstance(stop, str):
173
+ stop = [stop]
174
+
175
+ # To tokenize the stop sequences for stopping criteria, we need to use the slow tokenizer
176
+ # for matching the actual tokens, according to https://github.com/huggingface/transformers/issues/27704
177
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
178
+
179
+ def _get_slow_token_ids(seq: str):
180
+ return tokenizer.convert_tokens_to_ids(tokenizer._tokenize(seq))
181
+
182
+ # NB: We need to define this as an inner class to avoid importing
183
+ # transformers in the global scope that confuses autologging
184
+ class _StopSequenceMatchCriteria(StoppingCriteria):
185
+ def __init__(self, stop_sequence_ids):
186
+ self.stop_sequence_ids = stop_sequence_ids
187
+
188
+ def __call__(
189
+ self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
190
+ ) -> bool:
191
+ last_ids = input_ids[:, -len(self.stop_sequence_ids) :].tolist()
192
+ return self.stop_sequence_ids in last_ids
193
+
194
+ stopping_criteria = []
195
+ for stop_sequence in stop:
196
+ # Add stopping criteria for both with and without space, such as "stopword" and " stopword"
197
+ token_ids = _get_slow_token_ids(stop_sequence)
198
+ token_ids_with_space = _get_slow_token_ids(" " + stop_sequence)
199
+ stopping_criteria += [
200
+ _StopSequenceMatchCriteria(token_ids),
201
+ _StopSequenceMatchCriteria(token_ids_with_space),
202
+ ]
203
+
204
+ return stopping_criteria
205
+
206
+
207
+ def postprocess_output_for_llm_inference_task(
208
+ data: list[str],
209
+ output_tensors: list[list[int]],
210
+ pipeline,
211
+ flavor_config,
212
+ model_config,
213
+ inference_task,
214
+ ):
215
+ """
216
+ Wrap output data with usage information according to the MLflow inference task.
217
+
218
+ Example:
219
+ .. code-block:: python
220
+ data = ["How to learn Python in 3 weeks?"]
221
+ output_tensors = [
222
+ [
223
+ 1128,
224
+ 304,
225
+ ...,
226
+ 29879,
227
+ ]
228
+ ]
229
+ output_dicts = postprocess_output_for_llm_inference_task(data, output_tensors, **kwargs)
230
+
231
+ assert output_dicts == [
232
+ {
233
+ "id": "e4f3b3e3-3b3e-4b3e-8b3e-3b3e4b3e8b3e",
234
+ "object": "text_completion",
235
+ "created": 1707466970,
236
+ "model": "loaded_model_name",
237
+ "choices": [
238
+ {
239
+ "index": 0,
240
+ "finish_reason": "length",
241
+ "text": "1. Start with a beginner's",
242
+ }
243
+ ],
244
+ "usage": {"prompt_tokens": 9, "completion_tokens": 10, "total_tokens": 19},
245
+ }
246
+ ]
247
+
248
+ Args:
249
+ data: List of text input prompts.
250
+ output_tensors: List of output tensors that contain the generated tokens (including
251
+ the prompt tokens) corresponding to each input prompt.
252
+ pipeline: The pipeline object used for inference.
253
+ flavor_config: The flavor configuration dictionary for the model.
254
+ model_config: The model configuration dictionary used for inference.
255
+ inference_task: The MLflow inference task.
256
+
257
+ Returns:
258
+ List of dictionaries containing the output text and usage information for each input prompt.
259
+ """
260
+ output_dicts = []
261
+ for input_data, output_tensor in zip(data, output_tensors):
262
+ output_dict = _get_output_and_usage_from_tensor(
263
+ input_data, output_tensor, pipeline, flavor_config, model_config, inference_task
264
+ )
265
+ output_dicts.append(output_dict)
266
+
267
+ return output_dicts
268
+
269
+
270
+ def _get_output_and_usage_from_tensor(
271
+ prompt: str, output_tensor: list[int], pipeline, flavor_config, model_config, inference_task
272
+ ):
273
+ """
274
+ Decode the output tensor and return the output text and usage information as a dictionary
275
+ to make the output in OpenAI compatible format.
276
+ """
277
+ usage = _get_token_usage(prompt, output_tensor, pipeline, model_config)
278
+ completions_text = _get_completions_text(prompt, output_tensor, pipeline)
279
+ finish_reason = _get_finish_reason(
280
+ usage["total_tokens"], usage["completion_tokens"], model_config
281
+ )
282
+
283
+ output_dict = {
284
+ "id": str(uuid.uuid4()),
285
+ "object": _LLM_INFERENCE_OBJECT_NAME[inference_task],
286
+ "created": int(time.time()),
287
+ "model": flavor_config.get("source_model_name", ""),
288
+ "usage": usage,
289
+ }
290
+
291
+ completion_choice = {
292
+ "index": 0,
293
+ "finish_reason": finish_reason,
294
+ }
295
+
296
+ if inference_task == _LLM_INFERENCE_TASK_COMPLETIONS:
297
+ completion_choice["text"] = completions_text
298
+ elif inference_task == _LLM_INFERENCE_TASK_CHAT:
299
+ completion_choice["message"] = {"role": "assistant", "content": completions_text}
300
+
301
+ output_dict["choices"] = [completion_choice]
302
+
303
+ return output_dict
304
+
305
+
306
+ def _get_completions_text(prompt: str, output_tensor: list[int], pipeline):
307
+ """Decode generated text from output tensor and remove the input prompt."""
308
+ generated_text = pipeline.tokenizer.decode(
309
+ output_tensor,
310
+ skip_special_tokens=True,
311
+ clean_up_tokenization_spaces=True,
312
+ )
313
+
314
+ # In order to correctly remove the prompt tokens from the decoded tokens,
315
+ # we need to acquire the length of the prompt without special tokens
316
+ prompt_ids_without_special_tokens = pipeline.tokenizer(
317
+ prompt, return_tensors=pipeline.framework, add_special_tokens=False
318
+ )["input_ids"][0]
319
+
320
+ prompt_length = len(
321
+ pipeline.tokenizer.decode(
322
+ prompt_ids_without_special_tokens,
323
+ skip_special_tokens=True,
324
+ clean_up_tokenization_spaces=True,
325
+ )
326
+ )
327
+
328
+ return generated_text[prompt_length:].lstrip()
329
+
330
+
331
+ def _get_token_usage(prompt: str, output_tensor: list[int], pipeline, model_config):
332
+ """Return the prompt tokens, completion tokens, and the total tokens as dict."""
333
+ inputs = pipeline.tokenizer(
334
+ prompt,
335
+ return_tensors=pipeline.framework,
336
+ max_length=model_config.get("max_length", None),
337
+ add_special_tokens=False,
338
+ )
339
+
340
+ prompt_tokens = inputs["input_ids"].shape[-1]
341
+ total_tokens = len(output_tensor)
342
+ completions_tokens = total_tokens - prompt_tokens
343
+
344
+ return {
345
+ "prompt_tokens": prompt_tokens,
346
+ "completion_tokens": completions_tokens,
347
+ "total_tokens": total_tokens,
348
+ }
349
+
350
+
351
+ def _get_finish_reason(total_tokens: int, completion_tokens: int, model_config):
352
+ """Determine the reason that the text generation finished."""
353
+ finish_reason = "stop"
354
+
355
+ if total_tokens > model_config.get(
356
+ "max_length", float("inf")
357
+ ) or completion_tokens == model_config.get("max_new_tokens", float("inf")):
358
+ finish_reason = "length"
359
+
360
+ return finish_reason
361
+
362
+
363
+ def _get_default_task_for_llm_inference_task(llm_inference_task: Optional[str]) -> Optional[str]:
364
+ """
365
+ Get corresponding original Transformers task for the given LLM inference task.
366
+
367
+ NB: This assumes there is only one original Transformers task for each LLM inference
368
+ task, which might not be true in the future.
369
+ """
370
+ for task, llm_tasks in _SUPPORTED_LLM_INFERENCE_TASK_TYPES_BY_PIPELINE_TASK.items():
371
+ if llm_inference_task in llm_tasks:
372
+ return task
373
+ return None
374
+
375
+
376
+ def preprocess_llm_embedding_params(
377
+ data: Union[pd.DataFrame, dict[str, Any]],
378
+ ) -> tuple[list[str], dict[str, Any]]:
379
+ """
380
+ When `llm/v1/embeddings` task is given, extract the input data (with "input" key) and
381
+ parameters, and format the input data into the unified format for easier downstream handling.
382
+
383
+ The handling is more complicated than other LLM inference tasks because the embedding endpoint
384
+ accepts heterogeneous input - both string and list of strings as input. Also we don't enforce
385
+ the input schema always, so there are 4 possible input types:
386
+ (1) Pandas DataFrame with string column
387
+ (2) Pandas DataFrame with list of strings column
388
+ (3) Dictionary with string value
389
+ (4) Dictionary with list of strings value
390
+ In all cases, the returned input data will be a list of strings.
391
+
392
+ Args:
393
+ data: Input data for the embedding task.
394
+
395
+ Returns:
396
+ Tuple of input data and parameters dictionary.
397
+ """
398
+ if isinstance(data, pd.DataFrame):
399
+ params = {}
400
+ for col in data.columns:
401
+ if col == _LLM_V1_EMBEDDING_INPUT_KEY:
402
+ input_data = data[col].to_list()
403
+ if isinstance(input_data[0], list):
404
+ input_data = input_data[0]
405
+ else:
406
+ params[col] = data[col].tolist()[0]
407
+ else:
408
+ # NB: Input schema is not enforced for the embedding task because of the heterogeneous
409
+ # input type, so we have to cast the input data into unified format here.
410
+ input_data = data.get(_LLM_V1_EMBEDDING_INPUT_KEY)
411
+ if isinstance(input, str):
412
+ input_data = [input_data]
413
+ params = {k: v for k, v in data.items() if k != _LLM_V1_EMBEDDING_INPUT_KEY}
414
+
415
+ return input_data, params
416
+
417
+
418
+ def postprocess_output_for_llm_v1_embedding_task(
419
+ input_prompts: list[str],
420
+ output_tensors: list[list[float]],
421
+ tokenizer,
422
+ ):
423
+ """
424
+ Wrap output data with usage information.
425
+
426
+ Examples:
427
+ .. code-block:: python
428
+ input_prompt = ["hello world and hello mlflow"]
429
+ output_embedding = [0.47137904, 0.4669448, ..., 0.69726706]
430
+ output_dicts = postprocess_output_for_llm_v1_embedding_task(
431
+ input_prompt, output_embedding
432
+ )
433
+ assert output_dicts == [
434
+ {
435
+ "object": "list",
436
+ "data": [
437
+ {
438
+ "object": "embedding",
439
+ "index": 0,
440
+ "embedding": [0.47137904, 0.4669448, ..., 0.69726706],
441
+ }
442
+ ],
443
+ "usage": {"prompt_tokens": 8, "total_tokens": 8},
444
+ }
445
+ ]
446
+
447
+ Args:
448
+ input_prompts: text input prompts
449
+ output_tensors: List of output tensors that contain the generated embeddings
450
+ tokenizer: The tokenizer object used for inference.
451
+
452
+ Returns:
453
+ Dictionaries containing the output embedding and usage information for each
454
+ input prompt.
455
+ """
456
+ prompt_tokens = sum(len(tokenizer(prompt)["input_ids"]) for prompt in input_prompts)
457
+ return {
458
+ "object": "list",
459
+ "data": [
460
+ {
461
+ "object": "embedding",
462
+ "index": i,
463
+ "embedding": tensor,
464
+ }
465
+ for i, tensor in enumerate(output_tensors)
466
+ ],
467
+ "usage": {"prompt_tokens": prompt_tokens, "total_tokens": prompt_tokens},
468
+ }