llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
The diff excerpts below cover files 271-275 from the list above (the inference and memory provider utilities).

llama_stack/providers/utils/inference/openai_mixin.py

@@ -10,11 +10,20 @@ from abc import ABC, abstractmethod
 from collections.abc import AsyncIterator, Iterable
 from typing import Any
 
-from openai import NOT_GIVEN, AsyncOpenAI
+from openai import AsyncOpenAI
 from pydantic import BaseModel, ConfigDict
 
-from llama_stack.apis.inference import (
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.providers.utils.inference.openai_compat import (
+    get_stream_options_for_telemetry,
+    prepare_openai_completion_params,
+)
+from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
+from llama_stack_api import (
     Model,
+    ModelType,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -26,12 +35,6 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingUsage,
     OpenAIMessageParam,
 )
-from llama_stack.apis.models import ModelType
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
-from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
 
 logger = get_logger(name=__name__, category="providers::utils")
 
@@ -47,7 +50,9 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
     The behavior of this class can be customized by child classes in the following ways:
     - overwrite_completion_id: If True, overwrites the 'id' field in OpenAI responses
     - download_images: If True, downloads images and converts to base64 for providers that require it
+    - supports_stream_options: If False, disables stream_options injection for providers that don't support it
     - embedding_model_metadata: A dictionary mapping model IDs to their embedding metadata
+    - construct_model_from_identifier: Method to construct a Model instance corresponding to the given identifier
     - provider_data_api_key_field: Optional field name in provider data to look for API key
     - list_provider_model_ids: Method to list available models from the provider
     - get_extra_client_params: Method to provide extra parameters to the AsyncOpenAI client
@@ -73,6 +78,10 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
     # for providers that require base64 encoded images instead of URLs.
     download_images: bool = False
 
+    # Allow subclasses to control whether the provider supports stream_options parameter
+    # Set to False for providers that don't support stream_options (e.g., Ollama, vLLM)
+    supports_stream_options: bool = True
+
     # Embedding model metadata for this provider
     # Can be set by subclasses or instances to provide embedding models
     # Format: {"model_id": {"embedding_dimension": 1536, "context_length": 8192}}
@@ -118,6 +127,30 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         """
         return {}
 
+    def construct_model_from_identifier(self, identifier: str) -> Model:
+        """
+        Construct a Model instance corresponding to the given identifier
+
+        Child classes can override this to customize model typing/metadata.
+
+        :param identifier: The provider's model identifier
+        :return: A Model instance
+        """
+        if metadata := self.embedding_model_metadata.get(identifier):
+            return Model(
+                provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+                provider_resource_id=identifier,
+                identifier=identifier,
+                model_type=ModelType.embedding,
+                metadata=metadata,
+            )
+        return Model(
+            provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+            provider_resource_id=identifier,
+            identifier=identifier,
+            model_type=ModelType.llm,
+        )
+
     async def list_provider_model_ids(self) -> Iterable[str]:
         """
         List available models from the provider.
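The two hooks introduced above work together: supports_stream_options gates the telemetry-driven stream_options injection, and construct_model_from_identifier centralizes the embedding-vs-LLM classification that model listing previously inlined. A minimal, hypothetical subclass sketch follows; ExampleAdapter, its identifiers, and the 1536 dimension are illustrative, and a real adapter must still supply OpenAIMixin's abstract config/API-key plumbing.

# Hypothetical adapter; names and metadata here are illustrative only.
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack_api import Model, ModelType


class ExampleAdapter(OpenAIMixin):
    # This provider rejects OpenAI's stream_options parameter, so opt out
    # of the telemetry-driven injection (the same switch Ollama/vLLM use).
    supports_stream_options: bool = False

    # Identifiers listed here are typed as embedding models by the default
    # construct_model_from_identifier implementation.
    embedding_model_metadata: dict = {
        "example-embed-v1": {"embedding_dimension": 1536, "context_length": 8192},
    }

    def construct_model_from_identifier(self, identifier: str) -> Model:
        # Example override: classify any "*-embed-*" identifier as an
        # embedding model even without registered metadata; defer to the
        # default embedding/llm classification for everything else.
        if "-embed-" in identifier and identifier not in self.embedding_model_metadata:
            return Model(
                provider_id=self.__provider_id__,  # set by the resolver
                provider_resource_id=identifier,
                identifier=identifier,
                model_type=ModelType.embedding,
                metadata={"embedding_dimension": 1536},  # assumed dimension
            )
        return super().construct_model_from_identifier(identifier)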
@@ -223,30 +256,33 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         return model_obj.provider_resource_id
 
     async def _maybe_overwrite_id(self, resp: Any, stream: bool | None) -> Any:
-        if not self.overwrite_completion_id:
-            return resp
-
-        new_id = f"cltsd-{uuid.uuid4()}"
         if stream:
+            new_id = f"cltsd-{uuid.uuid4()}" if self.overwrite_completion_id else None
 
             async def _gen():
                 async for chunk in resp:
-                    chunk.id = new_id
+                    if new_id:
+                        chunk.id = new_id
                     yield chunk
 
             return _gen()
         else:
-            resp.id = new_id
+            if self.overwrite_completion_id:
+                resp.id = f"cltsd-{uuid.uuid4()}"
             return resp
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
         """
         Direct OpenAI completion API call.
         """
-        # TODO: fix openai_completion to return type compatible with OpenAI's API response
+        # Inject stream_options when streaming and telemetry is active
+        stream_options = get_stream_options_for_telemetry(
+            params.stream_options, params.stream or False, self.supports_stream_options
+        )
+
         provider_model_id = await self._get_provider_model_id(params.model)
         self._validate_model_allowed(provider_model_id)
 
@@ -264,7 +300,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
             seed=params.seed,
             stop=params.stop,
             stream=params.stream,
-            stream_options=params.stream_options,
+            stream_options=stream_options,
             temperature=params.temperature,
             top_p=params.top_p,
             user=params.user,
@@ -283,6 +319,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         """
         Direct OpenAI chat completion API call.
         """
+        # Inject stream_options when streaming and telemetry is active
+        stream_options = get_stream_options_for_telemetry(
+            params.stream_options, params.stream or False, self.supports_stream_options
+        )
+
         provider_model_id = await self._get_provider_model_id(params.model)
         self._validate_model_allowed(provider_model_id)
 
@@ -323,7 +364,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
             seed=params.seed,
             stop=params.stop,
             stream=params.stream,
-            stream_options=params.stream_options,
+            stream_options=stream_options,
             temperature=params.temperature,
             tool_choice=params.tool_choice,
             tools=params.tools,
@@ -353,17 +394,16 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         request_params: dict[str, Any] = {
             "model": provider_model_id,
             "input": params.input,
-            "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
-            "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN,
-            "user": params.user if params.user is not None else NOT_GIVEN,
         }
+        if params.encoding_format is not None:
+            request_params["encoding_format"] = params.encoding_format
+        if params.dimensions is not None:
+            request_params["dimensions"] = params.dimensions
+        if params.user is not None:
+            request_params["user"] = params.user
+        if params.model_extra:
+            request_params["extra_body"] = params.model_extra
 
-        # Add extra_body if present
-        extra_body = params.model_extra
-        if extra_body:
-            request_params["extra_body"] = extra_body
-
-        # Call OpenAI embeddings API with properly typed parameters
         response = await self.client.embeddings.create(**request_params)
 
         data = []
@@ -439,21 +479,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
             if self.config.allowed_models is not None and provider_model_id not in self.config.allowed_models:
                 logger.info(f"Skipping model {provider_model_id} as it is not in the allowed models list")
                 continue
-            if metadata := self.embedding_model_metadata.get(provider_model_id):
-                model = Model(
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    provider_resource_id=provider_model_id,
-                    identifier=provider_model_id,
-                    model_type=ModelType.embedding,
-                    metadata=metadata,
-                )
-            else:
-                model = Model(
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    provider_resource_id=provider_model_id,
-                    identifier=provider_model_id,
-                    model_type=ModelType.llm,
-                )
+            model = self.construct_model_from_identifier(provider_model_id)
             self._model_cache[provider_model_id] = model
 
         return list(self._model_cache.values())
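From the caller's side the streaming path is unchanged: when params.stream is set and telemetry is active, the mixin swaps in the computed stream_options before forwarding the request, and _maybe_overwrite_id rewrites chunk IDs when overwrite_completion_id is on. A hedged usage sketch; "provider" stands for any concrete adapter built on the mixin, and the request fields shown are assumptions mirroring the parameters forwarded above.

# Sketch only; "example-llm" and the provider object are stand-ins.
from llama_stack_api import (
    OpenAIChatCompletionRequestWithExtraBody,
    OpenAIUserMessageParam,
)


async def stream_reply(provider) -> None:
    params = OpenAIChatCompletionRequestWithExtraBody(
        model="example-llm",
        messages=[OpenAIUserMessageParam(content="Say hello.")],
        stream=True,
        # stream_options left unset: the mixin fills it in via
        # get_stream_options_for_telemetry, honoring supports_stream_options.
    )
    async for chunk in await provider.openai_chat_completion(params):
        print(chunk.id)  # rewritten to "cltsd-..." when overwrite_completion_id is True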
llama_stack/providers/utils/inference/prompt_adapter.py

@@ -14,27 +14,6 @@ from typing import Any
 import httpx
 from PIL import Image as PIL_Image
 
-from llama_stack.apis.common.content_types import (
-    ImageContentItem,
-    InterleavedContent,
-    InterleavedContentItem,
-    TextContentItem,
-)
-from llama_stack.apis.inference import (
-    ChatCompletionRequest,
-    CompletionRequest,
-    Message,
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-    OpenAIFile,
-    ResponseFormat,
-    ResponseFormatType,
-    SystemMessage,
-    SystemMessageBehavior,
-    ToolChoice,
-    ToolDefinition,
-    UserMessage,
-)
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import (
     RawContent,
@@ -42,33 +21,37 @@ from llama_stack.models.llama.datatypes import (
     RawMediaItem,
     RawMessage,
     RawTextItem,
-    Role,
     StopReason,
+    ToolCall,
+    ToolDefinition,
     ToolPromptFormat,
 )
 from llama_stack.models.llama.llama3.chat_format import ChatFormat
-from llama_stack.models.llama.llama3.prompt_templates import (
-    BuiltinToolGenerator,
-    FunctionTagCustomToolGenerator,
-    JsonCustomToolGenerator,
-    PythonListCustomToolGenerator,
-    SystemDefaultGenerator,
-)
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.models.llama.llama4.prompt_templates.system_prompts import (
-    PythonListCustomToolGenerator as PythonListCustomToolGeneratorLlama4,
-)
 from llama_stack.models.llama.sku_list import resolve_model
 from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal
-from llama_stack.providers.utils.inference import supported_inference_models
+from llama_stack_api import (
+    CompletionRequest,
+    ImageContentItem,
+    InterleavedContent,
+    InterleavedContentItem,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIFile,
+    OpenAIMessageParam,
+    OpenAISystemMessageParam,
+    OpenAIToolMessageParam,
+    OpenAIUserMessageParam,
+    ResponseFormat,
+    ResponseFormatType,
+    TextContentItem,
+    ToolChoice,
+)
 
 log = get_logger(name=__name__, category="providers::utils")
 
 
-class ChatCompletionRequestWithRawContent(ChatCompletionRequest):
-    messages: list[RawMessage]
-
-
 class CompletionRequestWithRawContent(CompletionRequest):
     content: RawContent
 
@@ -103,28 +86,6 @@ def interleaved_content_as_str(
     return _process(content)
 
 
-async def convert_request_to_raw(
-    request: ChatCompletionRequest | CompletionRequest,
-) -> ChatCompletionRequestWithRawContent | CompletionRequestWithRawContent:
-    if isinstance(request, ChatCompletionRequest):
-        messages = []
-        for m in request.messages:
-            content = await interleaved_content_convert_to_raw(m.content)
-            d = m.model_dump()
-            d["content"] = content
-            messages.append(RawMessage(**d))
-
-        d = request.model_dump()
-        d["messages"] = messages
-        request = ChatCompletionRequestWithRawContent(**d)
-    else:
-        d = request.model_dump()
-        d["content"] = await interleaved_content_convert_to_raw(request.content)
-        request = CompletionRequestWithRawContent(**d)
-
-    return request
-
-
 async def interleaved_content_convert_to_raw(
     content: InterleavedContent,
 ) -> RawContent:
@@ -171,6 +132,36 @@ async def interleaved_content_convert_to_raw(
     return await _localize_single(content)
 
 
+async def convert_openai_message_to_raw_message(message: OpenAIMessageParam) -> RawMessage:
+    """Convert OpenAI message format to RawMessage format used by Llama formatters."""
+    if isinstance(message, OpenAIUserMessageParam):
+        content = await interleaved_content_convert_to_raw(message.content)  # type: ignore[arg-type]
+        return RawMessage(role="user", content=content)
+    elif isinstance(message, OpenAISystemMessageParam):
+        content = await interleaved_content_convert_to_raw(message.content)  # type: ignore[arg-type]
+        return RawMessage(role="system", content=content)
+    elif isinstance(message, OpenAIAssistantMessageParam):
+        content = await interleaved_content_convert_to_raw(message.content or "")  # type: ignore[arg-type]
+        tool_calls = []
+        if message.tool_calls:
+            for tc in message.tool_calls:
+                if tc.function:
+                    tool_calls.append(
+                        ToolCall(
+                            call_id=tc.id or "",
+                            tool_name=tc.function.name or "",
+                            arguments=tc.function.arguments or "{}",
+                        )
+                    )
+        return RawMessage(role="assistant", content=content, tool_calls=tool_calls)
+    elif isinstance(message, OpenAIToolMessageParam):
+        content = await interleaved_content_convert_to_raw(message.content)  # type: ignore[arg-type]
+        return RawMessage(role="tool", content=content)
+    else:
+        # Handle OpenAIDeveloperMessageParam if needed
+        raise ValueError(f"Unsupported message type: {type(message)}")
+
+
 def content_has_media(content: InterleavedContent):
     def _has_media_content(c):
         return isinstance(c, ImageContentItem)
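The new converter replaces the removed convert_request_to_raw path for chat: OpenAI-typed messages map directly onto the RawMessages the Llama formatters consume. A small usage sketch under that assumption; running it requires the Llama 3 tokenizer assets to be available locally.

# Illustrative bridge from OpenAI-style messages to a rendered Llama 3 prompt.
import asyncio

from llama_stack.models.llama.llama3.chat_format import ChatFormat
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.utils.inference.prompt_adapter import (
    convert_openai_message_to_raw_message,
)
from llama_stack_api import OpenAISystemMessageParam, OpenAIUserMessageParam


async def render_prompt() -> str:
    messages = [
        OpenAISystemMessageParam(content="You are a terse assistant."),
        OpenAIUserMessageParam(content="Name the capital of France."),
    ]
    raw = [await convert_openai_message_to_raw_message(m) for m in messages]
    formatter = ChatFormat(tokenizer=Tokenizer.get_instance())
    model_input = formatter.encode_dialog_prompt(raw)
    # Decode back to text to inspect the exact prompt the model would see.
    return formatter.tokenizer.decode(model_input.tokens)


print(asyncio.run(render_prompt()))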
@@ -181,17 +172,6 @@ def content_has_media(content: InterleavedContent):
     return _has_media_content(content)
 
 
-def messages_have_media(messages: list[Message]):
-    return any(content_has_media(m.content) for m in messages)
-
-
-def request_has_media(request: ChatCompletionRequest | CompletionRequest):
-    if isinstance(request, ChatCompletionRequest):
-        return messages_have_media(request.messages)
-    else:
-        return content_has_media(request.content)
-
-
 async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
     if uri.startswith("http"):
         async with httpx.AsyncClient() as client:
@@ -253,79 +233,6 @@ def augment_content_with_response_format_prompt(response_format, content):
     return content
 
 
-async def chat_completion_request_to_prompt(request: ChatCompletionRequest, llama_model: str) -> str:
-    messages = chat_completion_request_to_messages(request, llama_model)
-    request.messages = messages
-    request = await convert_request_to_raw(request)
-
-    formatter = ChatFormat(tokenizer=Tokenizer.get_instance())
-    model_input = formatter.encode_dialog_prompt(
-        request.messages,
-        tool_prompt_format=request.tool_config.tool_prompt_format or get_default_tool_prompt_format(llama_model),
-    )
-    return formatter.tokenizer.decode(model_input.tokens)
-
-
-async def chat_completion_request_to_model_input_info(
-    request: ChatCompletionRequest, llama_model: str
-) -> tuple[str, int]:
-    messages = chat_completion_request_to_messages(request, llama_model)
-    request.messages = messages
-    request = await convert_request_to_raw(request)
-
-    formatter = ChatFormat(tokenizer=Tokenizer.get_instance())
-    model_input = formatter.encode_dialog_prompt(
-        request.messages,
-        tool_prompt_format=request.tool_config.tool_prompt_format or get_default_tool_prompt_format(llama_model),
-    )
-    return (
-        formatter.tokenizer.decode(model_input.tokens),
-        len(model_input.tokens),
-    )
-
-
-def chat_completion_request_to_messages(
-    request: ChatCompletionRequest,
-    llama_model: str,
-) -> list[Message]:
-    """Reads chat completion request and augments the messages to handle tools.
-    For eg. for llama_3_1, add system message with the appropriate tools or
-    add user messsage for custom tools, etc.
-    """
-    assert llama_model is not None, "llama_model is required"
-    model = resolve_model(llama_model)
-    if model is None:
-        log.error(f"Could not resolve model {llama_model}")
-        return request.messages
-
-    allowed_models = supported_inference_models()
-    descriptors = [m.descriptor() for m in allowed_models]
-    if model.descriptor() not in descriptors:
-        log.error(f"Unsupported inference model? {model.descriptor()}")
-        return request.messages
-
-    if model.model_family == ModelFamily.llama3_1 or (
-        model.model_family == ModelFamily.llama3_2 and is_multimodal(model.core_model_id)
-    ):
-        # llama3.1 and llama3.2 multimodal models follow the same tool prompt format
-        messages = augment_messages_for_tools_llama_3_1(request)
-    elif model.model_family in (
-        ModelFamily.llama3_2,
-        ModelFamily.llama3_3,
-    ):
-        # llama3.2, llama3.3 follow the same tool prompt format
-        messages = augment_messages_for_tools_llama(request, PythonListCustomToolGenerator)
-    elif model.model_family == ModelFamily.llama4:
-        messages = augment_messages_for_tools_llama(request, PythonListCustomToolGeneratorLlama4)
-    else:
-        messages = request.messages
-
-    if fmt_prompt := response_format_prompt(request.response_format):
-        messages.append(UserMessage(content=fmt_prompt))
-
-    return messages
-
-
 def response_format_prompt(fmt: ResponseFormat | None):
     if not fmt:
         return None
@@ -338,128 +245,6 @@ def response_format_prompt(fmt: ResponseFormat | None):
     raise ValueError(f"Unknown response format {fmt.type}")
 
 
-def augment_messages_for_tools_llama_3_1(
-    request: ChatCompletionRequest,
-) -> list[Message]:
-    existing_messages = request.messages
-    existing_system_message = None
-    if existing_messages[0].role == Role.system.value:
-        existing_system_message = existing_messages.pop(0)
-
-    assert existing_messages[0].role != Role.system.value, "Should only have 1 system message"
-
-    messages = []
-
-    default_gen = SystemDefaultGenerator()
-    default_template = default_gen.gen()
-
-    sys_content = ""
-
-    tool_template = None
-    if request.tools:
-        tool_gen = BuiltinToolGenerator()
-        tool_template = tool_gen.gen(request.tools)
-
-        sys_content += tool_template.render()
-        sys_content += "\n"
-
-    sys_content += default_template.render()
-
-    if existing_system_message:
-        # TODO: this fn is needed in many places
-        def _process(c):
-            if isinstance(c, str):
-                return c
-            else:
-                return "<media>"
-
-        sys_content += "\n"
-
-        if isinstance(existing_system_message.content, str):
-            sys_content += _process(existing_system_message.content)
-        elif isinstance(existing_system_message.content, list):
-            sys_content += "\n".join([_process(c) for c in existing_system_message.content])
-
-    tool_choice_prompt = _get_tool_choice_prompt(request.tool_config.tool_choice, request.tools)
-    if tool_choice_prompt:
-        sys_content += "\n" + tool_choice_prompt
-
-    messages.append(SystemMessage(content=sys_content))
-
-    has_custom_tools = request.tools is not None and any(isinstance(dfn.tool_name, str) for dfn in request.tools)
-    if has_custom_tools:
-        fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.json
-        if fmt == ToolPromptFormat.json:
-            tool_gen = JsonCustomToolGenerator()
-        elif fmt == ToolPromptFormat.function_tag:
-            tool_gen = FunctionTagCustomToolGenerator()
-        else:
-            raise ValueError(f"Non supported ToolPromptFormat {fmt}")
-
-        custom_tools = [t for t in request.tools if isinstance(t.tool_name, str)]
-        custom_template = tool_gen.gen(custom_tools)
-        messages.append(UserMessage(content=custom_template.render()))
-
-    # Add back existing messages from the request
-    messages += existing_messages
-
-    return messages
-
-
-def augment_messages_for_tools_llama(
-    request: ChatCompletionRequest,
-    custom_tool_prompt_generator,
-) -> list[Message]:
-    existing_messages = request.messages
-    existing_system_message = None
-    if existing_messages[0].role == Role.system.value:
-        existing_system_message = existing_messages.pop(0)
-
-    assert existing_messages[0].role != Role.system.value, "Should only have 1 system message"
-
-    sys_content = ""
-    custom_tools, builtin_tools = [], []
-    for t in request.tools:
-        if isinstance(t.tool_name, str):
-            custom_tools.append(t)
-        else:
-            builtin_tools.append(t)
-
-    if builtin_tools:
-        tool_gen = BuiltinToolGenerator()
-        tool_template = tool_gen.gen(builtin_tools)
-
-        sys_content += tool_template.render()
-        sys_content += "\n"
-
-    custom_tools = [dfn for dfn in request.tools if isinstance(dfn.tool_name, str)]
-    if custom_tools:
-        fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.python_list
-        if fmt != ToolPromptFormat.python_list:
-            raise ValueError(f"Non supported ToolPromptFormat {request.tool_config.tool_prompt_format}")
-
-        system_prompt = None
-        if existing_system_message and request.tool_config.system_message_behavior == SystemMessageBehavior.replace:
-            system_prompt = existing_system_message.content
-
-        tool_template = custom_tool_prompt_generator().gen(custom_tools, system_prompt)
-
-        sys_content += tool_template.render()
-        sys_content += "\n"
-
-    if existing_system_message and (
-        request.tool_config.system_message_behavior == SystemMessageBehavior.append or not custom_tools
-    ):
-        sys_content += interleaved_content_as_str(existing_system_message.content, sep="\n")
-
-    tool_choice_prompt = _get_tool_choice_prompt(request.tool_config.tool_choice, request.tools)
-    if tool_choice_prompt:
-        sys_content += "\n" + tool_choice_prompt
-
-    messages = [SystemMessage(content=sys_content.strip("\n")), *existing_messages]
-    return messages
-
-
 def _get_tool_choice_prompt(tool_choice: ToolChoice | str, tools: list[ToolDefinition]) -> str:
     if tool_choice == ToolChoice.auto:
         return ""
llama_stack/providers/utils/inference/stream_utils.py (new file)

@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import AsyncIterator
+
+from llama_stack.log import get_logger
+
+log = get_logger(name=__name__, category="providers::utils")
+
+
+async def wrap_async_stream[T](stream: AsyncIterator[T]) -> AsyncIterator[T]:
+    """
+    Wrap an async stream to ensure it returns a proper AsyncIterator.
+    """
+    try:
+        async for item in stream:
+            yield item
+    except Exception as e:
+        log.error(f"Error in wrapped async stream: {e}")
+        raise
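A minimal usage sketch for the new helper; the numbers() producer is made up, and the PEP 695 type parameter on wrap_async_stream implies Python 3.12+.

# Illustrative consumer; any provider-returned async stream works the same way.
import asyncio
from collections.abc import AsyncIterator

from llama_stack.providers.utils.inference.stream_utils import wrap_async_stream


async def numbers() -> AsyncIterator[int]:
    for i in range(3):
        yield i


async def main() -> None:
    # Exceptions raised mid-stream are logged by the wrapper, then re-raised.
    async for item in wrap_async_stream(numbers()):
        print(item)


asyncio.run(main())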
llama_stack/providers/utils/memory/__init__.py

@@ -3,3 +3,5 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+
+__all__ = []
llama_stack/providers/utils/memory/file_utils.py

@@ -8,7 +8,7 @@ import base64
 import mimetypes
 import os
 
-from llama_stack.apis.common.content_types import URL
+from llama_stack_api import URL
 
 
 def data_url_from_file(file_path: str) -> URL:
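Judging from this module's imports (base64, mimetypes, os), the touched helper base64-encodes a local file into a data: URI wrapped in the llama_stack_api URL type; only the import source changed here. A usage sketch with an illustrative path:

# Sketch only; "notes.txt" is a made-up path.
from llama_stack.providers.utils.memory.file_utils import data_url_from_file

url = data_url_from_file("notes.txt")
print(url)  # expected shape: data:text/plain;base64,...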