llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack_api/files/fastapi_routes.py
@@ -0,0 +1,124 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from typing import Annotated
+
+ from fastapi import APIRouter, Depends, UploadFile
+ from fastapi.param_functions import File, Form
+ from fastapi.responses import Response
+
+ from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
+ from llama_stack_api.version import LLAMA_STACK_API_V1
+
+ from .api import Files
+ from .models import (
+     DeleteFileRequest,
+     ExpiresAfter,
+     ListFilesRequest,
+     ListOpenAIFileResponse,
+     OpenAIFileDeleteResponse,
+     OpenAIFileObject,
+     OpenAIFilePurpose,
+     RetrieveFileContentRequest,
+     RetrieveFileRequest,
+     UploadFileRequest,
+ )
+
+ # Automatically generate dependency functions from Pydantic models
+ # This ensures the models are the single source of truth for descriptions
+ get_list_files_request = create_query_dependency(ListFilesRequest)
+ get_get_files_request = create_path_dependency(RetrieveFileRequest)
+ get_delete_files_request = create_path_dependency(DeleteFileRequest)
+ get_retrieve_file_content_request = create_path_dependency(RetrieveFileContentRequest)
+
+
+ def create_router(impl: Files) -> APIRouter:
+     router = APIRouter(
+         prefix=f"/{LLAMA_STACK_API_V1}",
+         tags=["Files"],
+         responses=standard_responses,
+     )
+
+     @router.get(
+         "/files",
+         response_model=ListOpenAIFileResponse,
+         summary="List files",
+         description="List files",
+         responses={
+             200: {"description": "The list of files."},
+         },
+     )
+     async def list_files(
+         request: Annotated[ListFilesRequest, Depends(get_list_files_request)],
+     ) -> ListOpenAIFileResponse:
+         return await impl.openai_list_files(request)
+
+     @router.get(
+         "/files/{file_id}",
+         response_model=OpenAIFileObject,
+         summary="Get file",
+         description="Get file",
+         responses={
+             200: {"description": "The file."},
+         },
+     )
+     async def get_file(
+         request: Annotated[RetrieveFileRequest, Depends(get_get_files_request)],
+     ) -> OpenAIFileObject:
+         return await impl.openai_retrieve_file(request)
+
+     @router.delete(
+         "/files/{file_id}",
+         response_model=OpenAIFileDeleteResponse,
+         summary="Delete file",
+         description="Delete file",
+         responses={
+             200: {"description": "The file was deleted."},
+         },
+     )
+     async def delete_file(
+         request: Annotated[DeleteFileRequest, Depends(get_delete_files_request)],
+     ) -> OpenAIFileDeleteResponse:
+         return await impl.openai_delete_file(request)
+
+     @router.get(
+         "/files/{file_id}/content",
+         status_code=200,
+         summary="Retrieve file content",
+         description="Retrieve file content",
+         responses={
+             200: {
+                 "description": "The raw file content as a binary response.",
+                 "content": {"application/json": {"schema": {"$ref": "#/components/schemas/Response"}}},
+             },
+         },
+     )
+     async def retrieve_file_content(
+         request: Annotated[RetrieveFileContentRequest, Depends(get_retrieve_file_content_request)],
+     ) -> Response:
+         return await impl.openai_retrieve_file_content(request)
+
+     @router.post(
+         "/files",
+         response_model=OpenAIFileObject,
+         summary="Upload file",
+         description="Upload a file.",
+         responses={
+             200: {"description": "The uploaded file."},
+         },
+     )
+     async def upload_file(
+         file: Annotated[UploadFile, File(description="The file to upload.")],
+         purpose: Annotated[OpenAIFilePurpose, Form(description="The intended purpose of the uploaded file.")],
+         expires_after: Annotated[ExpiresAfter | None, Form(description="Optional expiration settings.")] = None,
+     ) -> OpenAIFileObject:
+         request = UploadFileRequest(
+             purpose=purpose,
+             expires_after=expires_after,
+         )
+         return await impl.openai_upload_file(request, file)
+
+     return router
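Note: the hunk above registers routes through a factory rather than at module level, so a concrete Files implementation must be supplied when the router is created. A minimal wiring sketch follows; StubFiles is a hypothetical stand-in (not part of the package) whose methods mirror the routes registered above.

# Hypothetical wiring sketch -- StubFiles is illustrative only.
from fastapi import FastAPI

from llama_stack_api.files.fastapi_routes import create_router


class StubFiles:
    # One method per route registered by create_router() above.
    async def openai_list_files(self, request):
        raise NotImplementedError

    async def openai_retrieve_file(self, request):
        raise NotImplementedError

    async def openai_delete_file(self, request):
        raise NotImplementedError

    async def openai_retrieve_file_content(self, request):
        raise NotImplementedError

    async def openai_upload_file(self, request, file):
        raise NotImplementedError


app = FastAPI()
app.include_router(create_router(StubFiles()))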

llama_stack_api/files/models.py
@@ -0,0 +1,107 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from enum import StrEnum
+ from typing import ClassVar, Literal
+
+ from pydantic import BaseModel, Field
+
+ from llama_stack_api.common.responses import Order
+ from llama_stack_api.schema_utils import json_schema_type
+
+
+ class OpenAIFilePurpose(StrEnum):
+     """
+     Valid purpose values for OpenAI Files API.
+     """
+
+     ASSISTANTS = "assistants"
+     BATCH = "batch"
+
+
+ @json_schema_type
+ class OpenAIFileObject(BaseModel):
+     """OpenAI File object as defined in the OpenAI Files API."""
+
+     object: Literal["file"] = Field(default="file", description="The object type, which is always 'file'.")
+     id: str = Field(..., description="The file identifier, which can be referenced in the API endpoints.")
+     bytes: int = Field(..., description="The size of the file, in bytes.")
+     created_at: int = Field(..., description="The Unix timestamp (in seconds) for when the file was created.")
+     expires_at: int = Field(..., description="The Unix timestamp (in seconds) for when the file expires.")
+     filename: str = Field(..., description="The name of the file.")
+     purpose: OpenAIFilePurpose = Field(..., description="The intended purpose of the file.")
+
+
+ @json_schema_type
+ class ExpiresAfter(BaseModel):
+     """Control expiration of uploaded files."""
+
+     MIN: ClassVar[int] = 3600  # 1 hour
+     MAX: ClassVar[int] = 2592000  # 30 days
+
+     anchor: Literal["created_at"] = Field(..., description="The anchor point for expiration, must be 'created_at'.")
+     seconds: int = Field(
+         ..., ge=MIN, le=MAX, description="Seconds until expiration, between 3600 (1 hour) and 2592000 (30 days)."
+     )
+
+
+ @json_schema_type
+ class ListOpenAIFileResponse(BaseModel):
+     """Response for listing files in OpenAI Files API."""
+
+     data: list[OpenAIFileObject] = Field(..., description="The list of files.")
+     has_more: bool = Field(..., description="Whether there are more files available beyond this page.")
+     first_id: str = Field(..., description="The ID of the first file in the list for pagination.")
+     last_id: str = Field(..., description="The ID of the last file in the list for pagination.")
+     object: Literal["list"] = Field(default="list", description="The object type, which is always 'list'.")
+
+
+ @json_schema_type
+ class OpenAIFileDeleteResponse(BaseModel):
+     """Response for deleting a file in OpenAI Files API."""
+
+     id: str = Field(..., description="The file identifier that was deleted.")
+     object: Literal["file"] = Field(default="file", description="The object type, which is always 'file'.")
+     deleted: bool = Field(..., description="Whether the file was successfully deleted.")
+
+
+ @json_schema_type
+ class ListFilesRequest(BaseModel):
+     """Request model for listing files."""
+
+     after: str | None = Field(default=None, description="A cursor for pagination. Returns files after this ID.")
+     limit: int | None = Field(default=10000, description="Maximum number of files to return (1-10,000).")
+     order: Order | None = Field(default=Order.desc, description="Sort order by created_at timestamp ('asc' or 'desc').")
+     purpose: OpenAIFilePurpose | None = Field(default=None, description="Filter files by purpose.")
+
+
+ @json_schema_type
+ class RetrieveFileRequest(BaseModel):
+     """Request model for retrieving a file."""
+
+     file_id: str = Field(..., description="The ID of the file to retrieve.")
+
+
+ @json_schema_type
+ class DeleteFileRequest(BaseModel):
+     """Request model for deleting a file."""
+
+     file_id: str = Field(..., description="The ID of the file to delete.")
+
+
+ @json_schema_type
+ class RetrieveFileContentRequest(BaseModel):
+     """Request model for retrieving file content."""
+
+     file_id: str = Field(..., description="The ID of the file to retrieve content from.")
+
+
+ @json_schema_type
+ class UploadFileRequest(BaseModel):
+     """Request model for uploading a file."""
+
+     purpose: OpenAIFilePurpose = Field(..., description="The intended purpose of the uploaded file.")
+     expires_after: ExpiresAfter | None = Field(default=None, description="Optional expiration settings for the file.")
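Note: ExpiresAfter encodes its bounds as ge/le constraints on the seconds field, so Pydantic rejects out-of-range values at construction time. A small usage sketch, assuming the models above are importable from llama_stack_api.files.models:

from pydantic import ValidationError

from llama_stack_api.files.models import ExpiresAfter, OpenAIFilePurpose, UploadFileRequest

# In-range value validates (1 day is between MIN=3600 and MAX=2592000 seconds).
req = UploadFileRequest(
    purpose=OpenAIFilePurpose.ASSISTANTS,
    expires_after=ExpiresAfter(anchor="created_at", seconds=86400),
)

# Out-of-range value raises at construction time.
try:
    ExpiresAfter(anchor="created_at", seconds=60)  # below the 3600-second minimum
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # greater_than_equal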

{llama_stack/apis/inference → llama_stack_api}/inference.py
@@ -5,7 +5,7 @@
  # the root directory of this source tree.

  from collections.abc import AsyncIterator
- from enum import Enum
+ from enum import Enum, StrEnum
  from typing import (
      Annotated,
      Any,
@@ -15,28 +15,16 @@ from typing import (
  )

  from fastapi import Body
- from pydantic import BaseModel, Field, field_validator
+ from pydantic import BaseModel, Field
  from typing_extensions import TypedDict

- from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
- from llama_stack.apis.common.responses import Order
- from llama_stack.apis.models import Model
- from llama_stack.apis.telemetry import MetricResponseMixin
- from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
- from llama_stack.models.llama.datatypes import (
-     BuiltinTool,
-     StopReason,
-     ToolCall,
-     ToolDefinition,
-     ToolPromptFormat,
+ from llama_stack_api.common.content_types import InterleavedContent
+ from llama_stack_api.common.responses import (
+     Order,
  )
- from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
- from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
-
- register_schema(ToolCall)
- register_schema(ToolDefinition)
-
- from enum import StrEnum
+ from llama_stack_api.models import Model
+ from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+ from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA


  @json_schema_type
@@ -97,7 +85,7 @@ class SamplingParams(BaseModel):

      strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)

-     max_tokens: int | None = 0
+     max_tokens: int | None = None
      repetition_penalty: float | None = 1.0
      stop: list[str] | None = None

@@ -202,58 +190,6 @@ class ToolResponseMessage(BaseModel):
      content: InterleavedContent


- @json_schema_type
- class CompletionMessage(BaseModel):
-     """A message containing the model's (assistant) response in a chat conversation.
-
-     :param role: Must be "assistant" to identify this as the model's response
-     :param content: The content of the model's response
-     :param stop_reason: Reason why the model stopped generating. Options are:
-         - `StopReason.end_of_turn`: The model finished generating the entire response.
-         - `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response.
-         - `StopReason.out_of_tokens`: The model ran out of token budget.
-     :param tool_calls: List of tool calls. Each tool call is a ToolCall object.
-     """
-
-     role: Literal["assistant"] = "assistant"
-     content: InterleavedContent
-     stop_reason: StopReason
-     tool_calls: list[ToolCall] | None = Field(default_factory=lambda: [])
-
-
- Message = Annotated[
-     UserMessage | SystemMessage | ToolResponseMessage | CompletionMessage,
-     Field(discriminator="role"),
- ]
- register_schema(Message, name="Message")
-
-
- @json_schema_type
- class ToolResponse(BaseModel):
-     """Response from a tool invocation.
-
-     :param call_id: Unique identifier for the tool call this response is for
-     :param tool_name: Name of the tool that was invoked
-     :param content: The response content from the tool
-     :param metadata: (Optional) Additional metadata about the tool response
-     """
-
-     call_id: str
-     tool_name: BuiltinTool | str
-     content: InterleavedContent
-     metadata: dict[str, Any] | None = None
-
-     @field_validator("tool_name", mode="before")
-     @classmethod
-     def validate_field(cls, v):
-         if isinstance(v, str):
-             try:
-                 return BuiltinTool(v)
-             except ValueError:
-                 return v
-         return v
-
-
  class ToolChoice(Enum):
      """Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.

@@ -290,22 +226,6 @@ class ChatCompletionResponseEventType(Enum):
      progress = "progress"


- @json_schema_type
- class ChatCompletionResponseEvent(BaseModel):
-     """An event during chat completion generation.
-
-     :param event_type: Type of the event
-     :param delta: Content generated since last event. This can be one or more tokens, or a tool call.
-     :param logprobs: Optional log probabilities for generated tokens
-     :param stop_reason: Optional reason why generation stopped, if complete
-     """
-
-     event_type: ChatCompletionResponseEventType
-     delta: ContentDelta
-     logprobs: list[TokenLogProbs] | None = None
-     stop_reason: StopReason | None = None
-
-
  class ResponseFormatType(StrEnum):
      """Types of formats for structured (guided) decoding.

@@ -358,34 +278,6 @@ class CompletionRequest(BaseModel):
      logprobs: LogProbConfig | None = None


- @json_schema_type
- class CompletionResponse(MetricResponseMixin):
-     """Response from a completion request.
-
-     :param content: The generated completion text
-     :param stop_reason: Reason why generation stopped
-     :param logprobs: Optional log probabilities for generated tokens
-     """
-
-     content: str
-     stop_reason: StopReason
-     logprobs: list[TokenLogProbs] | None = None
-
-
- @json_schema_type
- class CompletionResponseStreamChunk(MetricResponseMixin):
-     """A chunk of a streamed completion response.
-
-     :param delta: New content generated since last chunk. This can be one or more tokens.
-     :param stop_reason: Optional reason why generation stopped, if complete
-     :param logprobs: Optional log probabilities for generated tokens
-     """
-
-     delta: str
-     stop_reason: StopReason | None = None
-     logprobs: list[TokenLogProbs] | None = None
-
-
  class SystemMessageBehavior(Enum):
      """Config for how to override the default system prompt.

@@ -399,70 +291,6 @@ class SystemMessageBehavior(Enum):
      replace = "replace"


- @json_schema_type
- class ToolConfig(BaseModel):
-     """Configuration for tool use.
-
-     :param tool_choice: (Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
-     :param tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model.
-         - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
-         - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag.
-         - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls.
-     :param system_message_behavior: (Optional) Config for how to override the default system prompt.
-         - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt.
-         - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string
-         '{{function_definitions}}' to indicate where the function definitions should be inserted.
-     """
-
-     tool_choice: ToolChoice | str | None = Field(default=ToolChoice.auto)
-     tool_prompt_format: ToolPromptFormat | None = Field(default=None)
-     system_message_behavior: SystemMessageBehavior | None = Field(default=SystemMessageBehavior.append)
-
-     def model_post_init(self, __context: Any) -> None:
-         if isinstance(self.tool_choice, str):
-             try:
-                 self.tool_choice = ToolChoice[self.tool_choice]
-             except KeyError:
-                 pass
-
-
- # This is an internally used class
- @json_schema_type
- class ChatCompletionRequest(BaseModel):
-     model: str
-     messages: list[Message]
-     sampling_params: SamplingParams | None = Field(default_factory=SamplingParams)
-
-     tools: list[ToolDefinition] | None = Field(default_factory=lambda: [])
-     tool_config: ToolConfig | None = Field(default_factory=ToolConfig)
-
-     response_format: ResponseFormat | None = None
-     stream: bool | None = False
-     logprobs: LogProbConfig | None = None
-
-
- @json_schema_type
- class ChatCompletionResponseStreamChunk(MetricResponseMixin):
-     """A chunk of a streamed chat completion response.
-
-     :param event: The event containing the new content
-     """
-
-     event: ChatCompletionResponseEvent
-
-
- @json_schema_type
- class ChatCompletionResponse(MetricResponseMixin):
-     """Response from a chat completion request.
-
-     :param completion_message: The complete response message
-     :param logprobs: Optional log probabilities for generated tokens
-     """
-
-     completion_message: CompletionMessage
-     logprobs: list[TokenLogProbs] | None = None
-
-
  @json_schema_type
  class EmbeddingsResponse(BaseModel):
      """Response containing generated embeddings.
@@ -727,6 +555,81 @@ OpenAIResponseFormatParam = Annotated[
  register_schema(OpenAIResponseFormatParam, name="OpenAIResponseFormatParam")


+ @json_schema_type
+ class FunctionToolConfig(BaseModel):
+     name: str
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolChoiceFunctionTool(BaseModel):
+     """Function tool choice for OpenAI-compatible chat completion requests.
+
+     :param type: Must be "function" to indicate function tool choice
+     :param function: The function tool configuration
+     """
+
+     type: Literal["function"] = "function"
+     function: FunctionToolConfig
+
+     def __init__(self, name: str):
+         super().__init__(type="function", function=FunctionToolConfig(name=name))
+
+
+ @json_schema_type
+ class CustomToolConfig(BaseModel):
+     """Custom tool configuration for OpenAI-compatible chat completion requests.
+
+     :param name: Name of the custom tool
+     """
+
+     name: str
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolChoiceCustomTool(BaseModel):
+     """Custom tool choice for OpenAI-compatible chat completion requests.
+
+     :param type: Must be "custom" to indicate custom tool choice
+     """
+
+     type: Literal["custom"] = "custom"
+     custom: CustomToolConfig
+
+     def __init__(self, name: str):
+         super().__init__(type="custom", custom=CustomToolConfig(name=name))
+
+
+ @json_schema_type
+ class AllowedToolsConfig(BaseModel):
+     tools: list[dict[str, Any]]
+     mode: Literal["auto", "required"]
+
+
+ @json_schema_type
+ class OpenAIChatCompletionToolChoiceAllowedTools(BaseModel):
+     """Allowed tools response format for OpenAI-compatible chat completion requests.
+
+     :param type: Must be "allowed_tools" to indicate allowed tools response format
+     """
+
+     type: Literal["allowed_tools"] = "allowed_tools"
+     allowed_tools: AllowedToolsConfig
+
+     def __init__(self, tools: list[dict[str, Any]], mode: Literal["auto", "required"]):
+         super().__init__(type="allowed_tools", allowed_tools=AllowedToolsConfig(tools=tools, mode=mode))
+
+
+ # Define the object-level union with discriminator
+ OpenAIChatCompletionToolChoice = Annotated[
+     OpenAIChatCompletionToolChoiceAllowedTools
+     | OpenAIChatCompletionToolChoiceFunctionTool
+     | OpenAIChatCompletionToolChoiceCustomTool,
+     Field(discriminator="type"),
+ ]
+
+ register_schema(OpenAIChatCompletionToolChoice, name="OpenAIChatCompletionToolChoice")
+
+
  @json_schema_type
  class OpenAITopLogProb(BaseModel):
      """The top log probability for a token from an OpenAI-compatible chat completion response.
@@ -754,7 +657,7 @@ class OpenAITokenLogProb(BaseModel):
      token: str
      bytes: list[int] | None = None
      logprob: float
-     top_logprobs: list[OpenAITopLogProb]
+     top_logprobs: list[OpenAITopLogProb] | None = None


  @json_schema_type
@@ -1160,7 +1063,6 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"):


  @runtime_checkable
- @trace_protocol
  class InferenceProvider(Protocol):
      """
      This protocol defines the interface that should be implemented by all inference providers.
@@ -1189,20 +1091,18 @@ class InferenceProvider(Protocol):
          raise NotImplementedError("Reranking is not implemented")
          return  # this is so mypy's safe-super rule will consider the method concrete

-     @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
      @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
      async def openai_completion(
          self,
          params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)],
-     ) -> OpenAICompletion:
+     ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
          """Create completion.

          Generate an OpenAI-compatible completion for the given prompt using the specified model.
-         :returns: An OpenAICompletion.
+         :returns: An OpenAICompletion. When streaming, returns Server-Sent Events (SSE) with OpenAICompletion chunks.
          """
          ...

-     @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
      @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
      async def openai_chat_completion(
          self,
@@ -1211,11 +1111,10 @@ class InferenceProvider(Protocol):
          """Create chat completions.

          Generate an OpenAI-compatible chat completion for the given messages using the specified model.
-         :returns: An OpenAIChatCompletion.
+         :returns: An OpenAIChatCompletion. When streaming, returns Server-Sent Events (SSE) with OpenAIChatCompletionChunk objects.
          """
          ...

-     @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
      @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
      async def openai_embeddings(
          self,
@@ -1234,12 +1133,12 @@ class Inference(InferenceProvider):

      Llama Stack Inference API for generating completions, chat completions, and embeddings.

-     This API provides the raw interface to the underlying models. Two kinds of models are supported:
+     This API provides the raw interface to the underlying models. Three kinds of models are supported:
      - LLM models: these models generate "raw" and "chat" (conversational) completions.
      - Embedding models: these models generate embeddings to be used for semantic search.
+     - Rerank models: these models reorder the documents based on their relevance to a query.
      """

-     @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
      @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
      async def list_chat_completions(
          self,
@@ -1258,9 +1157,6 @@ class Inference(InferenceProvider):
          """
          raise NotImplementedError("List chat completions is not implemented")

-     @webmethod(
-         route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
-     )
      @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
      async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
          """Get chat completion.

llama_stack_api/inspect_api/__init__.py
@@ -0,0 +1,37 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ """Inspect API protocol and models.
+
+ This module contains the Inspect protocol definition.
+ Pydantic models are defined in llama_stack_api.inspect.models.
+ The FastAPI router is defined in llama_stack_api.inspect.fastapi_routes.
+ """
+
+ # Import fastapi_routes for router factory access
+ from . import fastapi_routes
+
+ # Import protocol for re-export
+ from .api import Inspect
+
+ # Import models for re-export
+ from .models import (
+     ApiFilter,
+     HealthInfo,
+     ListRoutesResponse,
+     RouteInfo,
+     VersionInfo,
+ )
+
+ __all__ = [
+     "Inspect",
+     "ApiFilter",
+     "HealthInfo",
+     "ListRoutesResponse",
+     "RouteInfo",
+     "VersionInfo",
+     "fastapi_routes",
+ ]

llama_stack_api/inspect_api/api.py
@@ -0,0 +1,25 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from typing import Protocol, runtime_checkable
+
+ from .models import (
+     ApiFilter,
+     HealthInfo,
+     ListRoutesResponse,
+     VersionInfo,
+ )
+
+
+ @runtime_checkable
+ class Inspect(Protocol):
+     """APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers."""
+
+     async def list_routes(self, api_filter: ApiFilter | None = None) -> ListRoutesResponse: ...
+
+     async def health(self) -> HealthInfo: ...
+
+     async def version(self) -> VersionInfo: ...
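Note: Inspect is declared @runtime_checkable, so isinstance() only checks that matching method names exist; signatures and return types are not verified at runtime. An illustrative sketch (the conforming class and import path are assumptions based on the file list above):

from llama_stack_api.inspect_api.api import Inspect


class MyInspect:
    async def list_routes(self, api_filter=None):
        raise NotImplementedError

    async def health(self):
        raise NotImplementedError

    async def version(self):
        raise NotImplementedError


assert isinstance(MyInspect(), Inspect)  # structural check on method names only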