llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -10,11 +10,11 @@ from typing import Annotated, Any, Literal, Protocol
 
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.job_types import JobStatus
-from llama_stack.apis.common.training_types import Checkpoint
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.common.content_types import URL
+from llama_stack_api.common.job_types import JobStatus
+from llama_stack_api.common.training_types import Checkpoint
+from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
 
 
 @json_schema_type
@@ -236,6 +236,7 @@ class PostTrainingRLHFRequest(BaseModel):
     logger_config: dict[str, Any]
 
 
+@json_schema_type
 class PostTrainingJob(BaseModel):
     job_uuid: str
 
@@ -265,6 +266,7 @@ class PostTrainingJobStatusResponse(BaseModel):
     checkpoints: list[Checkpoint] = Field(default_factory=list)
 
 
+@json_schema_type
 class ListPostTrainingJobsResponse(BaseModel):
     data: list[PostTrainingJob]
 
@@ -284,7 +286,6 @@ class PostTrainingJobArtifactsResponse(BaseModel):
 
 
 class PostTraining(Protocol):
-    @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def supervised_fine_tune(
         self,
@@ -312,7 +313,6 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def preference_optimize(
         self,
@@ -335,7 +335,6 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
         """Get all training jobs.
@@ -344,7 +343,6 @@ class PostTraining(Protocol):
         """
         ...
 
-    @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
    async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
        """Get the status of a training job.
@@ -354,7 +352,6 @@ class PostTraining(Protocol):
         """
         ...
356
354
 
357
- @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
358
355
  @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
359
356
  async def cancel_training_job(self, job_uuid: str) -> None:
360
357
  """Cancel a training job.
@@ -363,7 +360,6 @@ class PostTraining(Protocol):
363
360
  """
364
361
  ...
365
362
 
366
- @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
367
363
  @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
368
364
  async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
369
365
  """Get the artifacts of a training job.
@@ -10,9 +10,8 @@ from typing import Protocol, runtime_checkable
10
10
 
11
11
  from pydantic import BaseModel, Field, field_validator, model_validator
12
12
 
13
- from llama_stack.apis.version import LLAMA_STACK_API_V1
14
- from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
15
- from llama_stack.schema_utils import json_schema_type, webmethod
13
+ from llama_stack_api.schema_utils import json_schema_type, webmethod
14
+ from llama_stack_api.version import LLAMA_STACK_API_V1
16
15
 
17
16
 
18
17
  @json_schema_type
@@ -85,6 +84,7 @@ class Prompt(BaseModel):
85
84
  return f"pmpt_{hex_string}"
86
85
 
87
86
 
87
+ @json_schema_type
88
88
  class ListPromptsResponse(BaseModel):
89
89
  """Response model to list prompts."""
90
90
 
@@ -92,7 +92,6 @@ class ListPromptsResponse(BaseModel):
92
92
 
93
93
 
94
94
  @runtime_checkable
95
- @trace_protocol
96
95
  class Prompts(Protocol):
97
96
  """Prompts
98
97
 
@@ -0,0 +1,33 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """Providers API protocol and models.
8
+
9
+ This module contains the Providers protocol definition.
10
+ Pydantic models are defined in llama_stack_api.providers.models.
11
+ The FastAPI router is defined in llama_stack_api.providers.fastapi_routes.
12
+ """
13
+
14
+ # Import fastapi_routes for router factory access
15
+ from . import fastapi_routes
16
+
17
+ # Import protocol for re-export
18
+ from .api import Providers
19
+
20
+ # Import models for re-export
21
+ from .models import (
22
+ InspectProviderRequest,
23
+ ListProvidersResponse,
24
+ ProviderInfo,
25
+ )
26
+
27
+ __all__ = [
28
+ "Providers",
29
+ "ProviderInfo",
30
+ "ListProvidersResponse",
31
+ "InspectProviderRequest",
32
+ "fastapi_routes",
33
+ ]
@@ -0,0 +1,16 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from typing import Protocol, runtime_checkable
8
+
9
+ from .models import InspectProviderRequest, ListProvidersResponse, ProviderInfo
10
+
11
+
12
+ @runtime_checkable
13
+ class Providers(Protocol):
14
+ async def list_providers(self) -> ListProvidersResponse: ...
15
+
16
+ async def inspect_provider(self, request: InspectProviderRequest) -> ProviderInfo: ...
@@ -0,0 +1,57 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """FastAPI router for the Providers API.
8
+
9
+ This module defines the FastAPI router for the Providers API using standard
10
+ FastAPI route decorators.
11
+ """
12
+
13
+ from typing import Annotated
14
+
15
+ from fastapi import APIRouter, Depends
16
+
17
+ from llama_stack_api.router_utils import create_path_dependency, standard_responses
18
+ from llama_stack_api.version import LLAMA_STACK_API_V1
19
+
20
+ from .api import Providers
21
+ from .models import InspectProviderRequest, ListProvidersResponse, ProviderInfo
22
+
23
+ # Path parameter dependencies for single-field models
24
+ get_inspect_provider_request = create_path_dependency(InspectProviderRequest)
25
+
26
+
27
+ def create_router(impl: Providers) -> APIRouter:
28
+ """Create a FastAPI router for the Providers API."""
29
+ router = APIRouter(
30
+ prefix=f"/{LLAMA_STACK_API_V1}",
31
+ tags=["Providers"],
32
+ responses=standard_responses,
33
+ )
34
+
35
+ @router.get(
36
+ "/providers",
37
+ response_model=ListProvidersResponse,
38
+ summary="List providers.",
39
+ description="List all available providers.",
40
+ responses={200: {"description": "A ListProvidersResponse containing information about all providers."}},
41
+ )
42
+ async def list_providers() -> ListProvidersResponse:
43
+ return await impl.list_providers()
44
+
45
+ @router.get(
46
+ "/providers/{provider_id}",
47
+ response_model=ProviderInfo,
48
+ summary="Get provider.",
49
+ description="Get detailed information about a specific provider.",
50
+ responses={200: {"description": "A ProviderInfo object containing the provider's details."}},
51
+ )
52
+ async def inspect_provider(
53
+ request: Annotated[InspectProviderRequest, Depends(get_inspect_provider_request)],
54
+ ) -> ProviderInfo:
55
+ return await impl.inspect_provider(request)
56
+
57
+ return router
@@ -0,0 +1,24 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """Pydantic models for Providers API requests and responses.
8
+
9
+ This module re-exports models from llama_stack_api.admin.models to ensure
10
+ a single source of truth and avoid type conflicts.
11
+ """
12
+
13
+ # Import and re-export shared models from admin
14
+ from llama_stack_api.admin.models import (
15
+ InspectProviderRequest,
16
+ ListProvidersResponse,
17
+ ProviderInfo,
18
+ )
19
+
20
+ __all__ = [
21
+ "ProviderInfo",
22
+ "ListProvidersResponse",
23
+ "InspectProviderRequest",
24
+ ]
@@ -5,18 +5,13 @@
5
5
  # the root directory of this source tree.
6
6
 
7
7
  from enum import Enum, StrEnum
8
- from typing import Annotated, Any, Literal, Protocol
8
+ from typing import Annotated, Any, Literal
9
9
 
10
10
  from pydantic import BaseModel, Field, field_validator
11
- from typing_extensions import runtime_checkable
12
11
 
13
- from llama_stack.apis.common.content_types import URL, InterleavedContent
14
- from llama_stack.apis.version import LLAMA_STACK_API_V1
15
- from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
16
- from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
12
+ from llama_stack_api.common.content_types import URL, InterleavedContent
17
13
 
18
14
 
19
- @json_schema_type
20
15
  class RRFRanker(BaseModel):
21
16
  """
22
17
  Reciprocal Rank Fusion (RRF) ranker configuration.
@@ -30,7 +25,6 @@ class RRFRanker(BaseModel):
30
25
  impact_factor: float = Field(default=60.0, gt=0.0) # default of 60 for optimal performance
31
26
 
32
27
 
33
- @json_schema_type
34
28
  class WeightedRanker(BaseModel):
35
29
  """
36
30
  Weighted ranker configuration that combines vector and keyword scores.
@@ -55,10 +49,8 @@ Ranker = Annotated[
55
49
  RRFRanker | WeightedRanker,
56
50
  Field(discriminator="type"),
57
51
  ]
58
- register_schema(Ranker, name="Ranker")
59
52
 
60
53
 
61
- @json_schema_type
62
54
  class RAGDocument(BaseModel):
63
55
  """
64
56
  A document to be used for document ingestion in the RAG Tool.
@@ -75,7 +67,6 @@ class RAGDocument(BaseModel):
75
67
  metadata: dict[str, Any] = Field(default_factory=dict)
76
68
 
77
69
 
78
- @json_schema_type
79
70
  class RAGQueryResult(BaseModel):
80
71
  """Result of a RAG query containing retrieved content and metadata.
81
72
 
@@ -87,7 +78,6 @@ class RAGQueryResult(BaseModel):
87
78
  metadata: dict[str, Any] = Field(default_factory=dict)
88
79
 
89
80
 
90
- @json_schema_type
91
81
  class RAGQueryGenerator(Enum):
92
82
  """Types of query generators for RAG systems.
93
83
 
@@ -101,7 +91,6 @@ class RAGQueryGenerator(Enum):
101
91
  custom = "custom"
102
92
 
103
93
 
104
- @json_schema_type
105
94
  class RAGSearchMode(StrEnum):
106
95
  """
107
96
  Search modes for RAG query retrieval:
@@ -115,7 +104,6 @@ class RAGSearchMode(StrEnum):
115
104
  HYBRID = "hybrid"
116
105
 
117
106
 
118
- @json_schema_type
119
107
  class DefaultRAGQueryGeneratorConfig(BaseModel):
120
108
  """Configuration for the default RAG query generator.
121
109
 
@@ -127,7 +115,6 @@ class DefaultRAGQueryGeneratorConfig(BaseModel):
127
115
  separator: str = " "
128
116
 
129
117
 
130
- @json_schema_type
131
118
  class LLMRAGQueryGeneratorConfig(BaseModel):
132
119
  """Configuration for the LLM-based RAG query generator.
133
120
 
@@ -145,10 +132,8 @@ RAGQueryGeneratorConfig = Annotated[
145
132
  DefaultRAGQueryGeneratorConfig | LLMRAGQueryGeneratorConfig,
146
133
  Field(discriminator="type"),
147
134
  ]
148
- register_schema(RAGQueryGeneratorConfig, name="RAGQueryGeneratorConfig")
149
135
 
150
136
 
151
- @json_schema_type
152
137
  class RAGQueryConfig(BaseModel):
153
138
  """
154
139
  Configuration for the RAG query generation.
@@ -181,38 +166,3 @@ class RAGQueryConfig(BaseModel):
181
166
  if len(v) == 0:
182
167
  raise ValueError("chunk_template must not be empty")
183
168
  return v
184
-
185
-
186
- @runtime_checkable
187
- @trace_protocol
188
- class RAGToolRuntime(Protocol):
189
- @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
190
- async def insert(
191
- self,
192
- documents: list[RAGDocument],
193
- vector_db_id: str,
194
- chunk_size_in_tokens: int = 512,
195
- ) -> None:
196
- """Index documents so they can be used by the RAG system.
197
-
198
- :param documents: List of documents to index in the RAG system
199
- :param vector_db_id: ID of the vector database to store the document embeddings
200
- :param chunk_size_in_tokens: (Optional) Size in tokens for document chunking during indexing
201
- """
202
- ...
203
-
204
- @webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
205
- async def query(
206
- self,
207
- content: InterleavedContent,
208
- vector_db_ids: list[str],
209
- query_config: RAGQueryConfig | None = None,
210
- ) -> RAGQueryResult:
211
- """Query the RAG system for context; typically invoked by the agent.
212
-
213
- :param content: The query content to search for in the indexed documents
214
- :param vector_db_ids: List of vector database IDs to search within
215
- :param query_config: (Optional) Configuration parameters for the query operation
216
- :returns: RAGQueryResult containing the retrieved content and metadata
217
- """
218
- ...
@@ -4,7 +4,6 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
-
8
7
  from enum import StrEnum
9
8
 
10
9
  from pydantic import BaseModel, Field
@@ -20,6 +19,7 @@ class ResourceType(StrEnum):
20
19
  tool = "tool"
21
20
  tool_group = "tool_group"
22
21
  prompt = "prompt"
22
+ connector = "connector"
23
23
 
24
24
 
25
25
  class Resource(BaseModel):
@@ -0,0 +1,160 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """Utilities for creating FastAPI routers with standard error responses.
8
+
9
+ This module provides standard error response definitions for FastAPI routers.
10
+ These responses use OpenAPI $ref references to component responses defined
11
+ in the OpenAPI specification.
12
+ """
13
+
14
+ import inspect
15
+ from collections.abc import Callable
16
+ from typing import Annotated, Any, TypeVar
17
+
18
+ from fastapi import Path, Query
19
+ from pydantic import BaseModel
20
+
21
+ # OpenAPI extension key to mark routes that don't require authentication.
22
+ # Use this in FastAPI route decorators: @router.get("/health", openapi_extra={PUBLIC_ROUTE_KEY: True})
23
+ PUBLIC_ROUTE_KEY = "x-public"
24
+
25
+
26
+ standard_responses: dict[int | str, dict[str, Any]] = {
27
+ 400: {"$ref": "#/components/responses/BadRequest400"},
28
+ 429: {"$ref": "#/components/responses/TooManyRequests429"},
29
+ 500: {"$ref": "#/components/responses/InternalServerError500"},
30
+ "default": {"$ref": "#/components/responses/DefaultError"},
31
+ }
32
+
33
+ T = TypeVar("T", bound=BaseModel)
34
+
35
+
36
+ def create_query_dependency[T: BaseModel](model_class: type[T]) -> Callable[..., T]:
37
+ """Create a FastAPI dependency function from a Pydantic model for query parameters.
38
+
39
+ FastAPI does not natively support using Pydantic models as query parameters
40
+ without a dependency function. Using a dependency function typically leads to
41
+ duplication: field types, default values, and descriptions must be repeated in
42
+ `Query(...)` annotations even though they already exist in the Pydantic model.
43
+
44
+ This function automatically generates a dependency function that extracts query parameters
45
+ from the request and constructs an instance of the Pydantic model. The descriptions and
46
+ defaults are automatically extracted from the model's Field definitions, making the model
47
+ the single source of truth.
48
+
49
+ Args:
50
+ model_class: The Pydantic model class to create a dependency for
51
+
52
+ Returns:
53
+ A dependency function that can be used with FastAPI's Depends()
54
+ ```
55
+ """
56
+ # Build function signature dynamically from model fields
57
+ annotations: dict[str, Any] = {}
58
+ defaults: dict[str, Any] = {}
59
+
60
+ for field_name, field_info in model_class.model_fields.items():
61
+ # Extract description from Field
62
+ description = field_info.description
63
+
64
+ # Create Query annotation with description from model
65
+ query_annotation = Query(description=description) if description else Query()
66
+
67
+ # Create Annotated type with Query
68
+ field_type = field_info.annotation
69
+ annotations[field_name] = Annotated[field_type, query_annotation]
70
+
71
+ # Set default value from model
72
+ if field_info.default is not inspect.Parameter.empty:
73
+ defaults[field_name] = field_info.default
74
+
75
+ # Create the dependency function dynamically
76
+ def dependency_func(**kwargs: Any) -> T:
77
+ return model_class(**kwargs)
78
+
79
+ # Set function signature
80
+ sig_params = []
81
+ for field_name, field_type in annotations.items():
82
+ default = defaults.get(field_name, inspect.Parameter.empty)
83
+ param = inspect.Parameter(
84
+ field_name,
85
+ inspect.Parameter.POSITIONAL_OR_KEYWORD,
86
+ default=default,
87
+ annotation=field_type,
88
+ )
89
+ sig_params.append(param)
90
+
91
+ # These attributes are set dynamically at runtime. While mypy can't verify them statically,
92
+ # they are standard Python function attributes that exist on all callable objects at runtime.
93
+ # Setting them allows FastAPI to properly introspect the function signature for dependency injection.
94
+ dependency_func.__signature__ = inspect.Signature(sig_params) # type: ignore[attr-defined]
95
+ dependency_func.__annotations__ = annotations # type: ignore[attr-defined]
96
+ dependency_func.__name__ = f"get_{model_class.__name__.lower()}_request" # type: ignore[attr-defined]
97
+
98
+ return dependency_func
99
+
100
+
101
+ def create_path_dependency[T: BaseModel](model_class: type[T]) -> Callable[..., T]:
102
+ """Create a FastAPI dependency function from a Pydantic model for path parameters.
103
+
104
+ FastAPI requires path parameters to be explicitly annotated with `Path()`. When using
105
+ a Pydantic model that contains path parameters, you typically need a dependency function
106
+ that extracts the path parameter and constructs the model. This leads to duplication:
107
+ the parameter name, type, and description must be repeated in `Path(...)` annotations
108
+ even though they already exist in the Pydantic model.
109
+
110
+ This function automatically generates a dependency function that extracts path parameters
111
+ from the request and constructs an instance of the Pydantic model. The descriptions are
112
+ automatically extracted from the model's Field definitions, making the model the single
113
+ source of truth.
114
+
115
+ Args:
116
+ model_class: The Pydantic model class to create a dependency for. The model should
117
+ have exactly one field that represents the path parameter.
118
+
119
+ Returns:
120
+ A dependency function that can be used with FastAPI's Depends()
121
+ ```
122
+ """
123
+ # Get the single field from the model (path parameter models typically have one field)
124
+ if len(model_class.model_fields) != 1:
125
+ raise ValueError(
126
+ f"Path parameter model {model_class.__name__} must have exactly one field, "
127
+ f"but has {len(model_class.model_fields)} fields"
128
+ )
129
+
130
+ field_name, field_info = next(iter(model_class.model_fields.items()))
131
+
132
+ # Extract description from Field
133
+ description = field_info.description
134
+
135
+ # Create Path annotation with description from model
136
+ path_annotation = Path(description=description) if description else Path()
137
+
138
+ # Create Annotated type with Path
139
+ field_type = field_info.annotation
140
+ annotations: dict[str, Any] = {field_name: Annotated[field_type, path_annotation]}
141
+
142
+ # Create the dependency function dynamically
143
+ def dependency_func(**kwargs: Any) -> T:
144
+ return model_class(**kwargs)
145
+
146
+ # Set function signature
147
+ param = inspect.Parameter(
148
+ field_name,
149
+ inspect.Parameter.POSITIONAL_OR_KEYWORD,
150
+ annotation=annotations[field_name],
151
+ )
152
+
153
+ # These attributes are set dynamically at runtime. While mypy can't verify them statically,
154
+ # they are standard Python function attributes that exist on all callable objects at runtime.
155
+ # Setting them allows FastAPI to properly introspect the function signature for dependency injection.
156
+ dependency_func.__signature__ = inspect.Signature([param]) # type: ignore[attr-defined]
157
+ dependency_func.__annotations__ = annotations # type: ignore[attr-defined]
158
+ dependency_func.__name__ = f"get_{model_class.__name__.lower()}_request" # type: ignore[attr-defined]
159
+
160
+ return dependency_func
@@ -9,11 +9,10 @@ from typing import Any, Protocol, runtime_checkable
9
9
 
10
10
  from pydantic import BaseModel, Field
11
11
 
12
- from llama_stack.apis.inference import OpenAIMessageParam
13
- from llama_stack.apis.shields import Shield
14
- from llama_stack.apis.version import LLAMA_STACK_API_V1
15
- from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
16
- from llama_stack.schema_utils import json_schema_type, webmethod
12
+ from llama_stack_api.inference import OpenAIMessageParam
13
+ from llama_stack_api.schema_utils import json_schema_type, webmethod
14
+ from llama_stack_api.shields import Shield
15
+ from llama_stack_api.version import LLAMA_STACK_API_V1
17
16
 
18
17
 
19
18
  @json_schema_type
@@ -94,7 +93,6 @@ class ShieldStore(Protocol):
94
93
 
95
94
 
96
95
  @runtime_checkable
97
- @trace_protocol
98
96
  class Safety(Protocol):
99
97
  """Safety
100
98
 
@@ -121,15 +119,14 @@ class Safety(Protocol):
121
119
  """
122
120
  ...
123
121
 
124
- @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
125
122
  @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
126
- async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
123
+ async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
127
124
  """Create moderation.
128
125
 
129
126
  Classifies if text and/or image inputs are potentially harmful.
130
127
  :param input: Input (or inputs) to classify.
131
128
  Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
132
- :param model: The content moderation model you would like to use.
129
+ :param model: (Optional) The content moderation model you would like to use.
133
130
  :returns: A moderation object.
134
131
  """
135
132
  ...