llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -0,0 +1,113 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """FastAPI router for the Batches API.
8
+
9
+ This module defines the FastAPI router for the Batches API using standard
10
+ FastAPI route decorators. The router is defined in the API package to keep
11
+ all API-related code together.
12
+ """
13
+
14
+ from typing import Annotated
15
+
16
+ from fastapi import APIRouter, Body, Depends
17
+
18
+ from llama_stack_api.batches.models import (
19
+ CancelBatchRequest,
20
+ CreateBatchRequest,
21
+ ListBatchesRequest,
22
+ RetrieveBatchRequest,
23
+ )
24
+ from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
25
+ from llama_stack_api.version import LLAMA_STACK_API_V1
26
+
27
+ from .api import Batches
28
+ from .models import BatchObject, ListBatchesResponse
29
+
30
# Automatically generate dependency functions from Pydantic models.
# This ensures the models are the single source of truth for descriptions.
# NOTE(review): create_path_dependency/create_query_dependency come from
# llama_stack_api.router_utils; presumably they expose the model's fields as
# FastAPI path parameters — confirm against that module.
get_retrieve_batch_request = create_path_dependency(RetrieveBatchRequest)
get_cancel_batch_request = create_path_dependency(CancelBatchRequest)


# Automatically generate dependency function from Pydantic model.
# This ensures the model is the single source of truth for descriptions and
# defaults (ListBatchesRequest carries query parameters, e.g. `after`, `limit`).
get_list_batches_request = create_query_dependency(ListBatchesRequest)
39
+
40
+
41
def create_router(impl: Batches) -> APIRouter:
    """Create a FastAPI router for the Batches API.

    All routes are mounted under the v1 prefix and delegate directly to the
    supplied implementation. Request validation is handled by the Pydantic
    request models: POST payloads via ``Body``, path/query parameters via the
    module-level generated dependencies.

    Args:
        impl: The Batches implementation instance

    Returns:
        APIRouter configured for the Batches API
    """
    router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1}",
        tags=["Batches"],
        responses=standard_responses,
    )

    # POST /batches — create a batch; 409 is documented for idempotency-key
    # conflicts (same key, different parameters).
    @router.post(
        "/batches",
        response_model=BatchObject,
        summary="Create a new batch for processing multiple API requests.",
        description="Create a new batch for processing multiple API requests.",
        responses={
            200: {"description": "The created batch object."},
            409: {"description": "Conflict: The idempotency key was previously used with different parameters."},
        },
    )
    async def create_batch(
        request: Annotated[CreateBatchRequest, Body(...)],
    ) -> BatchObject:
        return await impl.create_batch(request)

    # GET /batches/{batch_id} — fetch one batch by ID (path parameter is
    # bound through the generated dependency).
    @router.get(
        "/batches/{batch_id}",
        response_model=BatchObject,
        summary="Retrieve information about a specific batch.",
        description="Retrieve information about a specific batch.",
        responses={
            200: {"description": "The batch object."},
        },
    )
    async def retrieve_batch(
        request: Annotated[RetrieveBatchRequest, Depends(get_retrieve_batch_request)],
    ) -> BatchObject:
        return await impl.retrieve_batch(request)

    # POST /batches/{batch_id}/cancel — cancel an in-progress batch and
    # return the updated batch object.
    @router.post(
        "/batches/{batch_id}/cancel",
        response_model=BatchObject,
        summary="Cancel a batch that is in progress.",
        description="Cancel a batch that is in progress.",
        responses={
            200: {"description": "The updated batch object."},
        },
    )
    async def cancel_batch(
        request: Annotated[CancelBatchRequest, Depends(get_cancel_batch_request)],
    ) -> BatchObject:
        return await impl.cancel_batch(request)

    # GET /batches — cursor-paginated listing (`after`/`limit` query
    # parameters come from ListBatchesRequest).
    @router.get(
        "/batches",
        response_model=ListBatchesResponse,
        summary="List all batches for the current user.",
        description="List all batches for the current user.",
        responses={
            200: {"description": "A list of batch objects."},
        },
    )
    async def list_batches(
        request: Annotated[ListBatchesRequest, Depends(get_list_batches_request)],
    ) -> ListBatchesResponse:
        return await impl.list_batches(request)

    return router
@@ -0,0 +1,78 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """Pydantic models for Batches API requests and responses.
8
+
9
+ This module defines the request and response models for the Batches API
10
+ using Pydantic with Field descriptions for OpenAPI schema generation.
11
+ """
12
+
13
+ from typing import Literal
14
+
15
+ from openai.types import Batch as BatchObject
16
+ from pydantic import BaseModel, Field
17
+
18
+ from llama_stack_api.schema_utils import json_schema_type
19
+
20
+
21
@json_schema_type
class CreateBatchRequest(BaseModel):
    """Request model for creating a batch.

    Field descriptions feed the generated OpenAPI schema. The response type
    is the OpenAI ``Batch`` object (see models re-export), and this request
    presumably mirrors the OpenAI batch-create parameters — confirm against
    the OpenAI Batches API.
    """

    input_file_id: str = Field(..., description="The ID of an uploaded file containing requests for the batch.")
    endpoint: str = Field(..., description="The endpoint to be used for all requests in the batch.")
    # Only the 24-hour completion window is accepted.
    completion_window: Literal["24h"] = Field(
        ..., description="The time window within which the batch should be processed."
    )
    metadata: dict[str, str] | None = Field(default=None, description="Optional metadata for the batch.")
    idempotency_key: str | None = Field(
        default=None, description="Optional idempotency key. When provided, enables idempotent behavior."
    )
+ )
34
+
35
+
36
@json_schema_type
class ListBatchesRequest(BaseModel):
    """Request model for listing batches.

    Used as query parameters for GET /batches (cursor-style pagination).
    """

    after: str | None = Field(
        default=None, description="Optional cursor for pagination. Returns batches after this ID."
    )
    limit: int = Field(default=20, description="Maximum number of batches to return. Defaults to 20.")
44
+
45
+
46
@json_schema_type
class RetrieveBatchRequest(BaseModel):
    """Request model for retrieving a batch.

    The single field is bound from the ``{batch_id}`` path parameter.
    """

    batch_id: str = Field(..., description="The ID of the batch to retrieve.")
51
+
52
+
53
@json_schema_type
class CancelBatchRequest(BaseModel):
    """Request model for canceling a batch.

    The single field is bound from the ``{batch_id}`` path parameter.
    """

    batch_id: str = Field(..., description="The ID of the batch to cancel.")
58
+
59
+
60
@json_schema_type
class ListBatchesResponse(BaseModel):
    """Response containing a list of batch objects.

    Follows the OpenAI list envelope: ``object`` is always "list", and
    ``first_id``/``last_id``/``has_more`` support cursor pagination. ``data``
    items are OpenAI ``Batch`` objects (re-exported here as ``BatchObject``).
    """

    object: Literal["list"] = "list"
    data: list[BatchObject] = Field(..., description="List of batch objects")
    first_id: str | None = Field(default=None, description="ID of the first batch in the list")
    last_id: str | None = Field(default=None, description="ID of the last batch in the list")
    has_more: bool = Field(default=False, description="Whether there are more batches available")


# Public API of this module; BatchObject is deliberately re-exported so
# consumers need not import from openai.types directly.
__all__ = [
    "CreateBatchRequest",
    "ListBatchesRequest",
    "RetrieveBatchRequest",
    "CancelBatchRequest",
    "ListBatchesResponse",
    "BatchObject",
]
@@ -0,0 +1,43 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """Benchmarks API protocol and models.
8
+
9
+ This module contains the Benchmarks protocol definition.
10
+ Pydantic models are defined in llama_stack_api.benchmarks.models.
11
+ The FastAPI router is defined in llama_stack_api.benchmarks.fastapi_routes.
12
+ """
13
+
14
+ # Import fastapi_routes for router factory access
15
+ from . import fastapi_routes
16
+
17
+ # Import protocol for re-export
18
+ from .api import Benchmarks
19
+
20
+ # Import models for re-export
21
+ from .models import (
22
+ Benchmark,
23
+ BenchmarkInput,
24
+ CommonBenchmarkFields,
25
+ GetBenchmarkRequest,
26
+ ListBenchmarksRequest,
27
+ ListBenchmarksResponse,
28
+ RegisterBenchmarkRequest,
29
+ UnregisterBenchmarkRequest,
30
+ )
31
+
32
+ __all__ = [
33
+ "Benchmarks",
34
+ "Benchmark",
35
+ "BenchmarkInput",
36
+ "CommonBenchmarkFields",
37
+ "ListBenchmarksResponse",
38
+ "ListBenchmarksRequest",
39
+ "GetBenchmarkRequest",
40
+ "RegisterBenchmarkRequest",
41
+ "UnregisterBenchmarkRequest",
42
+ "fastapi_routes",
43
+ ]
@@ -0,0 +1,39 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from typing import Protocol, runtime_checkable
8
+
9
+ from .models import (
10
+ Benchmark,
11
+ GetBenchmarkRequest,
12
+ ListBenchmarksRequest,
13
+ ListBenchmarksResponse,
14
+ RegisterBenchmarkRequest,
15
+ UnregisterBenchmarkRequest,
16
+ )
17
+
18
+
19
@runtime_checkable
class Benchmarks(Protocol):
    """Structural protocol for Benchmarks API implementations.

    Every operation takes a single request model and returns the matching
    response model (see ``.models``). ``@runtime_checkable`` allows
    ``isinstance`` checks against concrete implementations.
    """

    # List all registered benchmarks.
    async def list_benchmarks(
        self,
        request: ListBenchmarksRequest,
    ) -> ListBenchmarksResponse: ...

    # Fetch a single benchmark by its ID.
    async def get_benchmark(
        self,
        request: GetBenchmarkRequest,
    ) -> Benchmark: ...

    # Register a new benchmark; returns nothing on success.
    async def register_benchmark(
        self,
        request: RegisterBenchmarkRequest,
    ) -> None: ...

    # Remove a registered benchmark; returns nothing on success.
    async def unregister_benchmark(
        self,
        request: UnregisterBenchmarkRequest,
    ) -> None: ...
@@ -0,0 +1,109 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """FastAPI router for the Benchmarks API.
8
+
9
+ This module defines the FastAPI router for the Benchmarks API using standard
10
+ FastAPI route decorators. The router is defined in the API package to keep
11
+ all API-related code together.
12
+ """
13
+
14
+ from typing import Annotated
15
+
16
+ from fastapi import APIRouter, Body, Depends
17
+
18
+ from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
19
+ from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
20
+
21
+ from .api import Benchmarks
22
+ from .models import (
23
+ Benchmark,
24
+ GetBenchmarkRequest,
25
+ ListBenchmarksRequest,
26
+ ListBenchmarksResponse,
27
+ RegisterBenchmarkRequest,
28
+ UnregisterBenchmarkRequest,
29
+ )
30
+
31
# Automatically generate dependency functions from Pydantic models.
# This ensures the models are the single source of truth for descriptions:
# the query dependency backs GET /eval/benchmarks, the path dependencies
# bind the {benchmark_id} path parameter for get/unregister.
get_list_benchmarks_request = create_query_dependency(ListBenchmarksRequest)
get_get_benchmark_request = create_path_dependency(GetBenchmarkRequest)
get_unregister_benchmark_request = create_path_dependency(UnregisterBenchmarkRequest)
36
+
37
+
38
def create_router(impl: Benchmarks) -> APIRouter:
    """Create a FastAPI router for the Benchmarks API.

    Routes are mounted under the v1alpha prefix and delegate directly to the
    supplied implementation. The register/unregister routes are marked
    deprecated in the OpenAPI schema.

    Args:
        impl: The Benchmarks implementation instance

    Returns:
        APIRouter configured for the Benchmarks API
    """
    router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
        tags=["Benchmarks"],
        responses=standard_responses,
    )

    # GET /eval/benchmarks — list all benchmarks (no parameters today;
    # ListBenchmarksRequest is an empty query model).
    @router.get(
        "/eval/benchmarks",
        response_model=ListBenchmarksResponse,
        summary="List all benchmarks.",
        description="List all benchmarks.",
        responses={
            200: {"description": "A ListBenchmarksResponse."},
        },
    )
    async def list_benchmarks(
        request: Annotated[ListBenchmarksRequest, Depends(get_list_benchmarks_request)],
    ) -> ListBenchmarksResponse:
        return await impl.list_benchmarks(request)

    # GET /eval/benchmarks/{benchmark_id} — fetch one benchmark by ID.
    @router.get(
        "/eval/benchmarks/{benchmark_id}",
        response_model=Benchmark,
        summary="Get a benchmark by its ID.",
        description="Get a benchmark by its ID.",
        responses={
            200: {"description": "A Benchmark."},
        },
    )
    async def get_benchmark(
        request: Annotated[GetBenchmarkRequest, Depends(get_get_benchmark_request)],
    ) -> Benchmark:
        return await impl.get_benchmark(request)

    # POST /eval/benchmarks — register a benchmark. Deprecated route; kept
    # for backward compatibility.
    @router.post(
        "/eval/benchmarks",
        summary="Register a benchmark.",
        description="Register a benchmark.",
        responses={
            200: {"description": "The benchmark was successfully registered."},
        },
        deprecated=True,
    )
    async def register_benchmark(
        request: Annotated[RegisterBenchmarkRequest, Body(...)],
    ) -> None:
        return await impl.register_benchmark(request)

    # DELETE /eval/benchmarks/{benchmark_id} — unregister a benchmark.
    # Deprecated route; kept for backward compatibility.
    @router.delete(
        "/eval/benchmarks/{benchmark_id}",
        summary="Unregister a benchmark.",
        description="Unregister a benchmark.",
        responses={
            200: {"description": "The benchmark was successfully unregistered."},
        },
        deprecated=True,
    )
    async def unregister_benchmark(
        request: Annotated[UnregisterBenchmarkRequest, Depends(get_unregister_benchmark_request)],
    ) -> None:
        return await impl.unregister_benchmark(request)

    return router
@@ -0,0 +1,109 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ """Pydantic models for Benchmarks API requests and responses.
8
+
9
+ This module defines the request and response models for the Benchmarks API
10
+ using Pydantic with Field descriptions for OpenAPI schema generation.
11
+ """
12
+
13
+ from typing import Any, Literal
14
+
15
+ from pydantic import BaseModel, Field
16
+
17
+ from llama_stack_api.resource import Resource, ResourceType
18
+ from llama_stack_api.schema_utils import json_schema_type
19
+
20
+
21
@json_schema_type
class ListBenchmarksRequest(BaseModel):
    """Request model for listing benchmarks.

    Currently carries no parameters; the model exists so the endpoint
    signature stays consistent with the other Benchmarks operations and can
    grow fields (e.g. pagination) without an interface change.
    """
    # NOTE: the redundant `pass` after the docstring was removed — the
    # docstring alone is a valid class body.
26
+
27
+
28
@json_schema_type
class GetBenchmarkRequest(BaseModel):
    """Request model for getting a benchmark.

    The single field is bound from the ``{benchmark_id}`` path parameter.
    """

    benchmark_id: str = Field(..., description="The ID of the benchmark to get.")
33
+
34
+
35
@json_schema_type
class RegisterBenchmarkRequest(BaseModel):
    """Request model for registering a benchmark.

    ``provider_id``/``provider_benchmark_id`` are optional; presumably the
    implementation resolves defaults when they are omitted — confirm against
    the routing-table implementation.
    """

    benchmark_id: str = Field(..., description="The ID of the benchmark to register.")
    dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.")
    scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.")
    provider_benchmark_id: str | None = Field(
        default=None, description="The ID of the provider benchmark to use for the benchmark."
    )
    provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
    metadata: dict[str, Any] | None = Field(default=None, description="The metadata to use for the benchmark.")
47
+
48
+
49
@json_schema_type
class UnregisterBenchmarkRequest(BaseModel):
    """Request model for unregistering a benchmark.

    The single field is bound from the ``{benchmark_id}`` path parameter.
    """

    benchmark_id: str = Field(..., description="The ID of the benchmark to unregister.")
54
+
55
+
56
class CommonBenchmarkFields(BaseModel):
    """Fields shared by ``Benchmark`` and ``BenchmarkInput`` (mixin base)."""

    dataset_id: str = Field(..., description="Identifier of the dataset to use for the benchmark evaluation.")
    scoring_functions: list[str] = Field(
        ..., description="List of scoring function identifiers to apply during evaluation."
    )
    # default_factory keeps each instance's metadata dict independent.
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Metadata for this evaluation task.",
    )
65
+
66
+
67
@json_schema_type
class Benchmark(CommonBenchmarkFields, Resource):
    """A benchmark resource for evaluating model performance.

    Combines the shared benchmark fields with the generic ``Resource`` base;
    the two properties expose benchmark-flavored aliases for the underlying
    resource identifiers.
    """

    type: Literal[ResourceType.benchmark] = Field(
        default=ResourceType.benchmark,
        description="The resource type, always benchmark.",
    )

    @property
    def benchmark_id(self) -> str:
        # Alias for Resource.identifier.
        return self.identifier

    @property
    def provider_benchmark_id(self) -> str | None:
        # Alias for Resource.provider_resource_id.
        return self.provider_resource_id
83
+
84
+
85
class BenchmarkInput(CommonBenchmarkFields):
    """Input model for declaring a benchmark.

    Unlike ``Benchmark`` (a registered ``Resource``), this carries the IDs as
    plain fields. The explicit ``BaseModel`` base was removed as redundant:
    ``CommonBenchmarkFields`` already derives from ``BaseModel``, so the MRO
    and behavior are unchanged.
    """

    benchmark_id: str = Field(..., description="The ID of the benchmark.")
    provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
    provider_benchmark_id: str | None = Field(
        default=None, description="The ID of the provider benchmark to use for the benchmark."
    )
91
+
92
+
93
@json_schema_type
class ListBenchmarksResponse(BaseModel):
    """Response containing a list of benchmark objects."""

    data: list[Benchmark] = Field(..., description="List of benchmark objects.")


# Public API of this module.
__all__ = [
    "ListBenchmarksRequest",
    "GetBenchmarkRequest",
    "RegisterBenchmarkRequest",
    "UnregisterBenchmarkRequest",
    "CommonBenchmarkFields",
    "Benchmark",
    "BenchmarkInput",
    "ListBenchmarksResponse",
]
@@ -4,13 +4,11 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
- from enum import Enum
8
7
  from typing import Annotated, Literal
9
8
 
10
9
  from pydantic import BaseModel, Field, model_validator
11
10
 
12
- from llama_stack.models.llama.datatypes import ToolCall
13
- from llama_stack.schema_utils import json_schema_type, register_schema
11
+ from llama_stack_api.schema_utils import json_schema_type, register_schema
14
12
 
15
13
 
16
14
  @json_schema_type
@@ -101,43 +99,3 @@ class ImageDelta(BaseModel):
101
99
 
102
100
  type: Literal["image"] = "image"
103
101
  image: bytes
104
-
105
-
106
- class ToolCallParseStatus(Enum):
107
- """Status of tool call parsing during streaming.
108
- :cvar started: Tool call parsing has begun
109
- :cvar in_progress: Tool call parsing is ongoing
110
- :cvar failed: Tool call parsing failed
111
- :cvar succeeded: Tool call parsing completed successfully
112
- """
113
-
114
- started = "started"
115
- in_progress = "in_progress"
116
- failed = "failed"
117
- succeeded = "succeeded"
118
-
119
-
120
- @json_schema_type
121
- class ToolCallDelta(BaseModel):
122
- """A tool call content delta for streaming responses.
123
-
124
- :param type: Discriminator type of the delta. Always "tool_call"
125
- :param tool_call: Either an in-progress tool call string or the final parsed tool call
126
- :param parse_status: Current parsing status of the tool call
127
- """
128
-
129
- type: Literal["tool_call"] = "tool_call"
130
-
131
- # you either send an in-progress tool call so the client can stream a long
132
- # code generation or you send the final parsed tool call at the end of the
133
- # stream
134
- tool_call: str | ToolCall
135
- parse_status: ToolCallParseStatus
136
-
137
-
138
- # streaming completions send a stream of ContentDeltas
139
- ContentDelta = Annotated[
140
- TextDelta | ImageDelta | ToolCallDelta,
141
- Field(discriminator="type"),
142
- ]
143
- register_schema(ContentDelta, name="ContentDelta")
@@ -56,14 +56,6 @@ class ToolGroupNotFoundError(ResourceNotFoundError):
56
56
  super().__init__(toolgroup_name, "Tool Group", "client.toolgroups.list()")
57
57
 
58
58
 
59
- class SessionNotFoundError(ValueError):
60
- """raised when Llama Stack cannot find a referenced session or access is denied"""
61
-
62
- def __init__(self, session_name: str) -> None:
63
- message = f"Session '{session_name}' not found or access denied."
64
- super().__init__(message)
65
-
66
-
67
59
  class ModelTypeError(TypeError):
68
60
  """raised when a model is present but not the correct type"""
69
61
 
@@ -7,7 +7,7 @@ from enum import Enum
7
7
 
8
8
  from pydantic import BaseModel
9
9
 
10
- from llama_stack.schema_utils import json_schema_type
10
+ from llama_stack_api.schema_utils import json_schema_type
11
11
 
12
12
 
13
13
  class JobStatus(Enum):
@@ -0,0 +1,77 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the terms described in the LICENSE file in
5
+ # the root directory of this source tree.
6
+
7
+ from enum import Enum
8
+ from typing import Any
9
+
10
+ from pydantic import BaseModel
11
+
12
+ from llama_stack_api.schema_utils import json_schema_type
13
+
14
+
15
+ class Order(Enum):
16
+ """Sort order for paginated responses.
17
+ :cvar asc: Ascending order
18
+ :cvar desc: Descending order
19
+ """
20
+
21
+ asc = "asc"
22
+ desc = "desc"
23
+
24
+
25
@json_schema_type
class PaginatedResponse(BaseModel):
    """A generic paginated response that follows a simple format.

    Items are untyped dicts so any row-shaped payload can be returned
    without a dedicated response model.

    :param data: The list of items for the current page
    :param has_more: Whether there are more items available after this set
    :param url: The URL for accessing this list
    """

    data: list[dict[str, Any]]
    has_more: bool
    url: str | None = None
37
+
38
+
39
+ # This is a short term solution to allow inference API to return metrics
40
+ # The ideal way to do this is to have a way for all response types to include metrics
41
+ # and all metric events logged to the telemetry API to be included with the response
42
+ # To do this, we will need to augment all response types with a metrics field.
43
+ # We have hit a blocker from stainless SDK that prevents us from doing this.
44
+ # The blocker is that if we were to augment the response types that have a data field
45
+ # in them like so
46
+ # class ListModelsResponse(BaseModel):
47
+ # metrics: Optional[List[MetricEvent]] = None
48
+ # data: List[Models]
49
+ # ...
50
+ # The client SDK will need to access the data by using a .data field, which is not
51
+ # ergonomic. Stainless SDK does support unwrapping the response type, but it
52
+ # requires that the response type to only have a single field.
53
+
54
+ # We will need a way in the client SDK to signal that the metrics are needed
55
+ # and if they are needed, the client SDK has to return the full response type
56
+ # without unwrapping it.
57
+
58
+
59
@json_schema_type
class MetricInResponse(BaseModel):
    """A metric value included in API responses.

    See the module-level note above this class: embedding metrics in
    responses is a short-term workaround until all response types can carry
    a metrics field.

    :param metric: The name of the metric
    :param value: The numeric value of the metric
    :param unit: (Optional) The unit of measurement for the metric value
    """

    metric: str
    value: int | float
    unit: str | None = None
70
+
71
+
72
class MetricResponseMixin(BaseModel):
    """Mixin class for API responses that can include metrics.

    Inherit alongside a concrete response model to add an optional
    ``metrics`` list without changing the response's other fields.

    :param metrics: (Optional) List of metrics associated with the API response
    """

    metrics: list[MetricInResponse] | None = None
@@ -8,7 +8,7 @@ from datetime import datetime
8
8
 
9
9
  from pydantic import BaseModel
10
10
 
11
- from llama_stack.schema_utils import json_schema_type
11
+ from llama_stack_api.schema_utils import json_schema_type
12
12
 
13
13
 
14
14
  @json_schema_type