llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in that registry.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
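
The headline change in this release is visible throughout the list above: the public API surface moves out of llama_stack/apis/* into a new top-level llama_stack_api package (hence the two entries in top_level.txt), and the kvstore/sqlstore utilities move from llama_stack/providers/utils into llama_stack/core/storage. As a rough sketch of what the migration looks like for imports, grounded in the hunks below (the full re-export surface is whatever llama_stack_api/__init__.py defines, +945 lines in this diff):

# llama-stack 0.3.4
from llama_stack.apis.batches import Batches, BatchObject, ListBatchesResponse
from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
from llama_stack.providers.utils.kvstore import KVStore

# llama-stack 0.4.0: flat re-exports from the new top-level package,
# storage primitives under llama_stack.core.storage
from llama_stack_api import (
    Batches,
    BatchObject,
    ConflictError,
    ListBatchesResponse,
    ResourceNotFoundError,
)
from llama_stack.core.storage.kvstore import KVStore
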
llama_stack/providers/inline/batches/reference/batches.py

@@ -11,29 +11,44 @@ import json
  import time
  import uuid
  from io import BytesIO
- from typing import Any, Literal
+ from typing import Any

  from openai.types.batch import BatchError, Errors
  from pydantic import BaseModel

- from llama_stack.apis.batches import Batches, BatchObject, ListBatchesResponse
- from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
- from llama_stack.apis.files import Files, OpenAIFilePurpose
- from llama_stack.apis.inference import (
+ from llama_stack.core.storage.kvstore import KVStore
+ from llama_stack.log import get_logger
+ from llama_stack_api import (
+     Batches,
+     BatchObject,
+     ConflictError,
+     Files,
      Inference,
+     ListBatchesResponse,
+     Models,
      OpenAIAssistantMessageParam,
      OpenAIChatCompletionRequestWithExtraBody,
      OpenAICompletionRequestWithExtraBody,
      OpenAIDeveloperMessageParam,
      OpenAIEmbeddingsRequestWithExtraBody,
+     OpenAIFilePurpose,
      OpenAIMessageParam,
      OpenAISystemMessageParam,
      OpenAIToolMessageParam,
      OpenAIUserMessageParam,
+     ResourceNotFoundError,
+ )
+ from llama_stack_api.batches.models import (
+     CancelBatchRequest,
+     CreateBatchRequest,
+     ListBatchesRequest,
+     RetrieveBatchRequest,
+ )
+ from llama_stack_api.files.models import (
+     RetrieveFileContentRequest,
+     RetrieveFileRequest,
+     UploadFileRequest,
  )
- from llama_stack.apis.models import Models
- from llama_stack.log import get_logger
- from llama_stack.providers.utils.kvstore import KVStore

  from .config import ReferenceBatchesImplConfig

@@ -136,11 +151,7 @@ class ReferenceBatchesImpl(Batches):
      # TODO (SECURITY): this currently works w/ configured api keys, not with x-llamastack-provider-data or with user policy restrictions
      async def create_batch(
          self,
-         input_file_id: str,
-         endpoint: str,
-         completion_window: Literal["24h"],
-         metadata: dict[str, str] | None = None,
-         idempotency_key: str | None = None,
+         request: CreateBatchRequest,
      ) -> BatchObject:
          """
          Create a new batch for processing multiple API requests.
@@ -181,14 +192,14 @@

          # TODO: set expiration time for garbage collection

-         if endpoint not in ["/v1/chat/completions", "/v1/completions", "/v1/embeddings"]:
+         if request.endpoint not in ["/v1/chat/completions", "/v1/completions", "/v1/embeddings"]:
              raise ValueError(
-                 f"Invalid endpoint: {endpoint}. Supported values: /v1/chat/completions, /v1/completions, /v1/embeddings. Code: invalid_value. Param: endpoint",
+                 f"Invalid endpoint: {request.endpoint}. Supported values: /v1/chat/completions, /v1/completions, /v1/embeddings. Code: invalid_value. Param: endpoint",
              )

-         if completion_window != "24h":
+         if request.completion_window != "24h":
              raise ValueError(
-                 f"Invalid completion_window: {completion_window}. Supported values are: 24h. Code: invalid_value. Param: completion_window",
+                 f"Invalid completion_window: {request.completion_window}. Supported values are: 24h. Code: invalid_value. Param: completion_window",
              )

          batch_id = f"batch_{uuid.uuid4().hex[:16]}"
@@ -196,22 +207,22 @@
          # For idempotent requests, use the idempotency key for the batch ID
          # This ensures the same key always maps to the same batch ID,
          # allowing us to detect parameter conflicts
-         if idempotency_key is not None:
-             hash_input = idempotency_key.encode("utf-8")
+         if request.idempotency_key is not None:
+             hash_input = request.idempotency_key.encode("utf-8")
              hash_digest = hashlib.sha256(hash_input).hexdigest()[:24]
              batch_id = f"batch_{hash_digest}"

          try:
-             existing_batch = await self.retrieve_batch(batch_id)
+             existing_batch = await self.retrieve_batch(RetrieveBatchRequest(batch_id=batch_id))

              if (
-                 existing_batch.input_file_id != input_file_id
-                 or existing_batch.endpoint != endpoint
-                 or existing_batch.completion_window != completion_window
-                 or existing_batch.metadata != metadata
+                 existing_batch.input_file_id != request.input_file_id
+                 or existing_batch.endpoint != request.endpoint
+                 or existing_batch.completion_window != request.completion_window
+                 or existing_batch.metadata != request.metadata
              ):
                  raise ConflictError(
-                     f"Idempotency key '{idempotency_key}' was previously used with different parameters. "
+                     f"Idempotency key '{request.idempotency_key}' was previously used with different parameters. "
                      "Either use a new idempotency key or ensure all parameters match the original request."
                  )

@@ -226,12 +237,12 @@
          batch = BatchObject(
              id=batch_id,
              object="batch",
-             endpoint=endpoint,
-             input_file_id=input_file_id,
-             completion_window=completion_window,
+             endpoint=request.endpoint,
+             input_file_id=request.input_file_id,
+             completion_window=request.completion_window,
              status="validating",
              created_at=current_time,
-             metadata=metadata,
+             metadata=request.metadata,
          )

          await self.kvstore.set(f"batch:{batch_id}", batch.to_json())
@@ -243,28 +254,27 @@

          return batch

-     async def cancel_batch(self, batch_id: str) -> BatchObject:
+     async def cancel_batch(self, request: CancelBatchRequest) -> BatchObject:
          """Cancel a batch that is in progress."""
-         batch = await self.retrieve_batch(batch_id)
+         batch = await self.retrieve_batch(RetrieveBatchRequest(batch_id=request.batch_id))

          if batch.status in ["cancelled", "cancelling"]:
              return batch

          if batch.status in ["completed", "failed", "expired"]:
-             raise ConflictError(f"Cannot cancel batch '{batch_id}' with status '{batch.status}'")
+             raise ConflictError(f"Cannot cancel batch '{request.batch_id}' with status '{batch.status}'")

-         await self._update_batch(batch_id, status="cancelling", cancelling_at=int(time.time()))
+         await self._update_batch(request.batch_id, status="cancelling", cancelling_at=int(time.time()))

-         if batch_id in self._processing_tasks:
-             self._processing_tasks[batch_id].cancel()
+         if request.batch_id in self._processing_tasks:
+             self._processing_tasks[request.batch_id].cancel()
          # note: task removal and status="cancelled" handled in finally block of _process_batch

-         return await self.retrieve_batch(batch_id)
+         return await self.retrieve_batch(RetrieveBatchRequest(batch_id=request.batch_id))

      async def list_batches(
          self,
-         after: str | None = None,
-         limit: int = 20,
+         request: ListBatchesRequest,
      ) -> ListBatchesResponse:
          """
          List all batches, eventually only for the current user.
@@ -281,14 +291,14 @@
          batches.sort(key=lambda b: b.created_at, reverse=True)

          start_idx = 0
-         if after:
+         if request.after:
              for i, batch in enumerate(batches):
-                 if batch.id == after:
+                 if batch.id == request.after:
                      start_idx = i + 1
                      break

-         page_batches = batches[start_idx : start_idx + limit]
-         has_more = (start_idx + limit) < len(batches)
+         page_batches = batches[start_idx : start_idx + request.limit]
+         has_more = (start_idx + request.limit) < len(batches)

          first_id = page_batches[0].id if page_batches else None
          last_id = page_batches[-1].id if page_batches else None
@@ -300,11 +310,11 @@
              has_more=has_more,
          )

-     async def retrieve_batch(self, batch_id: str) -> BatchObject:
+     async def retrieve_batch(self, request: RetrieveBatchRequest) -> BatchObject:
          """Retrieve information about a specific batch."""
-         batch_data = await self.kvstore.get(f"batch:{batch_id}")
+         batch_data = await self.kvstore.get(f"batch:{request.batch_id}")
          if not batch_data:
-             raise ResourceNotFoundError(batch_id, "Batch", "batches.list()")
+             raise ResourceNotFoundError(request.batch_id, "Batch", "batches.list()")

          return BatchObject.model_validate_json(batch_data)

@@ -312,7 +322,7 @@
          """Update batch fields in kvstore."""
          async with self._update_batch_lock:
              try:
-                 batch = await self.retrieve_batch(batch_id)
+                 batch = await self.retrieve_batch(RetrieveBatchRequest(batch_id=batch_id))

                  # batch processing is async. once cancelling, only allow "cancelled" status updates
                  if batch.status == "cancelling" and updates.get("status") != "cancelled":
@@ -344,7 +354,7 @@
          requests: list[BatchRequest] = []
          errors: list[BatchError] = []
          try:
-             await self.files_api.openai_retrieve_file(batch.input_file_id)
+             await self.files_api.openai_retrieve_file(RetrieveFileRequest(file_id=batch.input_file_id))
          except Exception:
              errors.append(
                  BatchError(
@@ -357,8 +367,13 @@
              return errors, requests

          # TODO(SECURITY): do something about large files
-         file_content_response = await self.files_api.openai_retrieve_file_content(batch.input_file_id)
-         file_content = file_content_response.body.decode("utf-8")
+         file_content_response = await self.files_api.openai_retrieve_file_content(
+             RetrieveFileContentRequest(file_id=batch.input_file_id)
+         )
+         # Handle both bytes and memoryview types - convert to bytes unconditionally
+         # (bytes(x) returns x if already bytes, creates new bytes from memoryview otherwise)
+         body_bytes = bytes(file_content_response.body)
+         file_content = body_bytes.decode("utf-8")
          for line_num, line in enumerate(file_content.strip().split("\n"), 1):
              if line.strip(): # skip empty lines
                  try:
@@ -415,8 +430,8 @@
                          )
                          valid = False

-                     if (body := request.get("body")) and isinstance(body, dict):
-                         if body.get("stream", False):
+                     if (request_body := request.get("body")) and isinstance(request_body, dict):
+                         if request_body.get("stream", False):
                              errors.append(
                                  BatchError(
                                      code="streaming_unsupported",
@@ -447,7 +462,7 @@
                          ]

                          for param, expected_type, type_string in required_params:
-                             if param not in body:
+                             if param not in request_body:
                                  errors.append(
                                      BatchError(
                                          code="invalid_request",
@@ -457,7 +472,7 @@
                                      )
                                  )
                                  valid = False
-                             elif not isinstance(body[param], expected_type):
+                             elif not isinstance(request_body[param], expected_type):
                                  errors.append(
                                      BatchError(
                                          code="invalid_request",
@@ -468,15 +483,15 @@
                                  )
                                  valid = False

-                         if "model" in body and isinstance(body["model"], str):
+                         if "model" in request_body and isinstance(request_body["model"], str):
                              try:
-                                 await self.models_api.get_model(body["model"])
+                                 await self.models_api.get_model(request_body["model"])
                              except Exception:
                                  errors.append(
                                      BatchError(
                                          code="model_not_found",
                                          line=line_num,
-                                         message=f"Model '{body['model']}' does not exist or is not supported",
+                                         message=f"Model '{request_body['model']}' does not exist or is not supported",
                                          param="body.model",
                                      )
                                  )
@@ -484,14 +499,14 @@

                      if valid:
                          assert isinstance(url, str), "URL must be a string" # for mypy
-                         assert isinstance(body, dict), "Body must be a dictionary" # for mypy
+                         assert isinstance(request_body, dict), "Body must be a dictionary" # for mypy
                          requests.append(
                              BatchRequest(
                                  line_num=line_num,
                                  url=url,
                                  method=request["method"],
                                  custom_id=request["custom_id"],
-                                 body=body,
+                                 body=request_body,
                              ),
                          )
                  except json.JSONDecodeError:
@@ -529,7 +544,7 @@
      async def _process_batch_impl(self, batch_id: str) -> None:
          """Implementation of batch processing logic."""
          errors: list[BatchError] = []
-         batch = await self.retrieve_batch(batch_id)
+         batch = await self.retrieve_batch(RetrieveBatchRequest(batch_id=batch_id))

          errors, requests = await self._validate_input(batch)
          if errors:
@@ -675,5 +690,8 @@

          with AsyncBytesIO("\n".join(output_lines).encode("utf-8")) as file_buffer:
              file_buffer.filename = f"{batch_id}_{file_type}.jsonl"
-             uploaded_file = await self.files_api.openai_upload_file(file=file_buffer, purpose=OpenAIFilePurpose.BATCH)
+             uploaded_file = await self.files_api.openai_upload_file(
+                 request=UploadFileRequest(purpose=OpenAIFilePurpose.BATCH),
+                 file=file_buffer,
+             )
          return uploaded_file.id
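
Every Batches method now takes a single Pydantic request model instead of loose parameters. A minimal caller-side sketch of the new shape, assuming the request models simply mirror the keyword parameters removed above (as the hunks indicate); the file id, metadata, and variable names here are illustrative only:

from llama_stack_api.batches.models import CreateBatchRequest, RetrieveBatchRequest

# 0.3.4: create_batch(input_file_id=..., endpoint=..., completion_window=..., ...)
# 0.4.0: the same values travel inside one request model
batch = await batches_impl.create_batch(
    CreateBatchRequest(
        input_file_id="file-abc123",      # hypothetical file id
        endpoint="/v1/chat/completions",  # one of the three supported endpoints
        completion_window="24h",          # only supported value
        metadata={"team": "evals"},       # hypothetical metadata
        idempotency_key="nightly-run-7",  # same key + same params -> same batch;
    )                                     # same key + different params -> ConflictError
)

batch = await batches_impl.retrieve_batch(RetrieveBatchRequest(batch_id=batch.id))
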
llama_stack/providers/inline/datasetio/localfs/datasetio.py

@@ -5,13 +5,10 @@
  # the root directory of this source tree.
  from typing import Any

- from llama_stack.apis.common.responses import PaginatedResponse
- from llama_stack.apis.datasetio import DatasetIO
- from llama_stack.apis.datasets import Dataset
- from llama_stack.providers.datatypes import DatasetsProtocolPrivate
+ from llama_stack.core.storage.kvstore import kvstore_impl
  from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri
- from llama_stack.providers.utils.kvstore import kvstore_impl
  from llama_stack.providers.utils.pagination import paginate_records
+ from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse

  from .config import LocalFSDatasetIOConfig

llama_stack/providers/inline/eval/meta_reference/eval.py

@@ -8,28 +8,27 @@ from typing import Any

  from tqdm import tqdm

- from llama_stack.apis.agents import Agents, StepType
- from llama_stack.apis.benchmarks import Benchmark
- from llama_stack.apis.datasetio import DatasetIO
- from llama_stack.apis.datasets import Datasets
- from llama_stack.apis.inference import (
+ from llama_stack.core.storage.kvstore import kvstore_impl
+ from llama_stack.providers.utils.common.data_schema_validator import ColumnName
+ from llama_stack_api import (
+     Agents,
+     Benchmark,
+     BenchmarkConfig,
+     BenchmarksProtocolPrivate,
+     DatasetIO,
+     Datasets,
+     Eval,
+     EvaluateResponse,
      Inference,
+     Job,
+     JobStatus,
      OpenAIChatCompletionRequestWithExtraBody,
      OpenAICompletionRequestWithExtraBody,
      OpenAISystemMessageParam,
      OpenAIUserMessageParam,
-     UserMessage,
- )
- from llama_stack.apis.scoring import Scoring
- from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
- from llama_stack.providers.inline.agents.meta_reference.agent_instance import (
-     MEMORY_QUERY_TOOL,
+     Scoring,
  )
- from llama_stack.providers.utils.common.data_schema_validator import ColumnName
- from llama_stack.providers.utils.kvstore import kvstore_impl

- from .....apis.common.job_types import Job, JobStatus
- from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
  from .config import MetaReferenceEvalConfig

  EVAL_TASKS_PREFIX = "benchmarks:"
@@ -118,49 +117,6 @@ class MetaReferenceEvalImpl(
          self.jobs[job_id] = res
          return Job(job_id=job_id, status=JobStatus.completed)

-     async def _run_agent_generation(
-         self, input_rows: list[dict[str, Any]], benchmark_config: BenchmarkConfig
-     ) -> list[dict[str, Any]]:
-         candidate = benchmark_config.eval_candidate
-         create_response = await self.agents_api.create_agent(candidate.config)
-         agent_id = create_response.agent_id
-
-         generations = []
-         for i, x in tqdm(enumerate(input_rows)):
-             assert ColumnName.chat_completion_input.value in x, "Invalid input row"
-             input_messages = json.loads(x[ColumnName.chat_completion_input.value])
-             input_messages = [UserMessage(**x) for x in input_messages if x["role"] == "user"]
-
-             # NOTE: only single-turn agent generation is supported. Create a new session for each input row
-             session_create_response = await self.agents_api.create_agent_session(agent_id, f"session-{i}")
-             session_id = session_create_response.session_id
-
-             turn_request = dict(
-                 agent_id=agent_id,
-                 session_id=session_id,
-                 messages=input_messages,
-                 stream=True,
-             )
-             turn_response = [chunk async for chunk in await self.agents_api.create_agent_turn(**turn_request)]
-             final_event = turn_response[-1].event.payload
-
-             # check if there's a memory retrieval step and extract the context
-             memory_rag_context = None
-             for step in final_event.turn.steps:
-                 if step.step_type == StepType.tool_execution.value:
-                     for tool_response in step.tool_responses:
-                         if tool_response.tool_name == MEMORY_QUERY_TOOL:
-                             memory_rag_context = " ".join(x.text for x in tool_response.content)
-
-             agent_generation = {}
-             agent_generation[ColumnName.generated_answer.value] = final_event.turn.output_message.content
-             if memory_rag_context:
-                 agent_generation[ColumnName.context.value] = memory_rag_context
-
-             generations.append(agent_generation)
-
-         return generations
-
      async def _run_model_generation(
          self, input_rows: list[dict[str, Any]], benchmark_config: BenchmarkConfig
      ) -> list[dict[str, Any]]:
@@ -215,9 +171,8 @@
          benchmark_config: BenchmarkConfig,
      ) -> EvaluateResponse:
          candidate = benchmark_config.eval_candidate
-         if candidate.type == "agent":
-             generations = await self._run_agent_generation(input_rows, benchmark_config)
-         elif candidate.type == "model":
+         # Agent evaluation removed
+         if candidate.type == "model":
              generations = await self._run_model_generation(input_rows, benchmark_config)
          else:
              raise ValueError(f"Invalid candidate type: {candidate.type}")
llama_stack/providers/inline/files/localfs/files.py

@@ -7,27 +7,30 @@
  import time
  import uuid
  from pathlib import Path
- from typing import Annotated

- from fastapi import Depends, File, Form, Response, UploadFile
+ from fastapi import Response, UploadFile

- from llama_stack.apis.common.errors import ResourceNotFoundError
- from llama_stack.apis.common.responses import Order
- from llama_stack.apis.files import (
-     ExpiresAfter,
+ from llama_stack.core.access_control.datatypes import Action
+ from llama_stack.core.datatypes import AccessRule
+ from llama_stack.core.id_generation import generate_object_id
+ from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+ from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
+ from llama_stack.log import get_logger
+ from llama_stack_api import (
+     DeleteFileRequest,
      Files,
+     ListFilesRequest,
      ListOpenAIFileResponse,
      OpenAIFileDeleteResponse,
      OpenAIFileObject,
      OpenAIFilePurpose,
+     Order,
+     ResourceNotFoundError,
+     RetrieveFileContentRequest,
+     RetrieveFileRequest,
+     UploadFileRequest,
  )
- from llama_stack.core.datatypes import AccessRule
- from llama_stack.core.id_generation import generate_object_id
- from llama_stack.log import get_logger
- from llama_stack.providers.utils.files.form_data import parse_expires_after
- from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
- from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
- from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
+ from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType

  from .config import LocalfsFilesImplConfig

@@ -72,12 +75,12 @@ class LocalfsFilesImpl(Files):
          """Get the filesystem path for a file ID."""
          return Path(self.config.storage_dir) / file_id

-     async def _lookup_file_id(self, file_id: str) -> tuple[OpenAIFileObject, Path]:
+     async def _lookup_file_id(self, file_id: str, action: Action = Action.READ) -> tuple[OpenAIFileObject, Path]:
          """Look up a OpenAIFileObject and filesystem path from its ID."""
          if not self.sql_store:
              raise RuntimeError("Files provider not initialized")

-         row = await self.sql_store.fetch_one("openai_files", where={"id": file_id})
+         row = await self.sql_store.fetch_one("openai_files", where={"id": file_id}, action=action)
          if not row:
              raise ResourceNotFoundError(file_id, "File", "client.files.list()")

@@ -87,14 +90,16 @@
      # OpenAI Files API Implementation
      async def openai_upload_file(
          self,
-         file: Annotated[UploadFile, File()],
-         purpose: Annotated[OpenAIFilePurpose, Form()],
-         expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None,
+         request: UploadFileRequest,
+         file: UploadFile,
      ) -> OpenAIFileObject:
          """Upload a file that can be used across various endpoints."""
          if not self.sql_store:
              raise RuntimeError("Files provider not initialized")

+         purpose = request.purpose
+         expires_after = request.expires_after
+
          if expires_after is not None:
              logger.warning(
                  f"File expiration is not supported by this provider, ignoring expires_after: {expires_after}"
@@ -136,15 +141,17 @@

      async def openai_list_files(
          self,
-         after: str | None = None,
-         limit: int | None = 10000,
-         order: Order | None = Order.desc,
-         purpose: OpenAIFilePurpose | None = None,
+         request: ListFilesRequest,
      ) -> ListOpenAIFileResponse:
          """Returns a list of files that belong to the user's organization."""
          if not self.sql_store:
              raise RuntimeError("Files provider not initialized")

+         after = request.after
+         limit = request.limit
+         order = request.order
+         purpose = request.purpose
+
          if not order:
              order = Order.desc

@@ -179,16 +186,17 @@
              last_id=files[-1].id if files else "",
          )

-     async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject:
+     async def openai_retrieve_file(self, request: RetrieveFileRequest) -> OpenAIFileObject:
          """Returns information about a specific file."""
-         file_obj, _ = await self._lookup_file_id(file_id)
+         file_obj, _ = await self._lookup_file_id(request.file_id)

          return file_obj

-     async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse:
+     async def openai_delete_file(self, request: DeleteFileRequest) -> OpenAIFileDeleteResponse:
          """Delete a file."""
+         file_id = request.file_id
          # Delete physical file
-         _, file_path = await self._lookup_file_id(file_id)
+         _, file_path = await self._lookup_file_id(file_id, action=Action.DELETE)
          if file_path.exists():
              file_path.unlink()

@@ -201,14 +209,15 @@
              deleted=True,
          )

-     async def openai_retrieve_file_content(self, file_id: str) -> Response:
+     async def openai_retrieve_file_content(self, request: RetrieveFileContentRequest) -> Response:
          """Returns the contents of the specified file."""
+         file_id = request.file_id
          # Read file content
          file_obj, file_path = await self._lookup_file_id(file_id)

          if not file_path.exists():
              logger.warning(f"File '{file_id}'s underlying '{file_path}' is missing, deleting metadata.")
-             await self.openai_delete_file(file_id)
+             await self.openai_delete_file(DeleteFileRequest(file_id=file_id))
              raise ResourceNotFoundError(file_id, "File", "client.files.list()")

          # Return as binary response with appropriate content type
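
The Files provider follows the same request-model pattern, with openai_upload_file taking the UploadFile alongside its UploadFileRequest, and deletes now routed through an Action.DELETE access check. A sketch under the same assumption that the request models carry the old parameters (after/limit/order/purpose are read off the request in the hunk above); variable names are illustrative:

from llama_stack_api import (
    DeleteFileRequest,
    ListFilesRequest,
    OpenAIFilePurpose,
    RetrieveFileContentRequest,
    UploadFileRequest,
)

# upload: purpose (and optional expires_after) in the model, payload passed separately
uploaded = await files_impl.openai_upload_file(
    request=UploadFileRequest(purpose=OpenAIFilePurpose.BATCH),
    file=file_buffer,  # an UploadFile / file-like object, as in the batches hunk
)

listing = await files_impl.openai_list_files(ListFilesRequest(limit=100))

content = await files_impl.openai_retrieve_file_content(
    RetrieveFileContentRequest(file_id=uploaded.id)
)
raw = bytes(content.body)  # body may be bytes or memoryview, per the batches hunk

await files_impl.openai_delete_file(DeleteFileRequest(file_id=uploaded.id))
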
llama_stack/providers/inline/inference/meta_reference/config.py

@@ -8,15 +8,15 @@ from typing import Any

  from pydantic import BaseModel, field_validator

- from llama_stack.apis.inference import QuantizationConfig
  from llama_stack.providers.utils.inference import supported_inference_models
+ from llama_stack_api import QuantizationConfig


  class MetaReferenceInferenceConfig(BaseModel):
      # this is a placeholder to indicate inference model id
      # the actual inference model id is dtermined by the moddel id in the request
      # Note: you need to register the model before using it for inference
-     # models in the resouce list in the run.yaml config will be registered automatically
+     # models in the resouce list in the config.yaml config will be registered automatically
      model: str | None = None
      torch_seed: int | None = None
      max_seq_len: int = 4096