llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack_api/inspect_api/fastapi_routes.py
@@ -0,0 +1,76 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ """FastAPI router for the Inspect API.
+
+ This module defines the FastAPI router for the Inspect API using standard
+ FastAPI route decorators.
+ """
+
+ from typing import Annotated
+
+ from fastapi import APIRouter, Query
+
+ from llama_stack_api.router_utils import PUBLIC_ROUTE_KEY, standard_responses
+ from llama_stack_api.version import LLAMA_STACK_API_V1
+
+ from .api import Inspect
+ from .models import (
+     ApiFilter,
+     HealthInfo,
+     ListRoutesResponse,
+     VersionInfo,
+ )
+
+
+ def create_router(impl: Inspect) -> APIRouter:
+     """Create a FastAPI router for the Inspect API."""
+     router = APIRouter(
+         prefix=f"/{LLAMA_STACK_API_V1}",
+         tags=["Inspect"],
+         responses=standard_responses,
+     )
+
+     @router.get(
+         "/inspect/routes",
+         response_model=ListRoutesResponse,
+         summary="List routes.",
+         description="List all available API routes with their methods and implementing providers.",
+         responses={200: {"description": "Response containing information about all available routes."}},
+     )
+     async def list_routes(
+         api_filter: Annotated[
+             ApiFilter | None,
+             Query(
+                 description="Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns all non-deprecated routes."
+             ),
+         ] = None,
+     ) -> ListRoutesResponse:
+         return await impl.list_routes(api_filter)
+
+     @router.get(
+         "/health",
+         response_model=HealthInfo,
+         summary="Get health status.",
+         description="Get the current health status of the service.",
+         responses={200: {"description": "Health information indicating if the service is operational."}},
+         openapi_extra={PUBLIC_ROUTE_KEY: True},
+     )
+     async def health() -> HealthInfo:
+         return await impl.health()
+
+     @router.get(
+         "/version",
+         response_model=VersionInfo,
+         summary="Get version.",
+         description="Get the version of the service.",
+         responses={200: {"description": "Version information containing the service version number."}},
+         openapi_extra={PUBLIC_ROUTE_KEY: True},
+     )
+     async def version() -> VersionInfo:
+         return await impl.version()
+
+     return router
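Since create_router accepts any object satisfying the Inspect protocol, wiring the router into an application is a one-liner; a minimal sketch (the mount_inspect helper is invented for illustration, and the /v1 prefix assumes LLAMA_STACK_API_V1 == "v1"):

from fastapi import FastAPI

from llama_stack_api.inspect_api.fastapi_routes import create_router


def mount_inspect(app: FastAPI, impl) -> None:
    # `impl` must satisfy the Inspect protocol (list_routes / health / version above).
    # Routes land under the API-version prefix, e.g. /v1/health and /v1/version.
    app.include_router(create_router(impl))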
llama_stack_api/inspect_api/models.py
@@ -0,0 +1,28 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ """Pydantic models for Inspect API requests and responses.
+
+ This module re-exports models from llama_stack_api.admin.models to ensure
+ a single source of truth and avoid type conflicts.
+ """
+
+ # Import and re-export shared models from admin
+ from llama_stack_api.admin.models import (
+     ApiFilter,
+     HealthInfo,
+     ListRoutesResponse,
+     RouteInfo,
+     VersionInfo,
+ )
+
+ __all__ = [
+     "ApiFilter",
+     "RouteInfo",
+     "HealthInfo",
+     "VersionInfo",
+     "ListRoutesResponse",
+ ]
{llama_stack/apis/agents → llama_stack_api/internal}/__init__.py
@@ -4,4 +4,6 @@
  # This source code is licensed under the terms described in the LICENSE file in
  # the root directory of this source tree.

- from .agents import *
+ # Internal subpackage for shared interfaces that are not part of the public API.
+
+ __all__: list[str] = []
llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py
@@ -9,6 +9,8 @@ from typing import Protocol


  class KVStore(Protocol):
+     """Protocol for simple key/value storage backends."""
+
      # TODO: make the value type bytes instead of str
      async def set(self, key: str, value: str, expiration: datetime | None = None) -> None: ...

@@ -19,3 +21,6 @@ class KVStore(Protocol):
      async def values_in_range(self, start_key: str, end_key: str) -> list[str]: ...

      async def keys_in_range(self, start_key: str, end_key: str) -> list[str]: ...
+
+
+ __all__ = ["KVStore"]
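For orientation, a dict-backed class covering the methods visible in this hunk would satisfy the protocol structurally; a sketch, not a shipped backend (the inclusive range semantics are an assumption, and the protocol methods elided between the two hunks are omitted):

from datetime import datetime


class InMemoryKVStore:
    """Illustrative in-memory store satisfying the visible slice of KVStore."""

    def __init__(self) -> None:
        self._data: dict[str, str] = {}

    async def set(self, key: str, value: str, expiration: datetime | None = None) -> None:
        self._data[key] = value  # expiration is ignored in this sketch

    async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
        # Assumes an inclusive [start_key, end_key] range over sorted keys.
        return [v for k, v in sorted(self._data.items()) if start_key <= k <= end_key]

    async def keys_in_range(self, start_key: str, end_key: str) -> list[str]:
        return [k for k in sorted(self._data) if start_key <= k <= end_key]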
llama_stack_api/internal/sqlstore.py
@@ -0,0 +1,79 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from collections.abc import Mapping, Sequence
+ from enum import Enum
+ from typing import Any, Literal, Protocol
+
+ from pydantic import BaseModel
+
+ from llama_stack_api import PaginatedResponse
+
+
+ class ColumnType(Enum):
+     INTEGER = "INTEGER"
+     STRING = "STRING"
+     TEXT = "TEXT"
+     FLOAT = "FLOAT"
+     BOOLEAN = "BOOLEAN"
+     JSON = "JSON"
+     DATETIME = "DATETIME"
+
+
+ class ColumnDefinition(BaseModel):
+     type: ColumnType
+     primary_key: bool = False
+     nullable: bool = True
+     default: Any = None
+
+
+ class SqlStore(Protocol):
+     """Protocol for common SQL-store functionality."""
+
+     async def create_table(self, table: str, schema: Mapping[str, ColumnType | ColumnDefinition]) -> None: ...
+
+     async def insert(self, table: str, data: Mapping[str, Any] | Sequence[Mapping[str, Any]]) -> None: ...
+
+     async def upsert(
+         self,
+         table: str,
+         data: Mapping[str, Any],
+         conflict_columns: list[str],
+         update_columns: list[str] | None = None,
+     ) -> None: ...
+
+     async def fetch_all(
+         self,
+         table: str,
+         where: Mapping[str, Any] | None = None,
+         where_sql: str | None = None,
+         limit: int | None = None,
+         order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None,
+         cursor: tuple[str, str] | None = None,
+     ) -> PaginatedResponse: ...
+
+     async def fetch_one(
+         self,
+         table: str,
+         where: Mapping[str, Any] | None = None,
+         where_sql: str | None = None,
+         order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None,
+     ) -> dict[str, Any] | None: ...
+
+     async def update(self, table: str, data: Mapping[str, Any], where: Mapping[str, Any]) -> None: ...
+
+     async def delete(self, table: str, where: Mapping[str, Any]) -> None: ...
+
+     async def add_column_if_not_exists(
+         self,
+         table: str,
+         column_name: str,
+         column_type: ColumnType,
+         nullable: bool = True,
+     ) -> None: ...
+
+
+ __all__ = ["ColumnDefinition", "ColumnType", "SqlStore"]
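Because SqlStore is a structural Protocol, consumers can be written purely against the interface; a hedged sketch of such a helper (the table name and columns are invented for the example):

from typing import Any

from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType, SqlStore


async def record_run(store: SqlStore, run_id: str, score: float) -> dict[str, Any] | None:
    # Schema values may be a bare ColumnType or a full ColumnDefinition, per the Mapping type above.
    await store.create_table(
        "eval_runs",  # invented table name for the example
        {
            "run_id": ColumnDefinition(type=ColumnType.STRING, primary_key=True, nullable=False),
            "score": ColumnType.FLOAT,
        },
    )
    await store.upsert("eval_runs", {"run_id": run_id, "score": score}, conflict_columns=["run_id"])
    return await store.fetch_one("eval_runs", where={"run_id": run_id})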
{llama_stack/apis/models → llama_stack_api}/models.py
@@ -9,10 +9,9 @@ from typing import Any, Literal, Protocol, runtime_checkable

  from pydantic import BaseModel, ConfigDict, Field, field_validator

- from llama_stack.apis.resource import Resource, ResourceType
- from llama_stack.apis.version import LLAMA_STACK_API_V1
- from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
- from llama_stack.schema_utils import json_schema_type, webmethod
+ from llama_stack_api.resource import Resource, ResourceType
+ from llama_stack_api.schema_utils import json_schema_type, webmethod
+ from llama_stack_api.version import LLAMA_STACK_API_V1


  class CommonModelFields(BaseModel):
@@ -27,10 +26,12 @@ class ModelType(StrEnum):
      """Enumeration of supported model types in Llama Stack.
      :cvar llm: Large language model for text generation and completion
      :cvar embedding: Embedding model for converting text to vector representations
+     :cvar rerank: Reranking model for reordering documents based on their relevance to a query
      """

      llm = "llm"
      embedding = "embedding"
+     rerank = "rerank"


  @json_schema_type
@@ -88,22 +89,23 @@ class OpenAIModel(BaseModel):
      :object: The object type, which will be "model"
      :created: The Unix timestamp in seconds when the model was created
      :owned_by: The owner of the model
+     :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata
      """

      id: str
      object: Literal["model"] = "model"
      created: int
      owned_by: str
+     custom_metadata: dict[str, Any] | None = None


+ @json_schema_type
  class OpenAIListModelsResponse(BaseModel):
      data: list[OpenAIModel]


  @runtime_checkable
- @trace_protocol
  class Models(Protocol):
-     @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
      async def list_models(self) -> ListModelsResponse:
          """List all models.

@@ -111,7 +113,7 @@ class Models(Protocol):
          """
          ...

-     @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+     @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
      async def openai_list_models(self) -> OpenAIListModelsResponse:
          """List models using the OpenAI API.

@@ -133,7 +135,7 @@ class Models(Protocol):
          """
          ...

-     @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
+     @webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
      async def register_model(
          self,
          model_id: str,
@@ -155,7 +157,7 @@ class Models(Protocol):
          """
          ...

-     @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
+     @webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
      async def unregister_model(
          self,
          model_id: str,
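The new custom_metadata field lets a models entry carry Llama Stack specifics inside an otherwise OpenAI-shaped object; a small sketch, assuming the post-move module path llama_stack_api.models and with illustrative values throughout:

from llama_stack_api.models import OpenAIModel

m = OpenAIModel(
    id="ollama/llama3.2:3b",  # illustrative model id
    created=1700000000,       # illustrative Unix timestamp
    owned_by="llama_stack",
    custom_metadata={"model_type": "llm", "provider_id": "ollama"},  # keys are illustrative
)
print(m.model_dump_json(exclude_none=True))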
{llama_stack/apis/agents → llama_stack_api}/openai_responses.py
@@ -4,13 +4,16 @@
  # This source code is licensed under the terms described in the LICENSE file in
  # the root directory of this source tree.

+ from collections.abc import Sequence
+ from enum import Enum
  from typing import Annotated, Any, Literal

- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, Field, model_validator
  from typing_extensions import TypedDict

- from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
- from llama_stack.schema_utils import json_schema_type, register_schema
+ from llama_stack_api.inference import OpenAITokenLogProb
+ from llama_stack_api.schema_utils import json_schema_type, register_schema
+ from llama_stack_api.vector_io import SearchRankingOptions as FileSearchRankingOptions

  # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
  # take their YAML and generate this file automatically. Their YAML is available.
@@ -46,23 +49,66 @@ class OpenAIResponseInputMessageContentImage(BaseModel):

      :param detail: Level of detail for image processing, can be "low", "high", or "auto"
      :param type: Content type identifier, always "input_image"
+     :param file_id: (Optional) The ID of the file to be sent to the model.
      :param image_url: (Optional) URL of the image content
      """

      detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
      type: Literal["input_image"] = "input_image"
-     # TODO: handle file_id
+     file_id: str | None = None
      image_url: str | None = None


- # TODO: handle file content types
+ @json_schema_type
+ class OpenAIResponseInputMessageContentFile(BaseModel):
+     """File content for input messages in OpenAI response format.
+
+     :param type: The type of the input item. Always `input_file`.
+     :param file_data: The data of the file to be sent to the model.
+     :param file_id: (Optional) The ID of the file to be sent to the model.
+     :param file_url: The URL of the file to be sent to the model.
+     :param filename: The name of the file to be sent to the model.
+     """
+
+     type: Literal["input_file"] = "input_file"
+     file_data: str | None = None
+     file_id: str | None = None
+     file_url: str | None = None
+     filename: str | None = None
+
+     @model_validator(mode="after")
+     def validate_file_source(self) -> "OpenAIResponseInputMessageContentFile":
+         if not any([self.file_data, self.file_id, self.file_url, self.filename]):
+             raise ValueError(
+                 "At least one of 'file_data', 'file_id', 'file_url', or 'filename' must be provided for file content"
+             )
+         return self
+
+
  OpenAIResponseInputMessageContent = Annotated[
-     OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage,
+     OpenAIResponseInputMessageContentText
+     | OpenAIResponseInputMessageContentImage
+     | OpenAIResponseInputMessageContentFile,
      Field(discriminator="type"),
  ]
  register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")


+ @json_schema_type
+ class OpenAIResponsePrompt(BaseModel):
+     """OpenAI compatible Prompt object that is used in OpenAI responses.
+
+     :param id: Unique identifier of the prompt template
+     :param variables: Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types
+     like images or files.
+     :param version: Version number of the prompt to use (defaults to latest if not specified)
+     """
+
+     id: str
+     variables: dict[str, OpenAIResponseInputMessageContent] | None = None
+     version: str | None = None
+
+
  @json_schema_type
  class OpenAIResponseAnnotationFileCitation(BaseModel):
      """File citation annotation for referencing specific files in response content.
@@ -129,6 +175,7 @@ class OpenAIResponseOutputMessageContentOutputText(BaseModel):
      text: str
      type: Literal["output_text"] = "output_text"
      annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
+     logprobs: list[OpenAITokenLogProb] | None = None


  @json_schema_type
@@ -159,7 +206,7 @@ class OpenAIResponseMessage(BaseModel):
      scenarios.
      """

-     content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
+     content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
      role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
      type: Literal["message"] = "message"

@@ -211,10 +258,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
      """

      id: str
-     queries: list[str]
+     queries: Sequence[str]
      status: str
      type: Literal["file_search_call"] = "file_search_call"
-     results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
+     results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None


  @json_schema_type
@@ -359,7 +406,7 @@ class OpenAIResponseText(BaseModel):


  # Must match type Literals of OpenAIResponseInputToolWebSearch below
- WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
+ WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11", "web_search_2025_08_26"]


  @json_schema_type
@@ -371,9 +418,12 @@ class OpenAIResponseInputToolWebSearch(BaseModel):
      """

      # Must match values of WebSearchToolTypes above
-     type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
-         "web_search"
-     )
+     type: (
+         Literal["web_search"]
+         | Literal["web_search_preview"]
+         | Literal["web_search_preview_2025_03_11"]
+         | Literal["web_search_2025_08_26"]
+     ) = "web_search"
      # TODO: actually use search_context_size somewhere...
      search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
      # TODO: add user_location
@@ -443,6 +493,7 @@ class OpenAIResponseInputToolMCP(BaseModel):
      :param server_label: Label to identify this MCP server
      :param server_url: URL endpoint of the MCP server
      :param headers: (Optional) HTTP headers to include when connecting to the server
+     :param authorization: (Optional) OAuth access token for authenticating with the MCP server
      :param require_approval: Approval requirement for tool calls ("always", "never", or filter)
      :param allowed_tools: (Optional) Restriction on which tools can be used from this server
      """
@@ -451,6 +502,7 @@ class OpenAIResponseInputToolMCP(BaseModel):
      server_label: str
      server_url: str
      headers: dict[str, Any] | None = None
+     authorization: str | None = Field(default=None, exclude=True)

      require_approval: Literal["always"] | Literal["never"] | ApprovalFilter = "never"
      allowed_tools: list[str] | AllowedToolsFilter | None = None
@@ -490,6 +542,105 @@ OpenAIResponseTool = Annotated[
  register_schema(OpenAIResponseTool, name="OpenAIResponseTool")


+ @json_schema_type
+ class OpenAIResponseInputToolChoiceAllowedTools(BaseModel):
+     """Constrains the tools available to the model to a pre-defined set.
+
+     :param mode: Constrains the tools available to the model to a pre-defined set
+     :param tools: A list of tool definitions that the model should be allowed to call
+     :param type: Tool choice type identifier, always "allowed_tools"
+     """
+
+     mode: Literal["auto", "required"] = "auto"
+     tools: list[dict[str, str]]
+     type: Literal["allowed_tools"] = "allowed_tools"
+
+
+ @json_schema_type
+ class OpenAIResponseInputToolChoiceFileSearch(BaseModel):
+     """Indicates that the model should use file search to generate a response.
+
+     :param type: Tool choice type identifier, always "file_search"
+     """
+
+     type: Literal["file_search"] = "file_search"
+
+
+ @json_schema_type
+ class OpenAIResponseInputToolChoiceWebSearch(BaseModel):
+     """Indicates that the model should use web search to generate a response
+
+     :param type: Web search tool type variant to use
+     """
+
+     type: (
+         Literal["web_search"]
+         | Literal["web_search_preview"]
+         | Literal["web_search_preview_2025_03_11"]
+         | Literal["web_search_2025_08_26"]
+     ) = "web_search"
+
+
+ @json_schema_type
+ class OpenAIResponseInputToolChoiceFunctionTool(BaseModel):
+     """Forces the model to call a specific function.
+
+     :param name: The name of the function to call
+     :param type: Tool choice type identifier, always "function"
+     """
+
+     name: str
+     type: Literal["function"] = "function"
+
+
+ @json_schema_type
+ class OpenAIResponseInputToolChoiceMCPTool(BaseModel):
+     """Forces the model to call a specific tool on a remote MCP server
+
+     :param server_label: The label of the MCP server to use.
+     :param type: Tool choice type identifier, always "mcp"
+     :param name: (Optional) The name of the tool to call on the server.
+     """
+
+     server_label: str
+     type: Literal["mcp"] = "mcp"
+     name: str | None = None
+
+
+ @json_schema_type
+ class OpenAIResponseInputToolChoiceCustomTool(BaseModel):
+     """Forces the model to call a custom tool.
+
+     :param type: Tool choice type identifier, always "custom"
+     :param name: The name of the custom tool to call.
+     """
+
+     type: Literal["custom"] = "custom"
+     name: str
+
+
+ class OpenAIResponseInputToolChoiceMode(str, Enum):
+     auto = "auto"
+     required = "required"
+     none = "none"
+
+
+ OpenAIResponseInputToolChoiceObject = Annotated[
+     OpenAIResponseInputToolChoiceAllowedTools
+     | OpenAIResponseInputToolChoiceFileSearch
+     | OpenAIResponseInputToolChoiceWebSearch
+     | OpenAIResponseInputToolChoiceFunctionTool
+     | OpenAIResponseInputToolChoiceMCPTool
+     | OpenAIResponseInputToolChoiceCustomTool,
+     Field(discriminator="type"),
+ ]
+
+ # 3. Final Union without registration or None (Keep it clean)
+ OpenAIResponseInputToolChoice = OpenAIResponseInputToolChoiceMode | OpenAIResponseInputToolChoiceObject
+
+ register_schema(OpenAIResponseInputToolChoice, name="OpenAIResponseInputToolChoice")
+
+
  class OpenAIResponseUsageOutputTokensDetails(BaseModel):
      """Token details for output tokens in OpenAI response usage.

@@ -536,16 +687,20 @@ class OpenAIResponseObject(BaseModel):
      :param model: Model identifier used for generation
      :param object: Object type identifier, always "response"
      :param output: List of generated output items (messages, tool calls, etc.)
-     :param parallel_tool_calls: Whether tool calls can be executed in parallel
+     :param parallel_tool_calls: (Optional) Whether to allow more than one function tool call generated per turn.
      :param previous_response_id: (Optional) ID of the previous response in a conversation
+     :param prompt: (Optional) Reference to a prompt template and its variables.
      :param status: Current status of the response generation
      :param temperature: (Optional) Sampling temperature used for generation
      :param text: Text formatting configuration for the response
      :param top_p: (Optional) Nucleus sampling parameter used for generation
      :param tools: (Optional) An array of tools the model may call while generating a response.
+     :param tool_choice: (Optional) Tool choice configuration for the response.
      :param truncation: (Optional) Truncation strategy applied to the response
      :param usage: (Optional) Token usage information for the response
      :param instructions: (Optional) System message inserted into the model's context
+     :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response
+     :param metadata: (Optional) Dictionary of metadata key-value pairs
      """

      created_at: int
@@ -553,19 +708,23 @@
      id: str
      model: str
      object: Literal["response"] = "response"
-     output: list[OpenAIResponseOutput]
-     parallel_tool_calls: bool = False
+     output: Sequence[OpenAIResponseOutput]
+     parallel_tool_calls: bool | None = True
      previous_response_id: str | None = None
+     prompt: OpenAIResponsePrompt | None = None
      status: str
      temperature: float | None = None
      # Default to text format to avoid breaking the loading of old responses
      # before the field was added. New responses will have this set always.
      text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
      top_p: float | None = None
-     tools: list[OpenAIResponseTool] | None = None
+     tools: Sequence[OpenAIResponseTool] | None = None
+     tool_choice: OpenAIResponseInputToolChoice | None = None
      truncation: str | None = None
      usage: OpenAIResponseUsage | None = None
      instructions: str | None = None
+     max_tool_calls: int | None = None
+     metadata: dict[str, str] | None = None


  @json_schema_type
@@ -691,6 +850,7 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
      :param content_index: Index position within the text content
      :param delta: Incremental text content being added
      :param item_id: Unique identifier of the output item being updated
+     :param logprobs: (Optional) Token log probability details
      :param output_index: Index position of the item in the output list
      :param sequence_number: Sequential number for ordering streaming events
      :param type: Event type identifier, always "response.output_text.delta"
@@ -699,6 +859,7 @@
      content_index: int
      delta: str
      item_id: str
+     logprobs: list[OpenAITokenLogProb] | None = None
      output_index: int
      sequence_number: int
      type: Literal["response.output_text.delta"] = "response.output_text.delta"
@@ -889,7 +1050,7 @@ class OpenAIResponseContentPartOutputText(BaseModel):
      type: Literal["output_text"] = "output_text"
      text: str
      annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
-     logprobs: list[dict[str, Any]] | None = None
+     logprobs: list[OpenAITokenLogProb] | None = None


  @json_schema_type
@@ -1254,20 +1415,16 @@ class OpenAIResponseInputFunctionToolCallOutput(BaseModel):

  OpenAIResponseInput = Annotated[
      # Responses API allows output messages to be passed in as input
-     OpenAIResponseOutputMessageWebSearchToolCall
-     | OpenAIResponseOutputMessageFileSearchToolCall
-     | OpenAIResponseOutputMessageFunctionToolCall
+     OpenAIResponseOutput
      | OpenAIResponseInputFunctionToolCallOutput
-     | OpenAIResponseMCPApprovalRequest
      | OpenAIResponseMCPApprovalResponse
-     | OpenAIResponseOutputMessageMCPCall
-     | OpenAIResponseOutputMessageMCPListTools
      | OpenAIResponseMessage,
      Field(union_mode="left_to_right"),
  ]
  register_schema(OpenAIResponseInput, name="OpenAIResponseInput")


+ @json_schema_type
  class ListOpenAIResponseInputItem(BaseModel):
      """List container for OpenAI response input items.

@@ -1275,7 +1432,7 @@
      :param object: Object type identifier, always "list"
      """

-     data: list[OpenAIResponseInput]
+     data: Sequence[OpenAIResponseInput]
      object: Literal["list"] = "list"


@@ -1286,7 +1443,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
      :param input: List of input items that led to this response
      """

-     input: list[OpenAIResponseInput]
+     input: Sequence[OpenAIResponseInput]

      def to_response_object(self) -> OpenAIResponseObject:
          """Convert to OpenAIResponseObject by excluding input field."""
@@ -1304,7 +1461,7 @@ class ListOpenAIResponseObject(BaseModel):
      :param object: Object type identifier, always "list"
      """

-     data: list[OpenAIResponseObjectWithInput]
+     data: Sequence[OpenAIResponseObjectWithInput]
      has_more: bool
      first_id: str
      last_id: str
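The validate_file_source validator added above means an input_file content part must name at least one file source; a quick sketch of the behavior (the import path assumes the post-move module llama_stack_api.openai_responses, and the file id is illustrative):

from llama_stack_api.openai_responses import OpenAIResponseInputMessageContentFile

part = OpenAIResponseInputMessageContentFile(file_id="file-abc123")  # OK: one source given

try:
    OpenAIResponseInputMessageContentFile()  # no file_data/file_id/file_url/filename
except ValueError as err:  # pydantic's ValidationError subclasses ValueError
    print(err)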