llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738)
  1. llama_stack/__init__.py +5 -0
  2. llama_stack/apis/agents/__init__.py +1 -1
  3. llama_stack/apis/agents/agents.py +700 -281
  4. llama_stack/apis/agents/openai_responses.py +1311 -0
  5. llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
  6. llama_stack/apis/batches/batches.py +100 -0
  7. llama_stack/apis/benchmarks/__init__.py +7 -0
  8. llama_stack/apis/benchmarks/benchmarks.py +108 -0
  9. llama_stack/apis/common/content_types.py +143 -0
  10. llama_stack/apis/common/errors.py +103 -0
  11. llama_stack/apis/common/job_types.py +38 -0
  12. llama_stack/apis/common/responses.py +36 -0
  13. llama_stack/apis/common/training_types.py +36 -5
  14. llama_stack/apis/common/type_system.py +158 -0
  15. llama_stack/apis/conversations/__init__.py +31 -0
  16. llama_stack/apis/conversations/conversations.py +286 -0
  17. llama_stack/apis/datasetio/__init__.py +7 -0
  18. llama_stack/apis/datasetio/datasetio.py +59 -0
  19. llama_stack/apis/datasets/__init__.py +7 -0
  20. llama_stack/apis/datasets/datasets.py +251 -0
  21. llama_stack/apis/datatypes.py +160 -0
  22. llama_stack/apis/eval/__init__.py +7 -0
  23. llama_stack/apis/eval/eval.py +169 -0
  24. llama_stack/apis/files/__init__.py +7 -0
  25. llama_stack/apis/files/files.py +199 -0
  26. llama_stack/apis/inference/__init__.py +1 -1
  27. llama_stack/apis/inference/inference.py +1169 -113
  28. llama_stack/apis/inspect/__init__.py +1 -1
  29. llama_stack/apis/inspect/inspect.py +69 -16
  30. llama_stack/apis/models/__init__.py +1 -1
  31. llama_stack/apis/models/models.py +148 -21
  32. llama_stack/apis/post_training/__init__.py +1 -1
  33. llama_stack/apis/post_training/post_training.py +265 -120
  34. llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
  35. llama_stack/apis/prompts/prompts.py +204 -0
  36. llama_stack/apis/providers/__init__.py +7 -0
  37. llama_stack/apis/providers/providers.py +69 -0
  38. llama_stack/apis/resource.py +37 -0
  39. llama_stack/apis/safety/__init__.py +1 -1
  40. llama_stack/apis/safety/safety.py +95 -12
  41. llama_stack/apis/scoring/__init__.py +7 -0
  42. llama_stack/apis/scoring/scoring.py +93 -0
  43. llama_stack/apis/scoring_functions/__init__.py +7 -0
  44. llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
  45. llama_stack/apis/shields/__init__.py +1 -1
  46. llama_stack/apis/shields/shields.py +76 -33
  47. llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
  48. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
  49. llama_stack/apis/telemetry/__init__.py +1 -1
  50. llama_stack/apis/telemetry/telemetry.py +322 -31
  51. llama_stack/apis/{dataset → tools}/__init__.py +2 -1
  52. llama_stack/apis/tools/rag_tool.py +218 -0
  53. llama_stack/apis/tools/tools.py +221 -0
  54. llama_stack/apis/vector_io/__init__.py +7 -0
  55. llama_stack/apis/vector_io/vector_io.py +960 -0
  56. llama_stack/apis/vector_stores/__init__.py +7 -0
  57. llama_stack/apis/vector_stores/vector_stores.py +51 -0
  58. llama_stack/apis/version.py +9 -0
  59. llama_stack/cli/llama.py +13 -5
  60. llama_stack/cli/stack/_list_deps.py +182 -0
  61. llama_stack/cli/stack/list_apis.py +1 -1
  62. llama_stack/cli/stack/list_deps.py +55 -0
  63. llama_stack/cli/stack/list_providers.py +24 -10
  64. llama_stack/cli/stack/list_stacks.py +56 -0
  65. llama_stack/cli/stack/remove.py +115 -0
  66. llama_stack/cli/stack/run.py +169 -56
  67. llama_stack/cli/stack/stack.py +18 -4
  68. llama_stack/cli/stack/utils.py +151 -0
  69. llama_stack/cli/table.py +23 -61
  70. llama_stack/cli/utils.py +29 -0
  71. llama_stack/core/access_control/access_control.py +131 -0
  72. llama_stack/core/access_control/conditions.py +129 -0
  73. llama_stack/core/access_control/datatypes.py +107 -0
  74. llama_stack/core/build.py +164 -0
  75. llama_stack/core/client.py +205 -0
  76. llama_stack/core/common.sh +37 -0
  77. llama_stack/{distribution → core}/configure.py +74 -55
  78. llama_stack/core/conversations/conversations.py +309 -0
  79. llama_stack/core/datatypes.py +625 -0
  80. llama_stack/core/distribution.py +276 -0
  81. llama_stack/core/external.py +54 -0
  82. llama_stack/core/id_generation.py +42 -0
  83. llama_stack/core/inspect.py +86 -0
  84. llama_stack/core/library_client.py +539 -0
  85. llama_stack/core/prompts/prompts.py +234 -0
  86. llama_stack/core/providers.py +137 -0
  87. llama_stack/core/request_headers.py +115 -0
  88. llama_stack/core/resolver.py +506 -0
  89. llama_stack/core/routers/__init__.py +101 -0
  90. llama_stack/core/routers/datasets.py +73 -0
  91. llama_stack/core/routers/eval_scoring.py +155 -0
  92. llama_stack/core/routers/inference.py +645 -0
  93. llama_stack/core/routers/safety.py +85 -0
  94. llama_stack/core/routers/tool_runtime.py +91 -0
  95. llama_stack/core/routers/vector_io.py +442 -0
  96. llama_stack/core/routing_tables/benchmarks.py +62 -0
  97. llama_stack/core/routing_tables/common.py +254 -0
  98. llama_stack/core/routing_tables/datasets.py +91 -0
  99. llama_stack/core/routing_tables/models.py +163 -0
  100. llama_stack/core/routing_tables/scoring_functions.py +66 -0
  101. llama_stack/core/routing_tables/shields.py +61 -0
  102. llama_stack/core/routing_tables/toolgroups.py +129 -0
  103. llama_stack/core/routing_tables/vector_stores.py +292 -0
  104. llama_stack/core/server/auth.py +187 -0
  105. llama_stack/core/server/auth_providers.py +494 -0
  106. llama_stack/core/server/quota.py +110 -0
  107. llama_stack/core/server/routes.py +141 -0
  108. llama_stack/core/server/server.py +542 -0
  109. llama_stack/core/server/tracing.py +80 -0
  110. llama_stack/core/stack.py +546 -0
  111. llama_stack/core/start_stack.sh +117 -0
  112. llama_stack/core/storage/datatypes.py +283 -0
  113. llama_stack/{cli/model → core/store}/__init__.py +1 -1
  114. llama_stack/core/store/registry.py +199 -0
  115. llama_stack/core/testing_context.py +49 -0
  116. llama_stack/core/ui/app.py +55 -0
  117. llama_stack/core/ui/modules/api.py +32 -0
  118. llama_stack/core/ui/modules/utils.py +42 -0
  119. llama_stack/core/ui/page/distribution/datasets.py +18 -0
  120. llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
  121. llama_stack/core/ui/page/distribution/models.py +18 -0
  122. llama_stack/core/ui/page/distribution/providers.py +27 -0
  123. llama_stack/core/ui/page/distribution/resources.py +48 -0
  124. llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
  125. llama_stack/core/ui/page/distribution/shields.py +19 -0
  126. llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
  127. llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
  128. llama_stack/core/ui/page/playground/chat.py +130 -0
  129. llama_stack/core/ui/page/playground/tools.py +352 -0
  130. llama_stack/core/utils/config.py +30 -0
  131. llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
  132. llama_stack/core/utils/config_resolution.py +125 -0
  133. llama_stack/core/utils/context.py +84 -0
  134. llama_stack/core/utils/exec.py +96 -0
  135. llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
  136. llama_stack/{distribution → core}/utils/model_utils.py +2 -2
  137. llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
  138. llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
  139. llama_stack/distributions/dell/build.yaml +33 -0
  140. llama_stack/distributions/dell/dell.py +158 -0
  141. llama_stack/distributions/dell/run-with-safety.yaml +141 -0
  142. llama_stack/distributions/dell/run.yaml +132 -0
  143. llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
  144. llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
  145. llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
  146. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
  147. llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
  148. llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
  149. llama_stack/distributions/nvidia/build.yaml +29 -0
  150. llama_stack/distributions/nvidia/nvidia.py +154 -0
  151. llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
  152. llama_stack/distributions/nvidia/run.yaml +116 -0
  153. llama_stack/distributions/open-benchmark/__init__.py +7 -0
  154. llama_stack/distributions/open-benchmark/build.yaml +36 -0
  155. llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
  156. llama_stack/distributions/open-benchmark/run.yaml +252 -0
  157. llama_stack/distributions/postgres-demo/__init__.py +7 -0
  158. llama_stack/distributions/postgres-demo/build.yaml +23 -0
  159. llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
  160. llama_stack/distributions/postgres-demo/run.yaml +115 -0
  161. llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
  162. llama_stack/distributions/starter/build.yaml +61 -0
  163. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  164. llama_stack/distributions/starter/run.yaml +276 -0
  165. llama_stack/distributions/starter/starter.py +345 -0
  166. llama_stack/distributions/starter-gpu/__init__.py +7 -0
  167. llama_stack/distributions/starter-gpu/build.yaml +61 -0
  168. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  169. llama_stack/distributions/starter-gpu/run.yaml +279 -0
  170. llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
  171. llama_stack/distributions/template.py +456 -0
  172. llama_stack/distributions/watsonx/__init__.py +7 -0
  173. llama_stack/distributions/watsonx/build.yaml +33 -0
  174. llama_stack/distributions/watsonx/run.yaml +133 -0
  175. llama_stack/distributions/watsonx/watsonx.py +95 -0
  176. llama_stack/env.py +24 -0
  177. llama_stack/log.py +314 -0
  178. llama_stack/models/llama/checkpoint.py +164 -0
  179. llama_stack/models/llama/datatypes.py +164 -0
  180. llama_stack/models/llama/hadamard_utils.py +86 -0
  181. llama_stack/models/llama/llama3/args.py +74 -0
  182. llama_stack/models/llama/llama3/chat_format.py +286 -0
  183. llama_stack/models/llama/llama3/generation.py +376 -0
  184. llama_stack/models/llama/llama3/interface.py +255 -0
  185. llama_stack/models/llama/llama3/model.py +304 -0
  186. llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
  187. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
  188. llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
  189. llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
  190. llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
  191. llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
  192. llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
  193. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
  194. llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
  195. llama_stack/models/llama/llama3/quantization/loader.py +316 -0
  196. llama_stack/models/llama/llama3/template_data.py +116 -0
  197. llama_stack/models/llama/llama3/tokenizer.model +128000 -0
  198. llama_stack/models/llama/llama3/tokenizer.py +198 -0
  199. llama_stack/models/llama/llama3/tool_utils.py +266 -0
  200. llama_stack/models/llama/llama3_1/__init__.py +12 -0
  201. llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
  202. llama_stack/models/llama/llama3_1/prompts.py +258 -0
  203. llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
  204. llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
  205. llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
  206. llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
  207. llama_stack/models/llama/llama3_3/prompts.py +259 -0
  208. llama_stack/models/llama/llama4/args.py +107 -0
  209. llama_stack/models/llama/llama4/chat_format.py +317 -0
  210. llama_stack/models/llama/llama4/datatypes.py +56 -0
  211. llama_stack/models/llama/llama4/ffn.py +58 -0
  212. llama_stack/models/llama/llama4/generation.py +313 -0
  213. llama_stack/models/llama/llama4/model.py +437 -0
  214. llama_stack/models/llama/llama4/moe.py +214 -0
  215. llama_stack/models/llama/llama4/preprocess.py +435 -0
  216. llama_stack/models/llama/llama4/prompt_format.md +304 -0
  217. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
  218. llama_stack/models/llama/llama4/prompts.py +279 -0
  219. llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
  220. llama_stack/models/llama/llama4/quantization/loader.py +226 -0
  221. llama_stack/models/llama/llama4/tokenizer.model +200000 -0
  222. llama_stack/models/llama/llama4/tokenizer.py +263 -0
  223. llama_stack/models/llama/llama4/vision/__init__.py +5 -0
  224. llama_stack/models/llama/llama4/vision/embedding.py +210 -0
  225. llama_stack/models/llama/llama4/vision/encoder.py +412 -0
  226. llama_stack/models/llama/prompt_format.py +191 -0
  227. llama_stack/models/llama/quantize_impls.py +316 -0
  228. llama_stack/models/llama/sku_list.py +1029 -0
  229. llama_stack/models/llama/sku_types.py +233 -0
  230. llama_stack/models/llama/tokenizer_utils.py +40 -0
  231. llama_stack/providers/datatypes.py +136 -107
  232. llama_stack/providers/inline/__init__.py +5 -0
  233. llama_stack/providers/inline/agents/__init__.py +5 -0
  234. llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
  235. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
  236. llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
  237. llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
  238. llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
  239. llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
  240. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
  241. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
  242. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
  243. llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
  244. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
  245. llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
  246. llama_stack/providers/inline/batches/__init__.py +5 -0
  247. llama_stack/providers/inline/batches/reference/__init__.py +36 -0
  248. llama_stack/providers/inline/batches/reference/batches.py +679 -0
  249. llama_stack/providers/inline/batches/reference/config.py +40 -0
  250. llama_stack/providers/inline/datasetio/__init__.py +5 -0
  251. llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
  252. llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
  253. llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
  254. llama_stack/providers/inline/eval/__init__.py +5 -0
  255. llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
  256. llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
  257. llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
  258. llama_stack/providers/inline/files/localfs/__init__.py +20 -0
  259. llama_stack/providers/inline/files/localfs/config.py +31 -0
  260. llama_stack/providers/inline/files/localfs/files.py +219 -0
  261. llama_stack/providers/inline/inference/__init__.py +5 -0
  262. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
  263. llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
  264. llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
  265. llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
  266. llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
  267. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
  268. llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
  269. llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
  270. llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
  271. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
  272. llama_stack/providers/inline/post_training/__init__.py +5 -0
  273. llama_stack/providers/inline/post_training/common/__init__.py +5 -0
  274. llama_stack/providers/inline/post_training/common/utils.py +35 -0
  275. llama_stack/providers/inline/post_training/common/validator.py +36 -0
  276. llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
  277. llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
  278. llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
  279. llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
  280. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
  281. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
  282. llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
  283. llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
  284. llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
  285. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
  286. llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
  287. llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
  288. llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
  289. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
  290. llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
  291. llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
  292. llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
  293. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
  294. llama_stack/providers/inline/safety/__init__.py +5 -0
  295. llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
  296. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
  297. llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
  298. llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
  299. llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
  300. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
  301. llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
  302. llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
  303. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
  304. llama_stack/providers/inline/scoring/__init__.py +5 -0
  305. llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
  306. llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
  307. llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
  308. llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
  309. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
  310. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
  311. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
  312. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
  313. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
  314. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
  315. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
  316. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
  317. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
  318. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
  319. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
  320. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
  321. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
  322. llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
  323. llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
  324. llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
  325. llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
  326. llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
  327. llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
  328. llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
  329. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
  330. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
  331. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
  332. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
  333. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
  334. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
  335. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
  336. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
  337. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
  338. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
  339. llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
  340. llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
  341. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
  342. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
  343. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
  344. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
  345. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
  346. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
  347. llama_stack/providers/inline/telemetry/__init__.py +5 -0
  348. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
  349. llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
  350. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
  351. llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
  352. llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
  353. llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
  354. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
  355. llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
  356. llama_stack/providers/inline/vector_io/__init__.py +5 -0
  357. llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
  358. llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
  359. llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
  360. llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
  361. llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
  362. llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
  363. llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
  364. llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
  365. llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
  366. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
  367. llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
  368. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
  369. llama_stack/providers/registry/agents.py +16 -18
  370. llama_stack/providers/registry/batches.py +26 -0
  371. llama_stack/providers/registry/datasetio.py +49 -0
  372. llama_stack/providers/registry/eval.py +46 -0
  373. llama_stack/providers/registry/files.py +31 -0
  374. llama_stack/providers/registry/inference.py +273 -118
  375. llama_stack/providers/registry/post_training.py +69 -0
  376. llama_stack/providers/registry/safety.py +46 -41
  377. llama_stack/providers/registry/scoring.py +51 -0
  378. llama_stack/providers/registry/tool_runtime.py +87 -0
  379. llama_stack/providers/registry/vector_io.py +828 -0
  380. llama_stack/providers/remote/__init__.py +5 -0
  381. llama_stack/providers/remote/agents/__init__.py +5 -0
  382. llama_stack/providers/remote/datasetio/__init__.py +5 -0
  383. llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
  384. llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
  385. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
  386. llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
  387. llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
  388. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
  389. llama_stack/providers/remote/eval/__init__.py +5 -0
  390. llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
  391. llama_stack/providers/remote/eval/nvidia/config.py +29 -0
  392. llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
  393. llama_stack/providers/remote/files/s3/__init__.py +19 -0
  394. llama_stack/providers/remote/files/s3/config.py +42 -0
  395. llama_stack/providers/remote/files/s3/files.py +313 -0
  396. llama_stack/providers/remote/inference/__init__.py +5 -0
  397. llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
  398. llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
  399. llama_stack/providers/remote/inference/anthropic/config.py +28 -0
  400. llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
  401. llama_stack/providers/remote/inference/azure/azure.py +25 -0
  402. llama_stack/providers/remote/inference/azure/config.py +61 -0
  403. llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
  404. llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
  405. llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
  406. llama_stack/providers/remote/inference/bedrock/models.py +29 -0
  407. llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
  408. llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
  409. llama_stack/providers/remote/inference/cerebras/config.py +30 -0
  410. llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
  411. llama_stack/providers/remote/inference/databricks/config.py +37 -0
  412. llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
  413. llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
  414. llama_stack/providers/remote/inference/fireworks/config.py +27 -0
  415. llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
  416. llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
  417. llama_stack/providers/remote/inference/gemini/config.py +28 -0
  418. llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
  419. llama_stack/providers/remote/inference/groq/__init__.py +15 -0
  420. llama_stack/providers/remote/inference/groq/config.py +34 -0
  421. llama_stack/providers/remote/inference/groq/groq.py +18 -0
  422. llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
  423. llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
  424. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
  425. llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
  426. llama_stack/providers/remote/inference/nvidia/config.py +64 -0
  427. llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
  428. llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
  429. llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
  430. llama_stack/providers/remote/inference/ollama/config.py +25 -0
  431. llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
  432. llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
  433. llama_stack/providers/remote/inference/openai/config.py +39 -0
  434. llama_stack/providers/remote/inference/openai/openai.py +38 -0
  435. llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
  436. llama_stack/providers/remote/inference/passthrough/config.py +34 -0
  437. llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
  438. llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
  439. llama_stack/providers/remote/inference/runpod/config.py +32 -0
  440. llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
  441. llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
  442. llama_stack/providers/remote/inference/sambanova/config.py +34 -0
  443. llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
  444. llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
  445. llama_stack/providers/remote/inference/tgi/config.py +76 -0
  446. llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
  447. llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
  448. llama_stack/providers/remote/inference/together/config.py +27 -0
  449. llama_stack/providers/remote/inference/together/together.py +102 -0
  450. llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
  451. llama_stack/providers/remote/inference/vertexai/config.py +48 -0
  452. llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
  453. llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
  454. llama_stack/providers/remote/inference/vllm/config.py +59 -0
  455. llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
  456. llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
  457. llama_stack/providers/remote/inference/watsonx/config.py +45 -0
  458. llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
  459. llama_stack/providers/remote/post_training/__init__.py +5 -0
  460. llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
  461. llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
  462. llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
  463. llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
  464. llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
  465. llama_stack/providers/remote/safety/__init__.py +5 -0
  466. llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
  467. llama_stack/providers/remote/safety/bedrock/config.py +14 -0
  468. llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
  469. llama_stack/providers/remote/safety/nvidia/config.py +40 -0
  470. llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
  471. llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
  472. llama_stack/providers/remote/safety/sambanova/config.py +37 -0
  473. llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
  474. llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
  475. llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
  476. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
  477. llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
  478. llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
  479. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
  480. llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
  481. llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
  482. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
  483. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
  484. llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
  485. llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
  486. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
  487. llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
  488. llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
  489. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
  490. llama_stack/providers/remote/vector_io/__init__.py +5 -0
  491. llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
  492. llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
  493. llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
  494. llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
  495. llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
  496. llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
  497. llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
  498. llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
  499. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
  500. llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
  501. llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
  502. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
  503. llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
  504. llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
  505. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
  506. llama_stack/providers/utils/bedrock/__init__.py +5 -0
  507. llama_stack/providers/utils/bedrock/client.py +74 -0
  508. llama_stack/providers/utils/bedrock/config.py +64 -0
  509. llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
  510. llama_stack/providers/utils/common/__init__.py +5 -0
  511. llama_stack/providers/utils/common/data_schema_validator.py +103 -0
  512. llama_stack/providers/utils/datasetio/__init__.py +5 -0
  513. llama_stack/providers/utils/datasetio/url_utils.py +47 -0
  514. llama_stack/providers/utils/files/__init__.py +5 -0
  515. llama_stack/providers/utils/files/form_data.py +69 -0
  516. llama_stack/providers/utils/inference/__init__.py +8 -7
  517. llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
  518. llama_stack/providers/utils/inference/inference_store.py +264 -0
  519. llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
  520. llama_stack/providers/utils/inference/model_registry.py +173 -23
  521. llama_stack/providers/utils/inference/openai_compat.py +1261 -49
  522. llama_stack/providers/utils/inference/openai_mixin.py +506 -0
  523. llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
  524. llama_stack/providers/utils/kvstore/api.py +6 -6
  525. llama_stack/providers/utils/kvstore/config.py +28 -48
  526. llama_stack/providers/utils/kvstore/kvstore.py +61 -15
  527. llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
  528. llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
  529. llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
  530. llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
  531. llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
  532. llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
  533. llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
  534. llama_stack/providers/utils/memory/file_utils.py +1 -1
  535. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
  536. llama_stack/providers/utils/memory/vector_store.py +220 -82
  537. llama_stack/providers/utils/pagination.py +43 -0
  538. llama_stack/providers/utils/responses/__init__.py +5 -0
  539. llama_stack/providers/utils/responses/responses_store.py +292 -0
  540. llama_stack/providers/utils/scheduler.py +270 -0
  541. llama_stack/providers/utils/scoring/__init__.py +5 -0
  542. llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
  543. llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
  544. llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
  545. llama_stack/providers/utils/sqlstore/__init__.py +5 -0
  546. llama_stack/providers/utils/sqlstore/api.py +128 -0
  547. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
  548. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
  549. llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
  550. llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
  551. llama_stack/providers/utils/telemetry/tracing.py +192 -53
  552. llama_stack/providers/utils/tools/__init__.py +5 -0
  553. llama_stack/providers/utils/tools/mcp.py +148 -0
  554. llama_stack/providers/utils/tools/ttl_dict.py +70 -0
  555. llama_stack/providers/utils/vector_io/__init__.py +5 -0
  556. llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
  557. llama_stack/schema_utils.py +118 -0
  558. llama_stack/strong_typing/__init__.py +19 -0
  559. llama_stack/strong_typing/auxiliary.py +228 -0
  560. llama_stack/strong_typing/classdef.py +440 -0
  561. llama_stack/strong_typing/core.py +46 -0
  562. llama_stack/strong_typing/deserializer.py +877 -0
  563. llama_stack/strong_typing/docstring.py +409 -0
  564. llama_stack/strong_typing/exception.py +23 -0
  565. llama_stack/strong_typing/inspection.py +1085 -0
  566. llama_stack/strong_typing/mapping.py +40 -0
  567. llama_stack/strong_typing/name.py +182 -0
  568. llama_stack/strong_typing/py.typed +0 -0
  569. llama_stack/strong_typing/schema.py +792 -0
  570. llama_stack/strong_typing/serialization.py +97 -0
  571. llama_stack/strong_typing/serializer.py +500 -0
  572. llama_stack/strong_typing/slots.py +27 -0
  573. llama_stack/strong_typing/topological.py +89 -0
  574. llama_stack/testing/__init__.py +5 -0
  575. llama_stack/testing/api_recorder.py +956 -0
  576. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  577. llama_stack-0.3.4.dist-info/METADATA +261 -0
  578. llama_stack-0.3.4.dist-info/RECORD +625 -0
  579. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
  580. llama_stack/apis/agents/client.py +0 -292
  581. llama_stack/apis/agents/event_logger.py +0 -184
  582. llama_stack/apis/batch_inference/batch_inference.py +0 -72
  583. llama_stack/apis/common/deployment_types.py +0 -31
  584. llama_stack/apis/dataset/dataset.py +0 -63
  585. llama_stack/apis/evals/evals.py +0 -122
  586. llama_stack/apis/inference/client.py +0 -197
  587. llama_stack/apis/inspect/client.py +0 -82
  588. llama_stack/apis/memory/client.py +0 -155
  589. llama_stack/apis/memory/memory.py +0 -65
  590. llama_stack/apis/memory_banks/__init__.py +0 -7
  591. llama_stack/apis/memory_banks/client.py +0 -101
  592. llama_stack/apis/memory_banks/memory_banks.py +0 -78
  593. llama_stack/apis/models/client.py +0 -83
  594. llama_stack/apis/reward_scoring/__init__.py +0 -7
  595. llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
  596. llama_stack/apis/safety/client.py +0 -105
  597. llama_stack/apis/shields/client.py +0 -79
  598. llama_stack/cli/download.py +0 -340
  599. llama_stack/cli/model/describe.py +0 -82
  600. llama_stack/cli/model/download.py +0 -24
  601. llama_stack/cli/model/list.py +0 -62
  602. llama_stack/cli/model/model.py +0 -34
  603. llama_stack/cli/model/prompt_format.py +0 -112
  604. llama_stack/cli/model/safety_models.py +0 -52
  605. llama_stack/cli/stack/build.py +0 -299
  606. llama_stack/cli/stack/configure.py +0 -178
  607. llama_stack/distribution/build.py +0 -123
  608. llama_stack/distribution/build_conda_env.sh +0 -136
  609. llama_stack/distribution/build_container.sh +0 -142
  610. llama_stack/distribution/common.sh +0 -40
  611. llama_stack/distribution/configure_container.sh +0 -47
  612. llama_stack/distribution/datatypes.py +0 -139
  613. llama_stack/distribution/distribution.py +0 -58
  614. llama_stack/distribution/inspect.py +0 -67
  615. llama_stack/distribution/request_headers.py +0 -57
  616. llama_stack/distribution/resolver.py +0 -323
  617. llama_stack/distribution/routers/__init__.py +0 -48
  618. llama_stack/distribution/routers/routers.py +0 -158
  619. llama_stack/distribution/routers/routing_tables.py +0 -173
  620. llama_stack/distribution/server/endpoints.py +0 -48
  621. llama_stack/distribution/server/server.py +0 -343
  622. llama_stack/distribution/start_conda_env.sh +0 -42
  623. llama_stack/distribution/start_container.sh +0 -64
  624. llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
  625. llama_stack/distribution/templates/local-build.yaml +0 -10
  626. llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
  627. llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
  628. llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
  629. llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
  630. llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
  631. llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
  632. llama_stack/distribution/templates/local-together-build.yaml +0 -10
  633. llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
  634. llama_stack/distribution/utils/exec.py +0 -105
  635. llama_stack/providers/adapters/agents/sample/sample.py +0 -18
  636. llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
  637. llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
  638. llama_stack/providers/adapters/inference/databricks/config.py +0 -21
  639. llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
  640. llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
  641. llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
  642. llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
  643. llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
  644. llama_stack/providers/adapters/inference/sample/sample.py +0 -23
  645. llama_stack/providers/adapters/inference/tgi/config.py +0 -43
  646. llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
  647. llama_stack/providers/adapters/inference/together/config.py +0 -22
  648. llama_stack/providers/adapters/inference/together/together.py +0 -143
  649. llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
  650. llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
  651. llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
  652. llama_stack/providers/adapters/memory/sample/sample.py +0 -23
  653. llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
  654. llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
  655. llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
  656. llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
  657. llama_stack/providers/adapters/safety/sample/sample.py +0 -23
  658. llama_stack/providers/adapters/safety/together/__init__.py +0 -18
  659. llama_stack/providers/adapters/safety/together/config.py +0 -26
  660. llama_stack/providers/adapters/safety/together/together.py +0 -101
  661. llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
  662. llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
  663. llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
  664. llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
  665. llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
  666. llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
  667. llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
  668. llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
  669. llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
  670. llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
  671. llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
  672. llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
  673. llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
  674. llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
  675. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
  676. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
  677. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
  678. llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
  679. llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
  680. llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
  681. llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
  682. llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
  683. llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
  684. llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
  685. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
  686. llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
  687. llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
  688. llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
  689. llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
  690. llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
  691. llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
  692. llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
  693. llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
  694. llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
  695. llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
  696. llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
  697. llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
  698. llama_stack/providers/impls/vllm/config.py +0 -35
  699. llama_stack/providers/impls/vllm/vllm.py +0 -241
  700. llama_stack/providers/registry/memory.py +0 -78
  701. llama_stack/providers/registry/telemetry.py +0 -44
  702. llama_stack/providers/tests/agents/test_agents.py +0 -210
  703. llama_stack/providers/tests/inference/test_inference.py +0 -257
  704. llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
  705. llama_stack/providers/tests/memory/test_memory.py +0 -136
  706. llama_stack/providers/tests/resolver.py +0 -100
  707. llama_stack/providers/tests/safety/test_safety.py +0 -77
  708. llama_stack-0.0.42.dist-info/METADATA +0 -137
  709. llama_stack-0.0.42.dist-info/RECORD +0 -256
  710. /llama_stack/{distribution → core}/__init__.py +0 -0
  711. /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
  712. /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
  713. /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
  714. /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
  715. /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
  716. /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
  717. /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
  718. /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
  719. /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
  720. /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
  721. /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
  722. /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
  723. /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
  724. /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
  725. /llama_stack/{distribution → core}/utils/serialize.py +0 -0
  726. /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
  727. /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
  728. /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
  729. /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
  730. /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
  731. /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
  732. /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
  733. /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
  734. /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
  735. /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
  736. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
  737. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
  738. {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
llama_stack/apis/agents/openai_responses.py (new file)
@@ -0,0 +1,1311 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+ from typing import Annotated, Any, Literal
+
+ from pydantic import BaseModel, Field
+ from typing_extensions import TypedDict
+
+ from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
+ from llama_stack.schema_utils import json_schema_type, register_schema
+
+ # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
+ # take their YAML and generate this file automatically. Their YAML is available.
+
+
+ @json_schema_type
+ class OpenAIResponseError(BaseModel):
+     """Error details for failed OpenAI response requests.
+
+     :param code: Error code identifying the type of failure
+     :param message: Human-readable error message describing the failure
+     """
+
+     code: str
+     message: str
+
+
+ @json_schema_type
+ class OpenAIResponseInputMessageContentText(BaseModel):
+     """Text content for input messages in OpenAI response format.
+
+     :param text: The text content of the input message
+     :param type: Content type identifier, always "input_text"
+     """
+
+     text: str
+     type: Literal["input_text"] = "input_text"
+
+
+ @json_schema_type
+ class OpenAIResponseInputMessageContentImage(BaseModel):
+     """Image content for input messages in OpenAI response format.
+
+     :param detail: Level of detail for image processing, can be "low", "high", or "auto"
+     :param type: Content type identifier, always "input_image"
+     :param image_url: (Optional) URL of the image content
+     """
+
+     detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
+     type: Literal["input_image"] = "input_image"
+     # TODO: handle file_id
+     image_url: str | None = None
+
+
+ # TODO: handle file content types
+ OpenAIResponseInputMessageContent = Annotated[
+     OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage,
+     Field(discriminator="type"),
+ ]
+ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
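The `Annotated[..., Field(discriminator="type")]` pattern above is what lets Pydantic pick the concrete content model from the `type` literal when validating a raw payload. A minimal sketch of how such a union resolves, using Pydantic v2's `TypeAdapter` (the payload values here are illustrative, not from the package):

    from pydantic import TypeAdapter

    # The "type" key selects the concrete model; no manual dispatch is needed.
    adapter = TypeAdapter(OpenAIResponseInputMessageContent)
    part = adapter.validate_python({"type": "input_text", "text": "hello"})
    assert isinstance(part, OpenAIResponseInputMessageContentText)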
+
+
+ @json_schema_type
+ class OpenAIResponseAnnotationFileCitation(BaseModel):
+     """File citation annotation for referencing specific files in response content.
+
+     :param type: Annotation type identifier, always "file_citation"
+     :param file_id: Unique identifier of the referenced file
+     :param filename: Name of the referenced file
+     :param index: Position index of the citation within the content
+     """
+
+     type: Literal["file_citation"] = "file_citation"
+     file_id: str
+     filename: str
+     index: int
+
+
+ @json_schema_type
+ class OpenAIResponseAnnotationCitation(BaseModel):
+     """URL citation annotation for referencing external web resources.
+
+     :param type: Annotation type identifier, always "url_citation"
+     :param end_index: End position of the citation span in the content
+     :param start_index: Start position of the citation span in the content
+     :param title: Title of the referenced web resource
+     :param url: URL of the referenced web resource
+     """
+
+     type: Literal["url_citation"] = "url_citation"
+     end_index: int
+     start_index: int
+     title: str
+     url: str
+
+
+ @json_schema_type
+ class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
+     type: Literal["container_file_citation"] = "container_file_citation"
+     container_id: str
+     end_index: int
+     file_id: str
+     filename: str
+     start_index: int
+
+
+ @json_schema_type
+ class OpenAIResponseAnnotationFilePath(BaseModel):
+     type: Literal["file_path"] = "file_path"
+     file_id: str
+     index: int
+
+
+ OpenAIResponseAnnotations = Annotated[
+     OpenAIResponseAnnotationFileCitation
+     | OpenAIResponseAnnotationCitation
+     | OpenAIResponseAnnotationContainerFileCitation
+     | OpenAIResponseAnnotationFilePath,
+     Field(discriminator="type"),
+ ]
+ register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
+
+
+ @json_schema_type
+ class OpenAIResponseOutputMessageContentOutputText(BaseModel):
+     text: str
+     type: Literal["output_text"] = "output_text"
+     annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
+
+
+ @json_schema_type
+ class OpenAIResponseContentPartRefusal(BaseModel):
+     """Refusal content within a streamed response part.
+
+     :param type: Content part type identifier, always "refusal"
+     :param refusal: Refusal text supplied by the model
+     """
+
+     type: Literal["refusal"] = "refusal"
+     refusal: str
+
+
+ OpenAIResponseOutputMessageContent = Annotated[
+     OpenAIResponseOutputMessageContentOutputText | OpenAIResponseContentPartRefusal,
+     Field(discriminator="type"),
+ ]
+ register_schema(OpenAIResponseOutputMessageContent, name="OpenAIResponseOutputMessageContent")
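As a rough usage sketch (all values below are invented), an assistant text part carries its citations inline as annotations rather than as separate messages:

    text_part = OpenAIResponseOutputMessageContentOutputText(
        text="Latest figures are on the project page.",
        annotations=[
            OpenAIResponseAnnotationCitation(
                start_index=0,
                end_index=14,
                title="Project page",
                url="https://example.com/project",
            )
        ],
    )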
+
+
+ @json_schema_type
+ class OpenAIResponseMessage(BaseModel):
+     """
+     Corresponds to the various Message types in the Responses API.
+     They are all under one type because the Responses API gives them all
+     the same "type" value, and there is no way to tell them apart in certain
+     scenarios.
+     """
+
+     content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
+     role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
+     type: Literal["message"] = "message"
+
+     # The fields below are not used in all scenarios, but are required in others.
+     id: str | None = None
+     status: str | None = None
169
+
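+ # Illustrative sketch (editor's example): `content` accepts either a plain
+ # string or a list of typed content parts, and pydantic picks the matching
+ # union member during validation.
+ #
+ #     user_msg = OpenAIResponseMessage(role="user", content="What is in this file?")
+ #     assistant_msg = OpenAIResponseMessage.model_validate(
+ #         {
+ #             "role": "assistant",
+ #             "type": "message",
+ #             "content": [{"type": "output_text", "text": "Hello!", "annotations": []}],
+ #         }
+ #     )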
170
+
171
+ @json_schema_type
172
+ class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
173
+ """Web search tool call output message for OpenAI responses.
174
+
175
+ :param id: Unique identifier for this tool call
176
+ :param status: Current status of the web search operation
177
+ :param type: Tool call type identifier, always "web_search_call"
178
+ """
179
+
180
+ id: str
181
+ status: str
182
+ type: Literal["web_search_call"] = "web_search_call"
183
+
184
+
185
+ class OpenAIResponseOutputMessageFileSearchToolCallResults(BaseModel):
186
+ """Search results returned by the file search operation.
187
+
188
+ :param attributes: Key-value attributes associated with the file
189
+ :param file_id: Unique identifier of the file containing the result
190
+ :param filename: Name of the file containing the result
191
+ :param score: Relevance score for this search result (between 0 and 1)
192
+ :param text: Text content of the search result
193
+ """
194
+
195
+ attributes: dict[str, Any]
196
+ file_id: str
197
+ filename: str
198
+ score: float
199
+ text: str
200
+
201
+
202
+ @json_schema_type
203
+ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
204
+ """File search tool call output message for OpenAI responses.
205
+
206
+ :param id: Unique identifier for this tool call
207
+ :param queries: List of search queries executed
208
+ :param status: Current status of the file search operation
209
+ :param type: Tool call type identifier, always "file_search_call"
210
+ :param results: (Optional) Search results returned by the file search operation
211
+ """
212
+
213
+ id: str
214
+ queries: list[str]
215
+ status: str
216
+ type: Literal["file_search_call"] = "file_search_call"
217
+ results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
218
+
219
+
220
+ @json_schema_type
221
+ class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):
222
+ """Function tool call output message for OpenAI responses.
223
+
224
+ :param call_id: Unique identifier for the function call
225
+ :param name: Name of the function being called
226
+ :param arguments: JSON string containing the function arguments
227
+ :param type: Tool call type identifier, always "function_call"
228
+ :param id: (Optional) Additional identifier for the tool call
229
+ :param status: (Optional) Current status of the function call execution
230
+ """
231
+
232
+ call_id: str
233
+ name: str
234
+ arguments: str
235
+ type: Literal["function_call"] = "function_call"
236
+ id: str | None = None
237
+ status: str | None = None
238
+
239
+
240
+ @json_schema_type
241
+ class OpenAIResponseOutputMessageMCPCall(BaseModel):
242
+ """Model Context Protocol (MCP) call output message for OpenAI responses.
243
+
244
+ :param id: Unique identifier for this MCP call
245
+ :param type: Tool call type identifier, always "mcp_call"
246
+ :param arguments: JSON string containing the MCP call arguments
247
+ :param name: Name of the MCP method being called
248
+ :param server_label: Label identifying the MCP server handling the call
249
+ :param error: (Optional) Error message if the MCP call failed
250
+ :param output: (Optional) Output result from the successful MCP call
251
+ """
252
+
253
+ id: str
254
+ type: Literal["mcp_call"] = "mcp_call"
255
+ arguments: str
256
+ name: str
257
+ server_label: str
258
+ error: str | None = None
259
+ output: str | None = None
260
+
261
+
262
+ class MCPListToolsTool(BaseModel):
263
+ """Tool definition returned by MCP list tools operation.
264
+
265
+ :param input_schema: JSON schema defining the tool's input parameters
266
+ :param name: Name of the tool
267
+ :param description: (Optional) Description of what the tool does
268
+ """
269
+
270
+ input_schema: dict[str, Any]
271
+ name: str
272
+ description: str | None = None
273
+
274
+
275
+ @json_schema_type
276
+ class OpenAIResponseOutputMessageMCPListTools(BaseModel):
277
+ """MCP list tools output message containing available tools from an MCP server.
278
+
279
+ :param id: Unique identifier for this MCP list tools operation
280
+ :param type: Tool call type identifier, always "mcp_list_tools"
281
+ :param server_label: Label identifying the MCP server providing the tools
282
+ :param tools: List of available tools provided by the MCP server
283
+ """
284
+
285
+ id: str
286
+ type: Literal["mcp_list_tools"] = "mcp_list_tools"
287
+ server_label: str
288
+ tools: list[MCPListToolsTool]
289
+
290
+
291
+ @json_schema_type
292
+ class OpenAIResponseMCPApprovalRequest(BaseModel):
293
+ """
294
+ A request for human approval of a tool invocation.
295
+ """
296
+
297
+ arguments: str
298
+ id: str
299
+ name: str
300
+ server_label: str
301
+ type: Literal["mcp_approval_request"] = "mcp_approval_request"
302
+
303
+
304
+ @json_schema_type
305
+ class OpenAIResponseMCPApprovalResponse(BaseModel):
306
+ """
307
+ A response to an MCP approval request.
308
+ """
309
+
310
+ approval_request_id: str
311
+ approve: bool
312
+ type: Literal["mcp_approval_response"] = "mcp_approval_response"
313
+ id: str | None = None
314
+ reason: str | None = None
315
+
316
+
317
+ OpenAIResponseOutput = Annotated[
318
+ OpenAIResponseMessage
319
+ | OpenAIResponseOutputMessageWebSearchToolCall
320
+ | OpenAIResponseOutputMessageFileSearchToolCall
321
+ | OpenAIResponseOutputMessageFunctionToolCall
322
+ | OpenAIResponseOutputMessageMCPCall
323
+ | OpenAIResponseOutputMessageMCPListTools
324
+ | OpenAIResponseMCPApprovalRequest,
325
+ Field(discriminator="type"),
326
+ ]
327
+ register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
328
+
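+ # Illustrative sketch (editor's example, assuming pydantic v2): the "type"
+ # discriminator lets raw payloads be parsed into the correct output class.
+ #
+ #     from pydantic import TypeAdapter
+ #
+ #     adapter = TypeAdapter(OpenAIResponseOutput)
+ #     item = adapter.validate_python(
+ #         {"id": "ws_1", "status": "completed", "type": "web_search_call"}
+ #     )
+ #     assert isinstance(item, OpenAIResponseOutputMessageWebSearchToolCall)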
329
+
330
+ # This has to be a TypedDict because we need a "schema" field and our strong
331
+ # typing code in the schema generator doesn't support Pydantic aliases. That also
332
+ # means we can't use a discriminator field here, because TypedDicts don't support
333
+ # default values, which the strong typing code requires for discriminators.
334
+ class OpenAIResponseTextFormat(TypedDict, total=False):
335
+ """Configuration for Responses API text format.
336
+
337
+ :param type: Must be "text", "json_schema", or "json_object" to identify the format type
338
+ :param name: The name of the response format. Only used for json_schema.
339
+ :param schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model. Only used for json_schema.
340
+ :param description: (Optional) A description of the response format. Only used for json_schema.
341
+ :param strict: (Optional) Whether to strictly enforce the JSON schema. If true, the response must match the schema exactly. Only used for json_schema.
342
+ """
343
+
344
+ type: Literal["text"] | Literal["json_schema"] | Literal["json_object"]
345
+ name: str | None
346
+ schema: dict[str, Any] | None
347
+ description: str | None
348
+ strict: bool | None
349
+
350
+
351
+ @json_schema_type
352
+ class OpenAIResponseText(BaseModel):
353
+ """Text response configuration for OpenAI responses.
354
+
355
+ :param format: (Optional) Text format configuration specifying output format requirements
356
+ """
357
+
358
+ format: OpenAIResponseTextFormat | None = None
359
+
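+ # Illustrative sketch (editor's example; the `Ticket` model is hypothetical):
+ # one way to fill the json_schema variant is to derive `schema` from a
+ # pydantic model via `model_json_schema()`.
+ #
+ #     class Ticket(BaseModel):
+ #         title: str
+ #         priority: int
+ #
+ #     text = OpenAIResponseText(
+ #         format=OpenAIResponseTextFormat(
+ #             type="json_schema",
+ #             name="ticket",
+ #             schema=Ticket.model_json_schema(),
+ #             strict=True,
+ #         )
+ #     )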
360
+
361
+ # Must match type Literals of OpenAIResponseInputToolWebSearch below
362
+ WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
363
+
364
+
365
+ @json_schema_type
366
+ class OpenAIResponseInputToolWebSearch(BaseModel):
367
+ """Web search tool configuration for OpenAI response inputs.
368
+
369
+ :param type: Web search tool type variant to use
370
+ :param search_context_size: (Optional) Size of search context, must be "low", "medium", or "high"
371
+ """
372
+
373
+ # Must match values of WebSearchToolTypes above
374
+ type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
375
+ "web_search"
376
+ )
377
+ # TODO: actually use search_context_size somewhere...
378
+ search_context_size: str | None = Field(default="medium", pattern="^(low|medium|high)$")
379
+ # TODO: add user_location
380
+
381
+
382
+ @json_schema_type
383
+ class OpenAIResponseInputToolFunction(BaseModel):
384
+ """Function tool configuration for OpenAI response inputs.
385
+
386
+ :param type: Tool type identifier, always "function"
387
+ :param name: Name of the function that can be called
388
+ :param description: (Optional) Description of what the function does
389
+ :param parameters: (Optional) JSON schema defining the function's parameters
390
+ :param strict: (Optional) Whether to enforce strict parameter validation
391
+ """
392
+
393
+ type: Literal["function"] = "function"
394
+ name: str
395
+ description: str | None = None
396
+ parameters: dict[str, Any] | None = None
397
+ strict: bool | None = None
398
+
399
+
400
+ @json_schema_type
401
+ class OpenAIResponseInputToolFileSearch(BaseModel):
402
+ """File search tool configuration for OpenAI response inputs.
403
+
404
+ :param type: Tool type identifier, always "file_search"
405
+ :param vector_store_ids: List of vector store identifiers to search within
406
+ :param filters: (Optional) Additional filters to apply to the search
407
+ :param max_num_results: (Optional) Maximum number of search results to return (1-50)
408
+ :param ranking_options: (Optional) Options for ranking and scoring search results
409
+ """
410
+
411
+ type: Literal["file_search"] = "file_search"
412
+ vector_store_ids: list[str]
413
+ filters: dict[str, Any] | None = None
414
+ max_num_results: int | None = Field(default=10, ge=1, le=50)
415
+ ranking_options: FileSearchRankingOptions | None = None
416
+
417
+
418
+ class ApprovalFilter(BaseModel):
419
+ """Filter configuration for MCP tool approval requirements.
420
+
421
+ :param always: (Optional) List of tool names that always require approval
422
+ :param never: (Optional) List of tool names that never require approval
423
+ """
424
+
425
+ always: list[str] | None = None
426
+ never: list[str] | None = None
427
+
428
+
429
+ class AllowedToolsFilter(BaseModel):
430
+ """Filter configuration for restricting which MCP tools can be used.
431
+
432
+ :param tool_names: (Optional) List of specific tool names that are allowed
433
+ """
434
+
435
+ tool_names: list[str] | None = None
436
+
437
+
438
+ @json_schema_type
439
+ class OpenAIResponseInputToolMCP(BaseModel):
440
+ """Model Context Protocol (MCP) tool configuration for OpenAI response inputs.
441
+
442
+ :param type: Tool type identifier, always "mcp"
443
+ :param server_label: Label to identify this MCP server
444
+ :param server_url: URL endpoint of the MCP server
445
+ :param headers: (Optional) HTTP headers to include when connecting to the server
446
+ :param require_approval: Approval requirement for tool calls ("always", "never", or filter)
447
+ :param allowed_tools: (Optional) Restriction on which tools can be used from this server
448
+ """
449
+
450
+ type: Literal["mcp"] = "mcp"
451
+ server_label: str
452
+ server_url: str
453
+ headers: dict[str, Any] | None = None
454
+
455
+ require_approval: Literal["always"] | Literal["never"] | ApprovalFilter = "never"
456
+ allowed_tools: list[str] | AllowedToolsFilter | None = None
457
+
458
+
459
+ OpenAIResponseInputTool = Annotated[
460
+ OpenAIResponseInputToolWebSearch
461
+ | OpenAIResponseInputToolFileSearch
462
+ | OpenAIResponseInputToolFunction
463
+ | OpenAIResponseInputToolMCP,
464
+ Field(discriminator="type"),
465
+ ]
466
+ register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
467
+
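+ # Illustrative sketch (editor's example; names and URL are placeholders): a
+ # tools list mixing a function tool with an MCP server whose destructive tool
+ # always requires approval.
+ #
+ #     tools = [
+ #         OpenAIResponseInputToolFunction(
+ #             name="get_weather",
+ #             parameters={"type": "object", "properties": {"city": {"type": "string"}}},
+ #         ),
+ #         OpenAIResponseInputToolMCP(
+ #             server_label="files",
+ #             server_url="http://localhost:8000/mcp",
+ #             require_approval=ApprovalFilter(always=["delete_file"]),
+ #         ),
+ #     ]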
468
+
469
+ @json_schema_type
470
+ class OpenAIResponseToolMCP(BaseModel):
471
+ """Model Context Protocol (MCP) tool configuration for OpenAI response object.
472
+
473
+ :param type: Tool type identifier, always "mcp"
474
+ :param server_label: Label to identify this MCP server
475
+ :param allowed_tools: (Optional) Restriction on which tools can be used from this server
476
+ """
477
+
478
+ type: Literal["mcp"] = "mcp"
479
+ server_label: str
480
+ allowed_tools: list[str] | AllowedToolsFilter | None = None
481
+
482
+
483
+ OpenAIResponseTool = Annotated[
484
+ OpenAIResponseInputToolWebSearch
485
+ | OpenAIResponseInputToolFileSearch
486
+ | OpenAIResponseInputToolFunction
487
+ | OpenAIResponseToolMCP, # The only type that differs from its input counterpart is the MCP tool
488
+ Field(discriminator="type"),
489
+ ]
490
+ register_schema(OpenAIResponseTool, name="OpenAIResponseTool")
491
+
492
+
493
+ class OpenAIResponseUsageOutputTokensDetails(BaseModel):
494
+ """Token details for output tokens in OpenAI response usage.
495
+
496
+ :param reasoning_tokens: Number of tokens used for reasoning (o1/o3 models)
497
+ """
498
+
499
+ reasoning_tokens: int | None = None
500
+
501
+
502
+ class OpenAIResponseUsageInputTokensDetails(BaseModel):
503
+ """Token details for input tokens in OpenAI response usage.
504
+
505
+ :param cached_tokens: Number of tokens retrieved from cache
506
+ """
507
+
508
+ cached_tokens: int | None = None
509
+
510
+
511
+ @json_schema_type
512
+ class OpenAIResponseUsage(BaseModel):
513
+ """Usage information for OpenAI response.
514
+
515
+ :param input_tokens: Number of tokens in the input
516
+ :param output_tokens: Number of tokens in the output
517
+ :param total_tokens: Total tokens used (input + output)
518
+ :param input_tokens_details: Detailed breakdown of input token usage
519
+ :param output_tokens_details: Detailed breakdown of output token usage
520
+ """
521
+
522
+ input_tokens: int
523
+ output_tokens: int
524
+ total_tokens: int
525
+ input_tokens_details: OpenAIResponseUsageInputTokensDetails | None = None
526
+ output_tokens_details: OpenAIResponseUsageOutputTokensDetails | None = None
527
+
528
+
529
+ @json_schema_type
530
+ class OpenAIResponseObject(BaseModel):
531
+ """Complete OpenAI response object containing generation results and metadata.
532
+
533
+ :param created_at: Unix timestamp when the response was created
534
+ :param error: (Optional) Error details if the response generation failed
535
+ :param id: Unique identifier for this response
536
+ :param model: Model identifier used for generation
537
+ :param object: Object type identifier, always "response"
538
+ :param output: List of generated output items (messages, tool calls, etc.)
539
+ :param parallel_tool_calls: Whether tool calls can be executed in parallel
540
+ :param previous_response_id: (Optional) ID of the previous response in a conversation
541
+ :param status: Current status of the response generation
542
+ :param temperature: (Optional) Sampling temperature used for generation
543
+ :param text: Text formatting configuration for the response
544
+ :param top_p: (Optional) Nucleus sampling parameter used for generation
545
+ :param tools: (Optional) An array of tools the model may call while generating a response.
546
+ :param truncation: (Optional) Truncation strategy applied to the response
547
+ :param usage: (Optional) Token usage information for the response
548
+ :param instructions: (Optional) System message inserted into the model's context
549
+ """
550
+
551
+ created_at: int
552
+ error: OpenAIResponseError | None = None
553
+ id: str
554
+ model: str
555
+ object: Literal["response"] = "response"
556
+ output: list[OpenAIResponseOutput]
557
+ parallel_tool_calls: bool = False
558
+ previous_response_id: str | None = None
559
+ status: str
560
+ temperature: float | None = None
561
+ # Default to text format to avoid breaking the loading of old responses
562
+ # before the field was added. New responses will have this set always.
563
+ text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
564
+ top_p: float | None = None
565
+ tools: list[OpenAIResponseTool] | None = None
566
+ truncation: str | None = None
567
+ usage: OpenAIResponseUsage | None = None
568
+ instructions: str | None = None
569
+
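+ # Illustrative sketch (editor's example; identifiers are placeholders): a
+ # minimal completed response with one assistant message and usage accounting
+ # (total_tokens = input_tokens + output_tokens).
+ #
+ #     response = OpenAIResponseObject(
+ #         created_at=1_700_000_000,
+ #         id="resp_123",
+ #         model="my-model",
+ #         output=[OpenAIResponseMessage(role="assistant", content="Hi!", status="completed")],
+ #         status="completed",
+ #         usage=OpenAIResponseUsage(input_tokens=10, output_tokens=3, total_tokens=13),
+ #     )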
570
+
571
+ @json_schema_type
572
+ class OpenAIDeleteResponseObject(BaseModel):
573
+ """Response object confirming deletion of an OpenAI response.
574
+
575
+ :param id: Unique identifier of the deleted response
576
+ :param object: Object type identifier, always "response"
577
+ :param deleted: Deletion confirmation flag, always True
578
+ """
579
+
580
+ id: str
581
+ object: Literal["response"] = "response"
582
+ deleted: bool = True
583
+
584
+
585
+ @json_schema_type
586
+ class OpenAIResponseObjectStreamResponseCreated(BaseModel):
587
+ """Streaming event indicating a new response has been created.
588
+
589
+ :param response: The response object that was created
590
+ :param type: Event type identifier, always "response.created"
591
+ """
592
+
593
+ response: OpenAIResponseObject
594
+ type: Literal["response.created"] = "response.created"
595
+
596
+
597
+ @json_schema_type
598
+ class OpenAIResponseObjectStreamResponseInProgress(BaseModel):
599
+ """Streaming event indicating the response remains in progress.
600
+
601
+ :param response: Current response state while in progress
602
+ :param sequence_number: Sequential number for ordering streaming events
603
+ :param type: Event type identifier, always "response.in_progress"
604
+ """
605
+
606
+ response: OpenAIResponseObject
607
+ sequence_number: int
608
+ type: Literal["response.in_progress"] = "response.in_progress"
609
+
610
+
611
+ @json_schema_type
612
+ class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
613
+ """Streaming event indicating a response has been completed.
614
+
615
+ :param response: Completed response object
616
+ :param type: Event type identifier, always "response.completed"
617
+ """
618
+
619
+ response: OpenAIResponseObject
620
+ type: Literal["response.completed"] = "response.completed"
621
+
622
+
623
+ @json_schema_type
624
+ class OpenAIResponseObjectStreamResponseIncomplete(BaseModel):
625
+ """Streaming event emitted when a response ends in an incomplete state.
626
+
627
+ :param response: Response object describing the incomplete state
628
+ :param sequence_number: Sequential number for ordering streaming events
629
+ :param type: Event type identifier, always "response.incomplete"
630
+ """
631
+
632
+ response: OpenAIResponseObject
633
+ sequence_number: int
634
+ type: Literal["response.incomplete"] = "response.incomplete"
635
+
636
+
637
+ @json_schema_type
638
+ class OpenAIResponseObjectStreamResponseFailed(BaseModel):
639
+ """Streaming event emitted when a response fails.
640
+
641
+ :param response: Response object describing the failure
642
+ :param sequence_number: Sequential number for ordering streaming events
643
+ :param type: Event type identifier, always "response.failed"
644
+ """
645
+
646
+ response: OpenAIResponseObject
647
+ sequence_number: int
648
+ type: Literal["response.failed"] = "response.failed"
649
+
650
+
651
+ @json_schema_type
652
+ class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
653
+ """Streaming event for when a new output item is added to the response.
654
+
655
+ :param response_id: Unique identifier of the response containing this output
656
+ :param item: The output item that was added (message, tool call, etc.)
657
+ :param output_index: Index position of this item in the output list
658
+ :param sequence_number: Sequential number for ordering streaming events
659
+ :param type: Event type identifier, always "response.output_item.added"
660
+ """
661
+
662
+ response_id: str
663
+ item: OpenAIResponseOutput
664
+ output_index: int
665
+ sequence_number: int
666
+ type: Literal["response.output_item.added"] = "response.output_item.added"
667
+
668
+
669
+ @json_schema_type
670
+ class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel):
671
+ """Streaming event for when an output item is completed.
672
+
673
+ :param response_id: Unique identifier of the response containing this output
674
+ :param item: The completed output item (message, tool call, etc.)
675
+ :param output_index: Index position of this item in the output list
676
+ :param sequence_number: Sequential number for ordering streaming events
677
+ :param type: Event type identifier, always "response.output_item.done"
678
+ """
679
+
680
+ response_id: str
681
+ item: OpenAIResponseOutput
682
+ output_index: int
683
+ sequence_number: int
684
+ type: Literal["response.output_item.done"] = "response.output_item.done"
685
+
686
+
687
+ @json_schema_type
688
+ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
689
+ """Streaming event for incremental text content updates.
690
+
691
+ :param content_index: Index position within the text content
692
+ :param delta: Incremental text content being added
693
+ :param item_id: Unique identifier of the output item being updated
694
+ :param output_index: Index position of the item in the output list
695
+ :param sequence_number: Sequential number for ordering streaming events
696
+ :param type: Event type identifier, always "response.output_text.delta"
697
+ """
698
+
699
+ content_index: int
700
+ delta: str
701
+ item_id: str
702
+ output_index: int
703
+ sequence_number: int
704
+ type: Literal["response.output_text.delta"] = "response.output_text.delta"
705
+
706
+
707
+ @json_schema_type
708
+ class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
709
+ """Streaming event for when text output is completed.
710
+
711
+ :param content_index: Index position within the text content
712
+ :param text: Final complete text content of the output item
713
+ :param item_id: Unique identifier of the completed output item
714
+ :param output_index: Index position of the item in the output list
715
+ :param sequence_number: Sequential number for ordering streaming events
716
+ :param type: Event type identifier, always "response.output_text.done"
717
+ """
718
+
719
+ content_index: int
720
+ text: str # final text of the output item
721
+ item_id: str
722
+ output_index: int
723
+ sequence_number: int
724
+ type: Literal["response.output_text.done"] = "response.output_text.done"
725
+
726
+
727
+ @json_schema_type
728
+ class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel):
729
+ """Streaming event for incremental function call argument updates.
730
+
731
+ :param delta: Incremental function call arguments being added
732
+ :param item_id: Unique identifier of the function call being updated
733
+ :param output_index: Index position of the item in the output list
734
+ :param sequence_number: Sequential number for ordering streaming events
735
+ :param type: Event type identifier, always "response.function_call_arguments.delta"
736
+ """
737
+
738
+ delta: str
739
+ item_id: str
740
+ output_index: int
741
+ sequence_number: int
742
+ type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta"
743
+
744
+
745
+ @json_schema_type
746
+ class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel):
747
+ """Streaming event for when function call arguments are completed.
748
+
749
+ :param arguments: Final complete arguments JSON string for the function call
750
+ :param item_id: Unique identifier of the completed function call
751
+ :param output_index: Index position of the item in the output list
752
+ :param sequence_number: Sequential number for ordering streaming events
753
+ :param type: Event type identifier, always "response.function_call_arguments.done"
754
+ """
755
+
756
+ arguments: str # final arguments of the function call
757
+ item_id: str
758
+ output_index: int
759
+ sequence_number: int
760
+ type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done"
761
+
762
+
763
+ @json_schema_type
764
+ class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel):
765
+ """Streaming event for web search calls in progress.
766
+
767
+ :param item_id: Unique identifier of the web search call
768
+ :param output_index: Index position of the item in the output list
769
+ :param sequence_number: Sequential number for ordering streaming events
770
+ :param type: Event type identifier, always "response.web_search_call.in_progress"
771
+ """
772
+
773
+ item_id: str
774
+ output_index: int
775
+ sequence_number: int
776
+ type: Literal["response.web_search_call.in_progress"] = "response.web_search_call.in_progress"
777
+
778
+
779
+ @json_schema_type
780
+ class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel):
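+ """Streaming event for web search calls actively searching.
+
+ :param item_id: Unique identifier of the web search call
+ :param output_index: Index position of the item in the output list
+ :param sequence_number: Sequential number for ordering streaming events
+ :param type: Event type identifier, always "response.web_search_call.searching"
+ """
+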
781
+ item_id: str
782
+ output_index: int
783
+ sequence_number: int
784
+ type: Literal["response.web_search_call.searching"] = "response.web_search_call.searching"
785
+
786
+
787
+ @json_schema_type
788
+ class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel):
789
+ """Streaming event for completed web search calls.
790
+
791
+ :param item_id: Unique identifier of the completed web search call
792
+ :param output_index: Index position of the item in the output list
793
+ :param sequence_number: Sequential number for ordering streaming events
794
+ :param type: Event type identifier, always "response.web_search_call.completed"
795
+ """
796
+
797
+ item_id: str
798
+ output_index: int
799
+ sequence_number: int
800
+ type: Literal["response.web_search_call.completed"] = "response.web_search_call.completed"
801
+
802
+
803
+ @json_schema_type
804
+ class OpenAIResponseObjectStreamResponseMcpListToolsInProgress(BaseModel):
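+ """Streaming event for an MCP list tools operation in progress.
+
+ :param sequence_number: Sequential number for ordering streaming events
+ :param type: Event type identifier, always "response.mcp_list_tools.in_progress"
+ """
+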
805
+ sequence_number: int
806
+ type: Literal["response.mcp_list_tools.in_progress"] = "response.mcp_list_tools.in_progress"
807
+
808
+
809
+ @json_schema_type
810
+ class OpenAIResponseObjectStreamResponseMcpListToolsFailed(BaseModel):
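+ """Streaming event for a failed MCP list tools operation.
+
+ :param sequence_number: Sequential number for ordering streaming events
+ :param type: Event type identifier, always "response.mcp_list_tools.failed"
+ """
+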
811
+ sequence_number: int
812
+ type: Literal["response.mcp_list_tools.failed"] = "response.mcp_list_tools.failed"
813
+
814
+
815
+ @json_schema_type
816
+ class OpenAIResponseObjectStreamResponseMcpListToolsCompleted(BaseModel):
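+ """Streaming event for a completed MCP list tools operation.
+
+ :param sequence_number: Sequential number for ordering streaming events
+ :param type: Event type identifier, always "response.mcp_list_tools.completed"
+ """
+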
817
+ sequence_number: int
818
+ type: Literal["response.mcp_list_tools.completed"] = "response.mcp_list_tools.completed"
819
+
820
+
821
+ @json_schema_type
822
+ class OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta(BaseModel):
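+ """Streaming event for incremental MCP call argument updates.
+
+ :param delta: Incremental MCP call arguments being added
+ :param item_id: Unique identifier of the MCP call being updated
+ :param output_index: Index position of the item in the output list
+ :param sequence_number: Sequential number for ordering streaming events
+ :param type: Event type identifier, always "response.mcp_call.arguments.delta"
+ """
+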
823
+ delta: str
824
+ item_id: str
825
+ output_index: int
826
+ sequence_number: int
827
+ type: Literal["response.mcp_call.arguments.delta"] = "response.mcp_call.arguments.delta"
828
+
829
+
830
+ @json_schema_type
831
+ class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel):
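+ """Streaming event for when MCP call arguments are completed.
+
+ :param arguments: Final complete arguments JSON string for the MCP call
+ :param item_id: Unique identifier of the completed MCP call
+ :param output_index: Index position of the item in the output list
+ :param sequence_number: Sequential number for ordering streaming events
+ :param type: Event type identifier, always "response.mcp_call.arguments.done"
+ """
+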
832
+ arguments: str # final arguments of the MCP call
833
+ item_id: str
834
+ output_index: int
835
+ sequence_number: int
836
+ type: Literal["response.mcp_call.arguments.done"] = "response.mcp_call.arguments.done"
837
+
838
+
839
+ @json_schema_type
840
+ class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):
841
+ """Streaming event for MCP calls in progress.
842
+
843
+ :param item_id: Unique identifier of the MCP call
844
+ :param output_index: Index position of the item in the output list
845
+ :param sequence_number: Sequential number for ordering streaming events
846
+ :param type: Event type identifier, always "response.mcp_call.in_progress"
847
+ """
848
+
849
+ item_id: str
850
+ output_index: int
851
+ sequence_number: int
852
+ type: Literal["response.mcp_call.in_progress"] = "response.mcp_call.in_progress"
853
+
854
+
855
+ @json_schema_type
856
+ class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel):
857
+ """Streaming event for failed MCP calls.
858
+
859
+ :param sequence_number: Sequential number for ordering streaming events
860
+ :param type: Event type identifier, always "response.mcp_call.failed"
861
+ """
862
+
863
+ sequence_number: int
864
+ type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed"
865
+
866
+
867
+ @json_schema_type
868
+ class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel):
869
+ """Streaming event for completed MCP calls.
870
+
871
+ :param sequence_number: Sequential number for ordering streaming events
872
+ :param type: Event type identifier, always "response.mcp_call.completed"
873
+ """
874
+
875
+ sequence_number: int
876
+ type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed"
877
+
878
+
879
+ @json_schema_type
880
+ class OpenAIResponseContentPartOutputText(BaseModel):
881
+ """Text content within a streamed response part.
882
+
883
+ :param type: Content part type identifier, always "output_text"
884
+ :param text: Text emitted for this content part
885
+ :param annotations: Structured annotations associated with the text
886
+ :param logprobs: (Optional) Token log probability details
887
+ """
888
+
889
+ type: Literal["output_text"] = "output_text"
890
+ text: str
891
+ annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
892
+ logprobs: list[dict[str, Any]] | None = None
893
+
894
+
895
+ @json_schema_type
896
+ class OpenAIResponseContentPartReasoningText(BaseModel):
897
+ """Reasoning text emitted as part of a streamed response.
898
+
899
+ :param type: Content part type identifier, always "reasoning_text"
900
+ :param text: Reasoning text supplied by the model
901
+ """
902
+
903
+ type: Literal["reasoning_text"] = "reasoning_text"
904
+ text: str
905
+
906
+
907
+ OpenAIResponseContentPart = Annotated[
908
+ OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText,
909
+ Field(discriminator="type"),
910
+ ]
911
+ register_schema(OpenAIResponseContentPart, name="OpenAIResponseContentPart")
912
+
913
+
914
+ @json_schema_type
915
+ class OpenAIResponseObjectStreamResponseContentPartAdded(BaseModel):
916
+ """Streaming event for when a new content part is added to a response item.
917
+
918
+ :param content_index: Index position of the part within the content array
919
+ :param response_id: Unique identifier of the response containing this content
920
+ :param item_id: Unique identifier of the output item containing this content part
921
+ :param output_index: Index position of the output item in the response
922
+ :param part: The content part that was added
923
+ :param sequence_number: Sequential number for ordering streaming events
924
+ :param type: Event type identifier, always "response.content_part.added"
925
+ """
926
+
927
+ content_index: int
928
+ response_id: str
929
+ item_id: str
930
+ output_index: int
931
+ part: OpenAIResponseContentPart
932
+ sequence_number: int
933
+ type: Literal["response.content_part.added"] = "response.content_part.added"
934
+
935
+
936
+ @json_schema_type
937
+ class OpenAIResponseObjectStreamResponseContentPartDone(BaseModel):
938
+ """Streaming event for when a content part is completed.
939
+
940
+ :param content_index: Index position of the part within the content array
941
+ :param response_id: Unique identifier of the response containing this content
942
+ :param item_id: Unique identifier of the output item containing this content part
943
+ :param output_index: Index position of the output item in the response
944
+ :param part: The completed content part
945
+ :param sequence_number: Sequential number for ordering streaming events
946
+ :param type: Event type identifier, always "response.content_part.done"
947
+ """
948
+
949
+ content_index: int
950
+ response_id: str
951
+ item_id: str
952
+ output_index: int
953
+ part: OpenAIResponseContentPart
954
+ sequence_number: int
955
+ type: Literal["response.content_part.done"] = "response.content_part.done"
956
+
957
+
958
+ @json_schema_type
959
+ class OpenAIResponseObjectStreamResponseReasoningTextDelta(BaseModel):
960
+ """Streaming event for incremental reasoning text updates.
961
+
962
+ :param content_index: Index position of the reasoning content part
963
+ :param delta: Incremental reasoning text being added
964
+ :param item_id: Unique identifier of the output item being updated
965
+ :param output_index: Index position of the item in the output list
966
+ :param sequence_number: Sequential number for ordering streaming events
967
+ :param type: Event type identifier, always "response.reasoning_text.delta"
968
+ """
969
+
970
+ content_index: int
971
+ delta: str
972
+ item_id: str
973
+ output_index: int
974
+ sequence_number: int
975
+ type: Literal["response.reasoning_text.delta"] = "response.reasoning_text.delta"
976
+
977
+
978
+ @json_schema_type
979
+ class OpenAIResponseObjectStreamResponseReasoningTextDone(BaseModel):
980
+ """Streaming event for when reasoning text is completed.
981
+
982
+ :param content_index: Index position of the reasoning content part
983
+ :param text: Final complete reasoning text
984
+ :param item_id: Unique identifier of the completed output item
985
+ :param output_index: Index position of the item in the output list
986
+ :param sequence_number: Sequential number for ordering streaming events
987
+ :param type: Event type identifier, always "response.reasoning_text.done"
988
+ """
989
+
990
+ content_index: int
991
+ text: str
992
+ item_id: str
993
+ output_index: int
994
+ sequence_number: int
995
+ type: Literal["response.reasoning_text.done"] = "response.reasoning_text.done"
996
+
997
+
998
+ @json_schema_type
999
+ class OpenAIResponseContentPartReasoningSummary(BaseModel):
1000
+ """Reasoning summary part in a streamed response.
1001
+
1002
+ :param type: Content part type identifier, always "summary_text"
1003
+ :param text: Summary text
1004
+ """
1005
+
1006
+ type: Literal["summary_text"] = "summary_text"
1007
+ text: str
1008
+
1009
+
1010
+ @json_schema_type
1011
+ class OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded(BaseModel):
1012
+ """Streaming event for when a new reasoning summary part is added.
1013
+
1014
+ :param item_id: Unique identifier of the output item
1015
+ :param output_index: Index position of the output item
1016
+ :param part: The summary part that was added
1017
+ :param sequence_number: Sequential number for ordering streaming events
1018
+ :param summary_index: Index of the summary part within the reasoning summary
1019
+ :param type: Event type identifier, always "response.reasoning_summary_part.added"
1020
+ """
1021
+
1022
+ item_id: str
1023
+ output_index: int
1024
+ part: OpenAIResponseContentPartReasoningSummary
1025
+ sequence_number: int
1026
+ summary_index: int
1027
+ type: Literal["response.reasoning_summary_part.added"] = "response.reasoning_summary_part.added"
1028
+
1029
+
1030
+ @json_schema_type
1031
+ class OpenAIResponseObjectStreamResponseReasoningSummaryPartDone(BaseModel):
1032
+ """Streaming event for when a reasoning summary part is completed.
1033
+
1034
+ :param item_id: Unique identifier of the output item
1035
+ :param output_index: Index position of the output item
1036
+ :param part: The completed summary part
1037
+ :param sequence_number: Sequential number for ordering streaming events
1038
+ :param summary_index: Index of the summary part within the reasoning summary
1039
+ :param type: Event type identifier, always "response.reasoning_summary_part.done"
1040
+ """
1041
+
1042
+ item_id: str
1043
+ output_index: int
1044
+ part: OpenAIResponseContentPartReasoningSummary
1045
+ sequence_number: int
1046
+ summary_index: int
1047
+ type: Literal["response.reasoning_summary_part.done"] = "response.reasoning_summary_part.done"
1048
+
1049
+
1050
+ @json_schema_type
1051
+ class OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta(BaseModel):
1052
+ """Streaming event for incremental reasoning summary text updates.
1053
+
1054
+ :param delta: Incremental summary text being added
1055
+ :param item_id: Unique identifier of the output item
1056
+ :param output_index: Index position of the output item
1057
+ :param sequence_number: Sequential number for ordering streaming events
1058
+ :param summary_index: Index of the summary part within the reasoning summary
1059
+ :param type: Event type identifier, always "response.reasoning_summary_text.delta"
1060
+ """
1061
+
1062
+ delta: str
1063
+ item_id: str
1064
+ output_index: int
1065
+ sequence_number: int
1066
+ summary_index: int
1067
+ type: Literal["response.reasoning_summary_text.delta"] = "response.reasoning_summary_text.delta"
1068
+
1069
+
1070
+ @json_schema_type
1071
+ class OpenAIResponseObjectStreamResponseReasoningSummaryTextDone(BaseModel):
1072
+ """Streaming event for when reasoning summary text is completed.
1073
+
1074
+ :param text: Final complete summary text
1075
+ :param item_id: Unique identifier of the output item
1076
+ :param output_index: Index position of the output item
1077
+ :param sequence_number: Sequential number for ordering streaming events
1078
+ :param summary_index: Index of the summary part within the reasoning summary
1079
+ :param type: Event type identifier, always "response.reasoning_summary_text.done"
1080
+ """
1081
+
1082
+ text: str
1083
+ item_id: str
1084
+ output_index: int
1085
+ sequence_number: int
1086
+ summary_index: int
1087
+ type: Literal["response.reasoning_summary_text.done"] = "response.reasoning_summary_text.done"
1088
+
1089
+
1090
+ @json_schema_type
1091
+ class OpenAIResponseObjectStreamResponseRefusalDelta(BaseModel):
1092
+ """Streaming event for incremental refusal text updates.
1093
+
1094
+ :param content_index: Index position of the content part
1095
+ :param delta: Incremental refusal text being added
1096
+ :param item_id: Unique identifier of the output item
1097
+ :param output_index: Index position of the item in the output list
1098
+ :param sequence_number: Sequential number for ordering streaming events
1099
+ :param type: Event type identifier, always "response.refusal.delta"
1100
+ """
1101
+
1102
+ content_index: int
1103
+ delta: str
1104
+ item_id: str
1105
+ output_index: int
1106
+ sequence_number: int
1107
+ type: Literal["response.refusal.delta"] = "response.refusal.delta"
1108
+
1109
+
1110
+ @json_schema_type
1111
+ class OpenAIResponseObjectStreamResponseRefusalDone(BaseModel):
1112
+ """Streaming event for when refusal text is completed.
1113
+
1114
+ :param content_index: Index position of the content part
1115
+ :param refusal: Final complete refusal text
1116
+ :param item_id: Unique identifier of the output item
1117
+ :param output_index: Index position of the item in the output list
1118
+ :param sequence_number: Sequential number for ordering streaming events
1119
+ :param type: Event type identifier, always "response.refusal.done"
1120
+ """
1121
+
1122
+ content_index: int
1123
+ refusal: str
1124
+ item_id: str
1125
+ output_index: int
1126
+ sequence_number: int
1127
+ type: Literal["response.refusal.done"] = "response.refusal.done"
1128
+
1129
+
1130
+ @json_schema_type
1131
+ class OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded(BaseModel):
1132
+ """Streaming event for when an annotation is added to output text.
1133
+
1134
+ :param item_id: Unique identifier of the item to which the annotation is being added
1135
+ :param output_index: Index position of the output item in the response's output array
1136
+ :param content_index: Index position of the content part within the output item
1137
+ :param annotation_index: Index of the annotation within the content part
1138
+ :param annotation: The annotation object being added
1139
+ :param sequence_number: Sequential number for ordering streaming events
1140
+ :param type: Event type identifier, always "response.output_text.annotation.added"
1141
+ """
1142
+
1143
+ item_id: str
1144
+ output_index: int
1145
+ content_index: int
1146
+ annotation_index: int
1147
+ annotation: OpenAIResponseAnnotations
1148
+ sequence_number: int
1149
+ type: Literal["response.output_text.annotation.added"] = "response.output_text.annotation.added"
1150
+
1151
+
1152
+ @json_schema_type
1153
+ class OpenAIResponseObjectStreamResponseFileSearchCallInProgress(BaseModel):
1154
+ """Streaming event for file search calls in progress.
1155
+
1156
+ :param item_id: Unique identifier of the file search call
1157
+ :param output_index: Index position of the item in the output list
1158
+ :param sequence_number: Sequential number for ordering streaming events
1159
+ :param type: Event type identifier, always "response.file_search_call.in_progress"
1160
+ """
1161
+
1162
+ item_id: str
1163
+ output_index: int
1164
+ sequence_number: int
1165
+ type: Literal["response.file_search_call.in_progress"] = "response.file_search_call.in_progress"
1166
+
1167
+
1168
+ @json_schema_type
1169
+ class OpenAIResponseObjectStreamResponseFileSearchCallSearching(BaseModel):
1170
+ """Streaming event for file search currently searching.
1171
+
1172
+ :param item_id: Unique identifier of the file search call
1173
+ :param output_index: Index position of the item in the output list
1174
+ :param sequence_number: Sequential number for ordering streaming events
1175
+ :param type: Event type identifier, always "response.file_search_call.searching"
1176
+ """
1177
+
1178
+ item_id: str
1179
+ output_index: int
1180
+ sequence_number: int
1181
+ type: Literal["response.file_search_call.searching"] = "response.file_search_call.searching"
1182
+
1183
+
1184
+ @json_schema_type
1185
+ class OpenAIResponseObjectStreamResponseFileSearchCallCompleted(BaseModel):
1186
+ """Streaming event for completed file search calls.
1187
+
1188
+ :param item_id: Unique identifier of the completed file search call
1189
+ :param output_index: Index position of the item in the output list
1190
+ :param sequence_number: Sequential number for ordering streaming events
1191
+ :param type: Event type identifier, always "response.file_search_call.completed"
1192
+ """
1193
+
1194
+ item_id: str
1195
+ output_index: int
1196
+ sequence_number: int
1197
+ type: Literal["response.file_search_call.completed"] = "response.file_search_call.completed"
1198
+
1199
+
1200
+ OpenAIResponseObjectStream = Annotated[
1201
+ OpenAIResponseObjectStreamResponseCreated
1202
+ | OpenAIResponseObjectStreamResponseInProgress
1203
+ | OpenAIResponseObjectStreamResponseOutputItemAdded
1204
+ | OpenAIResponseObjectStreamResponseOutputItemDone
1205
+ | OpenAIResponseObjectStreamResponseOutputTextDelta
1206
+ | OpenAIResponseObjectStreamResponseOutputTextDone
1207
+ | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
1208
+ | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
1209
+ | OpenAIResponseObjectStreamResponseWebSearchCallInProgress
1210
+ | OpenAIResponseObjectStreamResponseWebSearchCallSearching
1211
+ | OpenAIResponseObjectStreamResponseWebSearchCallCompleted
1212
+ | OpenAIResponseObjectStreamResponseMcpListToolsInProgress
1213
+ | OpenAIResponseObjectStreamResponseMcpListToolsFailed
1214
+ | OpenAIResponseObjectStreamResponseMcpListToolsCompleted
1215
+ | OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
1216
+ | OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
1217
+ | OpenAIResponseObjectStreamResponseMcpCallInProgress
1218
+ | OpenAIResponseObjectStreamResponseMcpCallFailed
1219
+ | OpenAIResponseObjectStreamResponseMcpCallCompleted
1220
+ | OpenAIResponseObjectStreamResponseContentPartAdded
1221
+ | OpenAIResponseObjectStreamResponseContentPartDone
1222
+ | OpenAIResponseObjectStreamResponseReasoningTextDelta
1223
+ | OpenAIResponseObjectStreamResponseReasoningTextDone
1224
+ | OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded
1225
+ | OpenAIResponseObjectStreamResponseReasoningSummaryPartDone
1226
+ | OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta
1227
+ | OpenAIResponseObjectStreamResponseReasoningSummaryTextDone
1228
+ | OpenAIResponseObjectStreamResponseRefusalDelta
1229
+ | OpenAIResponseObjectStreamResponseRefusalDone
1230
+ | OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded
1231
+ | OpenAIResponseObjectStreamResponseFileSearchCallInProgress
1232
+ | OpenAIResponseObjectStreamResponseFileSearchCallSearching
1233
+ | OpenAIResponseObjectStreamResponseFileSearchCallCompleted
1234
+ | OpenAIResponseObjectStreamResponseIncomplete
1235
+ | OpenAIResponseObjectStreamResponseFailed
1236
+ | OpenAIResponseObjectStreamResponseCompleted,
1237
+ Field(discriminator="type"),
1238
+ ]
1239
+ register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
1240
+
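+ # Illustrative sketch (editor's example): consumers can dispatch on the event
+ # classes to accumulate text deltas until the terminal completed event.
+ #
+ #     def collect_text(events: list[OpenAIResponseObjectStream]) -> str:
+ #         chunks: list[str] = []
+ #         for event in events:
+ #             if isinstance(event, OpenAIResponseObjectStreamResponseOutputTextDelta):
+ #                 chunks.append(event.delta)
+ #             elif isinstance(event, OpenAIResponseObjectStreamResponseCompleted):
+ #                 break
+ #         return "".join(chunks)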
1241
+
1242
+ @json_schema_type
1243
+ class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
1244
+ """
1245
+ This represents the output of a function call that gets passed back to the model.
1246
+ """
1247
+
1248
+ call_id: str
1249
+ output: str
1250
+ type: Literal["function_call_output"] = "function_call_output"
1251
+ id: str | None = None
1252
+ status: str | None = None
1253
+
1254
+
1255
+ OpenAIResponseInput = Annotated[
1256
+ # Responses API allows output messages to be passed in as input
1257
+ OpenAIResponseOutputMessageWebSearchToolCall
1258
+ | OpenAIResponseOutputMessageFileSearchToolCall
1259
+ | OpenAIResponseOutputMessageFunctionToolCall
1260
+ | OpenAIResponseInputFunctionToolCallOutput
1261
+ | OpenAIResponseMCPApprovalRequest
1262
+ | OpenAIResponseMCPApprovalResponse
1263
+ | OpenAIResponseOutputMessageMCPCall
1264
+ | OpenAIResponseOutputMessageMCPListTools
1265
+ | OpenAIResponseMessage,
1266
+ Field(union_mode="left_to_right"),
1267
+ ]
1268
+ register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
1269
+
1270
+
1271
+ class ListOpenAIResponseInputItem(BaseModel):
1272
+ """List container for OpenAI response input items.
1273
+
1274
+ :param data: List of input items
1275
+ :param object: Object type identifier, always "list"
1276
+ """
1277
+
1278
+ data: list[OpenAIResponseInput]
1279
+ object: Literal["list"] = "list"
1280
+
1281
+
1282
+ @json_schema_type
1283
+ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
1284
+ """OpenAI response object extended with input context information.
1285
+
1286
+ :param input: List of input items that led to this response
1287
+ """
1288
+
1289
+ input: list[OpenAIResponseInput]
1290
+
1291
+ def to_response_object(self) -> OpenAIResponseObject:
1292
+ """Convert to OpenAIResponseObject by excluding input field."""
1293
+ return OpenAIResponseObject(**{k: v for k, v in self.model_dump().items() if k != "input"})
1294
+
1295
+
1296
+ @json_schema_type
1297
+ class ListOpenAIResponseObject(BaseModel):
1298
+ """Paginated list of OpenAI response objects with navigation metadata.
1299
+
1300
+ :param data: List of response objects with their input context
1301
+ :param has_more: Whether there are more results available beyond this page
1302
+ :param first_id: Identifier of the first item in this page
1303
+ :param last_id: Identifier of the last item in this page
1304
+ :param object: Object type identifier, always "list"
1305
+ """
1306
+
1307
+ data: list[OpenAIResponseObjectWithInput]
1308
+ has_more: bool
1309
+ first_id: str
1310
+ last_id: str
1311
+ object: Literal["list"] = "list"