llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (460)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +235 -62
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
  169. llama_stack/providers/registry/agents.py +8 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/providers/utils/vector_io/__init__.py +16 -0
  284. llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
  285. llama_stack/telemetry/constants.py +27 -0
  286. llama_stack/telemetry/helpers.py +43 -0
  287. llama_stack/testing/api_recorder.py +25 -16
  288. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
  289. llama_stack-0.4.1.dist-info/RECORD +588 -0
  290. llama_stack-0.4.1.dist-info/top_level.txt +2 -0
  291. llama_stack_api/__init__.py +945 -0
  292. llama_stack_api/admin/__init__.py +45 -0
  293. llama_stack_api/admin/api.py +72 -0
  294. llama_stack_api/admin/fastapi_routes.py +117 -0
  295. llama_stack_api/admin/models.py +113 -0
  296. llama_stack_api/agents.py +173 -0
  297. llama_stack_api/batches/__init__.py +40 -0
  298. llama_stack_api/batches/api.py +53 -0
  299. llama_stack_api/batches/fastapi_routes.py +113 -0
  300. llama_stack_api/batches/models.py +78 -0
  301. llama_stack_api/benchmarks/__init__.py +43 -0
  302. llama_stack_api/benchmarks/api.py +39 -0
  303. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  304. llama_stack_api/benchmarks/models.py +109 -0
  305. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  306. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  307. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  308. llama_stack_api/common/responses.py +77 -0
  309. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  310. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  311. llama_stack_api/connectors.py +146 -0
  312. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  313. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  314. llama_stack_api/datasets/__init__.py +61 -0
  315. llama_stack_api/datasets/api.py +35 -0
  316. llama_stack_api/datasets/fastapi_routes.py +104 -0
  317. llama_stack_api/datasets/models.py +152 -0
  318. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  319. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  320. llama_stack_api/file_processors/__init__.py +27 -0
  321. llama_stack_api/file_processors/api.py +64 -0
  322. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  323. llama_stack_api/file_processors/models.py +42 -0
  324. llama_stack_api/files/__init__.py +35 -0
  325. llama_stack_api/files/api.py +51 -0
  326. llama_stack_api/files/fastapi_routes.py +124 -0
  327. llama_stack_api/files/models.py +107 -0
  328. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  329. llama_stack_api/inspect_api/__init__.py +37 -0
  330. llama_stack_api/inspect_api/api.py +25 -0
  331. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  332. llama_stack_api/inspect_api/models.py +28 -0
  333. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  334. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  335. llama_stack_api/internal/sqlstore.py +79 -0
  336. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  337. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  338. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  339. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  340. llama_stack_api/providers/__init__.py +33 -0
  341. llama_stack_api/providers/api.py +16 -0
  342. llama_stack_api/providers/fastapi_routes.py +57 -0
  343. llama_stack_api/providers/models.py +24 -0
  344. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  345. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  346. llama_stack_api/router_utils.py +160 -0
  347. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  348. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  349. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  350. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  351. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  352. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  353. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  354. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  355. llama_stack/apis/agents/agents.py +0 -894
  356. llama_stack/apis/batches/__init__.py +0 -9
  357. llama_stack/apis/batches/batches.py +0 -100
  358. llama_stack/apis/benchmarks/__init__.py +0 -7
  359. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  360. llama_stack/apis/common/responses.py +0 -36
  361. llama_stack/apis/conversations/__init__.py +0 -31
  362. llama_stack/apis/datasets/datasets.py +0 -251
  363. llama_stack/apis/datatypes.py +0 -160
  364. llama_stack/apis/eval/__init__.py +0 -7
  365. llama_stack/apis/files/__init__.py +0 -7
  366. llama_stack/apis/files/files.py +0 -199
  367. llama_stack/apis/inference/__init__.py +0 -7
  368. llama_stack/apis/inference/event_logger.py +0 -43
  369. llama_stack/apis/inspect/__init__.py +0 -7
  370. llama_stack/apis/inspect/inspect.py +0 -94
  371. llama_stack/apis/models/__init__.py +0 -7
  372. llama_stack/apis/post_training/__init__.py +0 -7
  373. llama_stack/apis/prompts/__init__.py +0 -9
  374. llama_stack/apis/providers/__init__.py +0 -7
  375. llama_stack/apis/providers/providers.py +0 -69
  376. llama_stack/apis/safety/__init__.py +0 -7
  377. llama_stack/apis/scoring/__init__.py +0 -7
  378. llama_stack/apis/scoring_functions/__init__.py +0 -7
  379. llama_stack/apis/shields/__init__.py +0 -7
  380. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  381. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  382. llama_stack/apis/telemetry/__init__.py +0 -7
  383. llama_stack/apis/telemetry/telemetry.py +0 -423
  384. llama_stack/apis/tools/__init__.py +0 -8
  385. llama_stack/apis/vector_io/__init__.py +0 -7
  386. llama_stack/apis/vector_stores/__init__.py +0 -7
  387. llama_stack/core/server/tracing.py +0 -80
  388. llama_stack/core/ui/app.py +0 -55
  389. llama_stack/core/ui/modules/__init__.py +0 -5
  390. llama_stack/core/ui/modules/api.py +0 -32
  391. llama_stack/core/ui/modules/utils.py +0 -42
  392. llama_stack/core/ui/page/__init__.py +0 -5
  393. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  394. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  395. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  396. llama_stack/core/ui/page/distribution/models.py +0 -18
  397. llama_stack/core/ui/page/distribution/providers.py +0 -27
  398. llama_stack/core/ui/page/distribution/resources.py +0 -48
  399. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  400. llama_stack/core/ui/page/distribution/shields.py +0 -19
  401. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  402. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  403. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  404. llama_stack/core/ui/page/playground/__init__.py +0 -5
  405. llama_stack/core/ui/page/playground/chat.py +0 -130
  406. llama_stack/core/ui/page/playground/tools.py +0 -352
  407. llama_stack/distributions/dell/build.yaml +0 -33
  408. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  409. llama_stack/distributions/nvidia/build.yaml +0 -29
  410. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  411. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  412. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  413. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  414. llama_stack/distributions/starter/build.yaml +0 -61
  415. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  416. llama_stack/distributions/watsonx/build.yaml +0 -33
  417. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  418. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  419. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  420. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  421. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  422. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  423. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  424. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  425. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  426. llama_stack/providers/utils/sqlstore/api.py +0 -128
  427. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  428. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  429. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  430. llama_stack/strong_typing/__init__.py +0 -19
  431. llama_stack/strong_typing/auxiliary.py +0 -228
  432. llama_stack/strong_typing/classdef.py +0 -440
  433. llama_stack/strong_typing/core.py +0 -46
  434. llama_stack/strong_typing/deserializer.py +0 -877
  435. llama_stack/strong_typing/docstring.py +0 -409
  436. llama_stack/strong_typing/exception.py +0 -23
  437. llama_stack/strong_typing/inspection.py +0 -1085
  438. llama_stack/strong_typing/mapping.py +0 -40
  439. llama_stack/strong_typing/name.py +0 -182
  440. llama_stack/strong_typing/schema.py +0 -792
  441. llama_stack/strong_typing/serialization.py +0 -97
  442. llama_stack/strong_typing/serializer.py +0 -500
  443. llama_stack/strong_typing/slots.py +0 -27
  444. llama_stack/strong_typing/topological.py +0 -89
  445. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  446. llama_stack-0.3.5.dist-info/RECORD +0 -625
  447. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  448. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  451. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  452. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  453. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  454. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
  456. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
  457. {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
  458. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  459. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  460. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/remote/inference/oci/oci.py (new file)
@@ -0,0 +1,162 @@
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the terms described in the LICENSE file in
+ # the root directory of this source tree.
+
+
+ from collections.abc import Iterable
+ from typing import Any
+
+ import httpx
+ import oci
+ from oci.generative_ai.generative_ai_client import GenerativeAiClient
+ from oci.generative_ai.models import ModelCollection
+ from openai._base_client import DefaultAsyncHttpxClient
+
+ from llama_stack.log import get_logger
+ from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
+ from llama_stack.providers.remote.inference.oci.config import OCIConfig
+ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+ from llama_stack_api import Model, ModelType
+
+ logger = get_logger(name=__name__, category="inference::oci")
+
+ OCI_AUTH_TYPE_INSTANCE_PRINCIPAL = "instance_principal"
+ OCI_AUTH_TYPE_CONFIG_FILE = "config_file"
+ VALID_OCI_AUTH_TYPES = [OCI_AUTH_TYPE_INSTANCE_PRINCIPAL, OCI_AUTH_TYPE_CONFIG_FILE]
+ DEFAULT_OCI_REGION = "us-ashburn-1"
+
+ MODEL_CAPABILITIES = ["TEXT_GENERATION", "TEXT_SUMMARIZATION", "TEXT_EMBEDDINGS", "CHAT"]
+
+
+ class OCIInferenceAdapter(OpenAIMixin):
+     config: OCIConfig
+
+     embedding_models: list[str] = []
+
+     async def initialize(self) -> None:
+         """Initialize and validate OCI configuration."""
+         if self.config.oci_auth_type not in VALID_OCI_AUTH_TYPES:
+             raise ValueError(
+                 f"Invalid OCI authentication type: {self.config.oci_auth_type}."
+                 f"Valid types are one of: {VALID_OCI_AUTH_TYPES}"
+             )
+
+         if not self.config.oci_compartment_id:
+             raise ValueError("OCI_COMPARTMENT_OCID is a required parameter. Either set in env variable or config.")
+
+     def get_base_url(self) -> str:
+         region = self.config.oci_region or DEFAULT_OCI_REGION
+         return f"https://inference.generativeai.{region}.oci.oraclecloud.com/20231130/actions/v1"
+
+     def get_api_key(self) -> str | None:
+         # OCI doesn't use API keys, it uses request signing
+         return "<NOTUSED>"
+
+     def get_extra_client_params(self) -> dict[str, Any]:
+         """
+         Get extra parameters for the AsyncOpenAI client, including OCI-specific auth and headers.
+         """
+         auth = self._get_auth()
+         compartment_id = self.config.oci_compartment_id or ""
+
+         return {
+             "http_client": DefaultAsyncHttpxClient(
+                 auth=auth,
+                 headers={
+                     "CompartmentId": compartment_id,
+                 },
+             ),
+         }
+
+     def _get_oci_signer(self) -> oci.signer.AbstractBaseSigner | None:
+         if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+             return oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
+         return None
+
+     def _get_oci_config(self) -> dict:
+         if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+             config = {"region": self.config.oci_region}
+         elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
+             config = oci.config.from_file(self.config.oci_config_file_path, self.config.oci_config_profile)
+             if not config.get("region"):
+                 raise ValueError(
+                     "Region not specified in config. Please specify in config or with OCI_REGION env variable."
+                 )
+
+         return config
+
+     def _get_auth(self) -> httpx.Auth:
+         if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+             return OciInstancePrincipalAuth()
+         elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
+             return OciUserPrincipalAuth(
+                 config_file=self.config.oci_config_file_path, profile_name=self.config.oci_config_profile
+             )
+         else:
+             raise ValueError(f"Invalid OCI authentication type: {self.config.oci_auth_type}")
+
+     async def list_provider_model_ids(self) -> Iterable[str]:
+         """
+         List available models from OCI Generative AI service.
+         """
+         oci_config = self._get_oci_config()
+         oci_signer = self._get_oci_signer()
+         compartment_id = self.config.oci_compartment_id or ""
+
+         if oci_signer is None:
+             client = GenerativeAiClient(config=oci_config)
+         else:
+             client = GenerativeAiClient(config=oci_config, signer=oci_signer)
+
+         models: ModelCollection = client.list_models(
+             compartment_id=compartment_id,
+             # capability=MODEL_CAPABILITIES,
+             lifecycle_state="ACTIVE",
+         ).data
+
+         seen_models = set()
+         model_ids = []
+         for model in models.items:
+             if model.time_deprecated or model.time_on_demand_retired:
+                 continue
+
+             if "UNKNOWN_ENUM_VALUE" in model.capabilities or "FINE_TUNE" in model.capabilities:
+                 continue
+
+             # Use display_name + model_type as the key to avoid conflicts
+             model_key = (model.display_name, ModelType.llm)
+             if model_key in seen_models:
+                 continue
+
+             seen_models.add(model_key)
+             model_ids.append(model.display_name)
+
+             if "TEXT_EMBEDDINGS" in model.capabilities:
+                 self.embedding_models.append(model.display_name)
+
+         return model_ids
+
+     def construct_model_from_identifier(self, identifier: str) -> Model:
+         """
+         Construct a Model instance corresponding to the given identifier
+
+         Child classes can override this to customize model typing/metadata.
+
+         :param identifier: The provider's model identifier
+         :return: A Model instance
+         """
+         if identifier in self.embedding_models:
+             return Model(
+                 provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+                 provider_resource_id=identifier,
+                 identifier=identifier,
+                 model_type=ModelType.embedding,
+             )
+         return Model(
+             provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+             provider_resource_id=identifier,
+             identifier=identifier,
+             model_type=ModelType.llm,
+         )
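For reference, a sketch of how the new adapter might be wired up. This is hedged: the `OCIConfig` field names are inferred from how `oci.py` reads them above (the actual definition in `config.py`, +75 lines, is not shown in this diff), and all values are placeholders.

```python
# Hypothetical usage sketch for the new OCI inference adapter.
from llama_stack.providers.remote.inference.oci.config import OCIConfig
from llama_stack.providers.remote.inference.oci.oci import OCIInferenceAdapter

config = OCIConfig(
    oci_auth_type="config_file",            # or "instance_principal"
    oci_config_file_path="~/.oci/config",   # assumed; standard OCI CLI location
    oci_config_profile="DEFAULT",
    oci_region="us-ashburn-1",              # falls back to DEFAULT_OCI_REGION if unset
    oci_compartment_id="ocid1.compartment.oc1..example",  # required by initialize()
)

adapter = OCIInferenceAdapter(config=config)
# await adapter.initialize()  # validates auth type and compartment before any requests
```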
llama_stack/providers/remote/inference/ollama/config.py
@@ -6,20 +6,22 @@
 
  from typing import Any
 
- from pydantic import Field, SecretStr
+ from pydantic import Field, HttpUrl, SecretStr
 
  from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 
- DEFAULT_OLLAMA_URL = "http://localhost:11434"
+ DEFAULT_OLLAMA_URL = "http://localhost:11434/v1"
 
 
  class OllamaImplConfig(RemoteInferenceProviderConfig):
      auth_credential: SecretStr | None = Field(default=None, exclude=True)
 
-     url: str = DEFAULT_OLLAMA_URL
+     base_url: HttpUrl | None = Field(default=HttpUrl(DEFAULT_OLLAMA_URL))
 
      @classmethod
-     def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:
+     def sample_run_config(
+         cls, base_url: str = "${env.OLLAMA_URL:=http://localhost:11434/v1}", **kwargs
+     ) -> dict[str, Any]:
          return {
-             "url": url,
+             "base_url": base_url,
          }
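Note the breaking rename here: `url` becomes `base_url`, and the default now carries the `/v1` suffix. A minimal migration sketch, assuming pydantic's usual string-to-`HttpUrl` coercion:

```python
from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig

# 0.3.5 style (no longer valid): OllamaImplConfig(url="http://localhost:11434")
# 0.4.1: the field is `base_url` and should include the /v1 suffix;
# the adapter strips it again when talking to the native Ollama client (see below).
config = OllamaImplConfig(base_url="http://localhost:11434/v1")
```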
llama_stack/providers/remote/inference/ollama/ollama.py
@@ -9,15 +9,15 @@ import asyncio
 
  from ollama import AsyncClient as AsyncOllamaClient
 
- from llama_stack.apis.common.errors import UnsupportedModelError
- from llama_stack.apis.models import Model
  from llama_stack.log import get_logger
- from llama_stack.providers.datatypes import (
+ from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
+ from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+ from llama_stack_api import (
      HealthResponse,
      HealthStatus,
+     Model,
+     UnsupportedModelError,
  )
- from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
- from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
  logger = get_logger(name=__name__, category="inference::ollama")
 
@@ -28,6 +28,9 @@ class OllamaInferenceAdapter(OpenAIMixin):
      # automatically set by the resolver when instantiating the provider
      __provider_id__: str
 
+     # Ollama does not support the stream_options parameter
+     supports_stream_options: bool = False
+
      embedding_model_metadata: dict[str, dict[str, int]] = {
          "all-minilm:l6-v2": {
              "embedding_dimension": 384,
@@ -55,17 +58,23 @@ class OllamaInferenceAdapter(OpenAIMixin):
          # ollama client attaches itself to the current event loop (sadly?)
          loop = asyncio.get_running_loop()
          if loop not in self._clients:
-             self._clients[loop] = AsyncOllamaClient(host=self.config.url)
+             # Ollama client expects base URL without /v1 suffix
+             base_url_str = str(self.config.base_url)
+             if base_url_str.endswith("/v1"):
+                 host = base_url_str[:-3]
+             else:
+                 host = base_url_str
+             self._clients[loop] = AsyncOllamaClient(host=host)
          return self._clients[loop]
 
      def get_api_key(self):
          return "NO KEY REQUIRED"
 
      def get_base_url(self):
-         return self.config.url.rstrip("/") + "/v1"
+         return str(self.config.base_url)
 
      async def initialize(self) -> None:
-         logger.info(f"checking connectivity to Ollama at `{self.config.url}`...")
+         logger.info(f"checking connectivity to Ollama at `{self.config.base_url}`...")
          r = await self.health()
          if r["status"] == HealthStatus.ERROR:
              logger.warning(
llama_stack/providers/remote/inference/openai/config.py
@@ -6,10 +6,10 @@
 
  from typing import Any
 
- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, Field, HttpUrl
 
  from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
- from llama_stack.schema_utils import json_schema_type
+ from llama_stack_api import json_schema_type
 
 
  class OpenAIProviderDataValidator(BaseModel):
@@ -21,8 +21,8 @@ class OpenAIProviderDataValidator(BaseModel):
 
  @json_schema_type
  class OpenAIConfig(RemoteInferenceProviderConfig):
-     base_url: str = Field(
-         default="https://api.openai.com/v1",
+     base_url: HttpUrl | None = Field(
+         default=HttpUrl("https://api.openai.com/v1"),
          description="Base URL for OpenAI API",
      )
 
llama_stack/providers/remote/inference/openai/openai.py
@@ -35,4 +35,4 @@ class OpenAIInferenceAdapter(OpenAIMixin):
 
      Returns the OpenAI API base URL from the configuration.
      """
-     return self.config.base_url
+     return str(self.config.base_url)
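The same `str` to `HttpUrl` migration repeats in the passthrough, RunPod, and SambaNova configs below. A minimal sketch of the pattern, assuming pydantic v2 semantics: the URL is validated when the config loads, and the adapters convert back with `str(...)` in `get_base_url()`:

```python
from pydantic import BaseModel, HttpUrl

class ExampleConfig(BaseModel):  # stand-in for the RemoteInferenceProviderConfig subclasses
    base_url: HttpUrl | None = HttpUrl("https://api.openai.com/v1")

cfg = ExampleConfig(base_url="https://example.invalid/v1")  # str input is validated here
client_base_url = str(cfg.base_url)  # what the adapters now hand to the OpenAI client
```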
llama_stack/providers/remote/inference/passthrough/__init__.py
@@ -10,8 +10,8 @@ from .config import PassthroughImplConfig
 
 
  class PassthroughProviderDataValidator(BaseModel):
-     url: str
-     api_key: str
+     passthrough_url: str
+     passthrough_api_key: str
 
 
  async def get_adapter_impl(config: PassthroughImplConfig, _deps):
llama_stack/providers/remote/inference/passthrough/config.py
@@ -6,29 +6,24 @@
 
  from typing import Any
 
- from pydantic import Field, SecretStr
+ from pydantic import Field, HttpUrl
 
  from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
- from llama_stack.schema_utils import json_schema_type
+ from llama_stack_api import json_schema_type
 
 
  @json_schema_type
  class PassthroughImplConfig(RemoteInferenceProviderConfig):
-     url: str = Field(
+     base_url: HttpUrl | None = Field(
          default=None,
          description="The URL for the passthrough endpoint",
      )
 
-     api_key: SecretStr | None = Field(
-         default=None,
-         description="API Key for the passthrouth endpoint",
-     )
-
      @classmethod
      def sample_run_config(
-         cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
+         cls, base_url: HttpUrl | None = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
      ) -> dict[str, Any]:
          return {
-             "url": url,
+             "base_url": base_url,
              "api_key": api_key,
          }
llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -5,12 +5,14 @@
  # the root directory of this source tree.
 
  from collections.abc import AsyncIterator
- from typing import Any
 
- from llama_stack_client import AsyncLlamaStackClient
+ from openai import AsyncOpenAI
 
- from llama_stack.apis.inference import (
+ from llama_stack.core.request_headers import NeedsRequestProviderData
+ from llama_stack.providers.utils.inference.stream_utils import wrap_async_stream
+ from llama_stack_api import (
      Inference,
+     Model,
      OpenAIChatCompletion,
      OpenAIChatCompletionChunk,
      OpenAIChatCompletionRequestWithExtraBody,
@@ -19,104 +21,124 @@ from llama_stack.apis.inference import (
      OpenAIEmbeddingsRequestWithExtraBody,
      OpenAIEmbeddingsResponse,
  )
- from llama_stack.apis.models import Model
- from llama_stack.core.library_client import convert_pydantic_to_json_value
- from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 
  from .config import PassthroughImplConfig
 
 
- class PassthroughInferenceAdapter(Inference):
+ class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):
      def __init__(self, config: PassthroughImplConfig) -> None:
-         ModelRegistryHelper.__init__(self)
          self.config = config
 
+     async def initialize(self) -> None:
+         pass
+
+     async def shutdown(self) -> None:
+         pass
+
      async def unregister_model(self, model_id: str) -> None:
          pass
 
      async def register_model(self, model: Model) -> Model:
          return model
 
-     def _get_client(self) -> AsyncLlamaStackClient:
-         passthrough_url = None
-         passthrough_api_key = None
-         provider_data = None
-
-         if self.config.url is not None:
-             passthrough_url = self.config.url
-         else:
-             provider_data = self.get_request_provider_data()
-             if provider_data is None or not provider_data.passthrough_url:
-                 raise ValueError(
-                     'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
-                 )
-             passthrough_url = provider_data.passthrough_url
-
-         if self.config.api_key is not None:
-             passthrough_api_key = self.config.api_key.get_secret_value()
-         else:
-             provider_data = self.get_request_provider_data()
-             if provider_data is None or not provider_data.passthrough_api_key:
-                 raise ValueError(
-                     'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
-                 )
-             passthrough_api_key = provider_data.passthrough_api_key
-
-         return AsyncLlamaStackClient(
-             base_url=passthrough_url,
-             api_key=passthrough_api_key,
-             provider_data=provider_data,
+     async def list_models(self) -> list[Model]:
+         """List models by calling the downstream /v1/models endpoint."""
+         client = self._get_openai_client()
+
+         response = await client.models.list()
+
+         # Convert from OpenAI format to Llama Stack Model format
+         models = []
+         for model_data in response.data:
+             downstream_model_id = model_data.id
+             custom_metadata = getattr(model_data, "custom_metadata", {}) or {}
+
+             # Prefix identifier with provider ID for local registry
+             local_identifier = f"{self.__provider_id__}/{downstream_model_id}"
+
+             model = Model(
+                 identifier=local_identifier,
+                 provider_id=self.__provider_id__,
+                 provider_resource_id=downstream_model_id,
+                 model_type=custom_metadata.get("model_type", "llm"),
+                 metadata=custom_metadata,
+             )
+             models.append(model)
+
+         return models
+
+     async def should_refresh_models(self) -> bool:
+         """Passthrough should refresh models since they come from downstream dynamically."""
+         return self.config.refresh_models
+
+     def _get_openai_client(self) -> AsyncOpenAI:
+         """Get an AsyncOpenAI client configured for the downstream server."""
+         base_url = self._get_passthrough_url()
+         api_key = self._get_passthrough_api_key()
+
+         return AsyncOpenAI(
+             base_url=f"{base_url.rstrip('/')}/v1",
+             api_key=api_key,
          )
 
-     async def openai_embeddings(
-         self,
-         params: OpenAIEmbeddingsRequestWithExtraBody,
-     ) -> OpenAIEmbeddingsResponse:
-         raise NotImplementedError()
+     def _get_passthrough_url(self) -> str:
+         """Get the passthrough URL from config or provider data."""
+         if self.config.base_url is not None:
+             return str(self.config.base_url)
+
+         provider_data = self.get_request_provider_data()
+         if provider_data is None:
+             raise ValueError(
+                 'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
+             )
+         return provider_data.passthrough_url
+
+     def _get_passthrough_api_key(self) -> str:
+         """Get the passthrough API key from config or provider data."""
+         if self.config.auth_credential is not None:
+             return self.config.auth_credential.get_secret_value()
+
+         provider_data = self.get_request_provider_data()
+         if provider_data is None:
+             raise ValueError(
+                 'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
+             )
+         return provider_data.passthrough_api_key
 
      async def openai_completion(
          self,
          params: OpenAICompletionRequestWithExtraBody,
-     ) -> OpenAICompletion:
-         client = self._get_client()
-         model_obj = await self.model_store.get_model(params.model)
-
-         params = params.model_copy()
-         params.model = model_obj.provider_resource_id
-
+     ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
+         """Forward completion request to downstream using OpenAI client."""
+         client = self._get_openai_client()
          request_params = params.model_dump(exclude_none=True)
+         response = await client.completions.create(**request_params)
+
+         if params.stream:
+             return wrap_async_stream(response)
 
-         return await client.inference.openai_completion(**request_params)
+         return response  # type: ignore[return-value]
 
      async def openai_chat_completion(
          self,
          params: OpenAIChatCompletionRequestWithExtraBody,
      ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-         client = self._get_client()
-         model_obj = await self.model_store.get_model(params.model)
+         """Forward chat completion request to downstream using OpenAI client."""
+         client = self._get_openai_client()
+         request_params = params.model_dump(exclude_none=True)
+         response = await client.chat.completions.create(**request_params)
 
-         params = params.model_copy()
-         params.model = model_obj.provider_resource_id
+         if params.stream:
+             return wrap_async_stream(response)
 
-         request_params = params.model_dump(exclude_none=True)
+         return response  # type: ignore[return-value]
 
-         return await client.inference.openai_chat_completion(**request_params)
-
-     def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]:
-         json_params = {}
-         for key, value in request_params.items():
-             json_input = convert_pydantic_to_json_value(value)
-             if isinstance(json_input, dict):
-                 json_input = {k: v for k, v in json_input.items() if v is not None}
-             elif isinstance(json_input, list):
-                 json_input = [x for x in json_input if x is not None]
-                 new_input = []
-                 for x in json_input:
-                     if isinstance(x, dict):
-                         x = {k: v for k, v in x.items() if v is not None}
-                     new_input.append(x)
-                 json_input = new_input
-
-             json_params[key] = json_input
-
-         return json_params
+     async def openai_embeddings(
+         self,
+         params: OpenAIEmbeddingsRequestWithExtraBody,
+     ) -> OpenAIEmbeddingsResponse:
+         """Forward embeddings request to downstream using OpenAI client."""
+         client = self._get_openai_client()
+         request_params = params.model_dump(exclude_none=True)
+         response = await client.embeddings.create(**request_params)
+         return response  # type: ignore
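With `base_url` and the credential now optional in config, the rewritten adapter can take both per request from the `X-LlamaStack-Provider-Data` header, per the error messages above. A hedged sketch of a caller supplying that header (server address and route are illustrative):

```python
import json

import httpx

headers = {
    "X-LlamaStack-Provider-Data": json.dumps(
        {
            "passthrough_url": "https://downstream.example.com",  # placeholder
            "passthrough_api_key": "sk-example",                  # placeholder
        }
    )
}

# Any inference request routed to the passthrough provider picks these up via
# get_request_provider_data(); key names match PassthroughProviderDataValidator.
resp = httpx.post(
    "http://localhost:8321/v1/chat/completions",  # assumed local Llama Stack server
    headers=headers,
    json={"model": "passthrough/my-model", "messages": [{"role": "user", "content": "hi"}]},
)
```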
llama_stack/providers/remote/inference/runpod/config.py
@@ -6,15 +6,22 @@
 
  from typing import Any
 
- from pydantic import Field, SecretStr
+ from pydantic import BaseModel, Field, HttpUrl, SecretStr
 
  from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
- from llama_stack.schema_utils import json_schema_type
+ from llama_stack_api import json_schema_type
+
+
+ class RunpodProviderDataValidator(BaseModel):
+     runpod_api_token: str | None = Field(
+         default=None,
+         description="API token for RunPod models",
+     )
 
 
  @json_schema_type
  class RunpodImplConfig(RemoteInferenceProviderConfig):
-     url: str | None = Field(
+     base_url: HttpUrl | None = Field(
          default=None,
          description="The URL for the Runpod model serving endpoint",
      )
@@ -27,6 +34,6 @@ class RunpodImplConfig(RemoteInferenceProviderConfig):
      @classmethod
      def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
          return {
-             "url": "${env.RUNPOD_URL:=}",
-             "api_token": "${env.RUNPOD_API_TOKEN}",
+             "base_url": "${env.RUNPOD_URL:=}",
+             "api_token": "${env.RUNPOD_API_TOKEN:=}",
          }
llama_stack/providers/remote/inference/runpod/runpod.py
@@ -4,13 +4,6 @@
  # This source code is licensed under the terms described in the LICENSE file in
  # the root directory of this source tree.
 
- from collections.abc import AsyncIterator
-
- from llama_stack.apis.inference import (
-     OpenAIChatCompletion,
-     OpenAIChatCompletionChunk,
-     OpenAIChatCompletionRequestWithExtraBody,
- )
  from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
  from .config import RunpodImplConfig
@@ -24,19 +17,8 @@ class RunpodInferenceAdapter(OpenAIMixin):
      """
 
      config: RunpodImplConfig
+     provider_data_api_key_field: str = "runpod_api_token"
 
      def get_base_url(self) -> str:
          """Get base URL for OpenAI client."""
-         return self.config.url
-
-     async def openai_chat_completion(
-         self,
-         params: OpenAIChatCompletionRequestWithExtraBody,
-     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-         """Override to add RunPod-specific stream_options requirement."""
-         params = params.model_copy()
-
-         if params.stream and not params.stream_options:
-             params.stream_options = {"include_usage": True}
-
-         return await super().openai_chat_completion(params)
+         return str(self.config.base_url)
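The RunPod-specific `openai_chat_completion` override is gone; judging by the Ollama change above, `stream_options` handling is presumably centralized in `OpenAIMixin` now. The new `provider_data_api_key_field` suggests the API token can also arrive per request; a hedged sketch of that header, with the field name taken from `RunpodProviderDataValidator`:

```python
import json

# Assumed mixin behavior: when no token is configured, the key named by
# provider_data_api_key_field ("runpod_api_token") is read from provider data.
headers = {
    "X-LlamaStack-Provider-Data": json.dumps({"runpod_api_token": "rp-example-token"})
}
```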
llama_stack/providers/remote/inference/sambanova/config.py
@@ -6,10 +6,10 @@
 
  from typing import Any
 
- from pydantic import BaseModel, Field
+ from pydantic import BaseModel, Field, HttpUrl
 
  from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
- from llama_stack.schema_utils import json_schema_type
+ from llama_stack_api import json_schema_type
 
 
  class SambaNovaProviderDataValidator(BaseModel):
@@ -21,14 +21,14 @@ class SambaNovaProviderDataValidator(BaseModel):
 
  @json_schema_type
  class SambaNovaImplConfig(RemoteInferenceProviderConfig):
-     url: str = Field(
-         default="https://api.sambanova.ai/v1",
+     base_url: HttpUrl | None = Field(
+         default=HttpUrl("https://api.sambanova.ai/v1"),
          description="The URL for the SambaNova AI server",
      )
 
      @classmethod
      def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
          return {
-             "url": "https://api.sambanova.ai/v1",
+             "base_url": "https://api.sambanova.ai/v1",
              "api_key": api_key,
          }
llama_stack/providers/remote/inference/sambanova/sambanova.py
@@ -25,4 +25,4 @@ class SambaNovaInferenceAdapter(OpenAIMixin):
 
      :return: The SambaNova base URL
      """
-     return self.config.url
+     return str(self.config.base_url)