llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/remote/inference/databricks/databricks.py

@@ -4,13 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from collections.abc import Iterable
+from collections.abc import AsyncIterator, Iterable
 
 from databricks.sdk import WorkspaceClient
 
-from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 
 from .config import DatabricksImplConfig
 
@@ -20,6 +20,8 @@ logger = get_logger(name=__name__, category="inference::databricks")
 class DatabricksInferenceAdapter(OpenAIMixin):
     config: DatabricksImplConfig
 
+    provider_data_api_key_field: str = "databricks_api_token"
+
     # source: https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/supported-models
     embedding_model_metadata: dict[str, dict[str, int]] = {
         "databricks-gte-large-en": {"embedding_dimension": 1024, "context_length": 8192},
@@ -27,18 +29,26 @@ class DatabricksInferenceAdapter(OpenAIMixin):
     }
 
     def get_base_url(self) -> str:
-        return f"{self.config.url}/serving-endpoints"
+        return str(self.config.base_url)
 
     async def list_provider_model_ids(self) -> Iterable[str]:
+        # Filter out None values from endpoint names
+        api_token = self._get_api_key_from_config_or_provider_data()
+        # WorkspaceClient expects base host without /serving-endpoints suffix
+        base_url_str = str(self.config.base_url)
+        if base_url_str.endswith("/serving-endpoints"):
+            host = base_url_str[:-18]  # Remove '/serving-endpoints'
+        else:
+            host = base_url_str
         return [
-            endpoint.name
+            endpoint.name  # type: ignore[misc]
            for endpoint in WorkspaceClient(
-                host=self.config.url, token=self.get_api_key()
+                host=host, token=api_token
            ).serving_endpoints.list()  # TODO: this is not async
        ]
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
         raise NotImplementedError()
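
With provider_data_api_key_field set to "databricks_api_token", a Databricks token can now be supplied per request rather than only in the run config. A minimal sketch (not part of the diff), assuming a locally running stack and Llama Stack's X-LlamaStack-Provider-Data header convention; the token value and server address are placeholders:

import json

import httpx

provider_data = {"databricks_api_token": "dapi-example-token"}  # hypothetical token

client = httpx.Client(
    base_url="http://localhost:8321",  # assumed llama-stack server address
    headers={"X-LlamaStack-Provider-Data": json.dumps(provider_data)},
)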

llama_stack/providers/remote/inference/fireworks/config.py

@@ -6,22 +6,22 @@
 
 from typing import Any
 
-from pydantic import Field
+from pydantic import Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
 class FireworksImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.fireworks.ai/inference/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.fireworks.ai/inference/v1"),
         description="The URL for the Fireworks server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.fireworks.ai/inference/v1",
+            "base_url": "https://api.fireworks.ai/inference/v1",
             "api_key": api_key,
         }

llama_stack/providers/remote/inference/fireworks/fireworks.py

@@ -24,4 +24,4 @@ class FireworksInferenceAdapter(OpenAIMixin):
     provider_data_api_key_field: str = "fireworks_api_key"
 
     def get_base_url(self) -> str:
-        return "https://api.fireworks.ai/inference/v1"
+        return str(self.config.base_url)
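
The recurring change across provider configs in this release is the url → base_url rename with HttpUrl validation; the same pattern repeats below for Groq, the Llama API, and NVIDIA. A quick check against the Fireworks defaults shown above:

from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig

# 0.3.x run configs used the `url` key; 0.4.0 expects `base_url`.
cfg = FireworksImplConfig.sample_run_config()
assert cfg == {
    "base_url": "https://api.fireworks.ai/inference/v1",
    "api_key": "${env.FIREWORKS_API_KEY:=}",
}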

llama_stack/providers/remote/inference/gemini/config.py

@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 class GeminiProviderDataValidator(BaseModel):

llama_stack/providers/remote/inference/gemini/gemini.py

@@ -4,15 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from openai import NOT_GIVEN
+from typing import Any
 
-from llama_stack.apis.inference import (
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import GeminiConfig
 
@@ -37,21 +37,20 @@ class GeminiInferenceAdapter(OpenAIMixin):
         Override embeddings method to handle Gemini's missing usage statistics.
         Gemini's embedding API doesn't return usage information, so we provide default values.
         """
-        # Prepare request parameters
-        request_params = {
+        # Build request params conditionally to avoid NotGiven/Omit type mismatch
+        request_params: dict[str, Any] = {
             "model": await self._get_provider_model_id(params.model),
             "input": params.input,
-            "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
-            "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN,
-            "user": params.user if params.user is not None else NOT_GIVEN,
         }
+        if params.encoding_format is not None:
+            request_params["encoding_format"] = params.encoding_format
+        if params.dimensions is not None:
+            request_params["dimensions"] = params.dimensions
+        if params.user is not None:
+            request_params["user"] = params.user
+        if params.model_extra:
+            request_params["extra_body"] = params.model_extra
 
-        # Add extra_body if present
-        extra_body = params.model_extra
-        if extra_body:
-            request_params["extra_body"] = extra_body
-
-        # Call OpenAI embeddings API with properly typed parameters
         response = await self.client.embeddings.create(**request_params)
 
         data = []
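
The Gemini change replaces NOT_GIVEN sentinels with conditional key insertion, since newer openai client stubs type omitted parameters as Omit rather than NotGiven (as the diff's own comment notes). Restated as a standalone sketch; the function name and parameters here are illustrative, not from the diff:

from typing import Any

def build_embedding_params(
    model: str,
    inputs: list[str],
    encoding_format: str | None = None,
    dimensions: int | None = None,
) -> dict[str, Any]:
    # Optional parameters are added only when set, so no sentinel values
    # ever reach client.embeddings.create(**params).
    params: dict[str, Any] = {"model": model, "input": inputs}
    if encoding_format is not None:
        params["encoding_format"] = encoding_format
    if dimensions is not None:
        params["dimensions"] = dimensions
    return params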

llama_stack/providers/remote/inference/groq/config.py

@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 class GroqProviderDataValidator(BaseModel):
@@ -21,14 +21,14 @@ class GroqProviderDataValidator(BaseModel):
 
 @json_schema_type
 class GroqConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.groq.com",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.groq.com/openai/v1"),
         description="The URL for the Groq AI server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.groq.com",
+            "base_url": "https://api.groq.com/openai/v1",
             "api_key": api_key,
         }

llama_stack/providers/remote/inference/groq/groq.py

@@ -15,4 +15,4 @@ class GroqInferenceAdapter(OpenAIMixin):
     provider_data_api_key_field: str = "groq_api_key"
 
     def get_base_url(self) -> str:
-        return f"{self.config.url}/openai/v1"
+        return str(self.config.base_url)

llama_stack/providers/remote/inference/llama_openai_compat/config.py

@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
 
 
 class LlamaProviderDataValidator(BaseModel):
@@ -21,14 +21,14 @@ class LlamaProviderDataValidator(BaseModel):
 
 @json_schema_type
 class LlamaCompatConfig(RemoteInferenceProviderConfig):
-    openai_compat_api_base: str = Field(
-        default="https://api.llama.com/compat/v1/",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.llama.com/compat/v1/"),
         description="The URL for the Llama API server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
         return {
-            "openai_compat_api_base": "https://api.llama.com/compat/v1/",
+            "base_url": "https://api.llama.com/compat/v1/",
             "api_key": api_key,
         }

llama_stack/providers/remote/inference/llama_openai_compat/llama.py

@@ -4,15 +4,17 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.inference.inference import (
+from collections.abc import AsyncIterator
+
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.log import get_logger
-from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 logger = get_logger(name=__name__, category="inference::llama_openai_compat")
 
@@ -31,12 +33,12 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
 
         :return: The Llama API base URL
         """
-        return self.config.openai_compat_api_base
+        return str(self.config.base_url)
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
         raise NotImplementedError()
 
     async def openai_embeddings(

llama_stack/providers/remote/inference/nvidia/__init__.py

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.inference import Inference
+from llama_stack_api import Inference
 
 from .config import NVIDIAConfig
 

llama_stack/providers/remote/inference/nvidia/config.py

@@ -7,10 +7,17 @@
 import os
 from typing import Any
 
-from pydantic import Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
+from llama_stack_api import json_schema_type
+
+
+class NVIDIAProviderDataValidator(BaseModel):
+    nvidia_api_key: str | None = Field(
+        default=None,
+        description="API key for NVIDIA NIM models",
+    )
 
 
 @json_schema_type
@@ -21,6 +28,7 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
     Attributes:
         url (str): A base url for accessing the NVIDIA NIM, e.g. http://localhost:8000
         api_key (str): The access key for the hosted NIM endpoints
+        rerank_model_to_url (dict[str, str]): Mapping of rerank model identifiers to their API endpoints
 
     There are two ways to access NVIDIA NIMs -
      0. Hosted: Preview APIs hosted at https://integrate.api.nvidia.com
@@ -36,29 +44,31 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
     URL of your running NVIDIA NIM and do not need to set the api_key.
     """
 
-    url: str = Field(
-        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"),
+    base_url: HttpUrl | None = Field(
+        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"),
         description="A base url for accessing the NVIDIA NIM",
     )
     timeout: int = Field(
         default=60,
         description="Timeout for the HTTP requests",
     )
-    append_api_version: bool = Field(
-        default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
-        description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
+    rerank_model_to_url: dict[str, str] = Field(
+        default_factory=lambda: {
+            "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
+            "nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking",
+            "nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
+        },
+        description="Mapping of rerank model identifiers to their API endpoints. ",
     )
 
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
+        base_url: HttpUrl | None = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}",
         api_key: str = "${env.NVIDIA_API_KEY:=}",
-        append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}",
         **kwargs,
     ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
             "api_key": api_key,
-            "append_api_version": append_api_version,
         }
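
rerank_model_to_url replaces the old append_api_version flag and drives both model listing and rerank endpoint selection. A sketch of extending the mapping, assuming the remaining NVIDIAConfig fields keep their defaults; the extra model ID and URL are hypothetical:

from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig

config = NVIDIAConfig(
    rerank_model_to_url={
        **NVIDIAConfig().rerank_model_to_url,  # keep the three built-in mappings
        "nvidia/example-reranker": "https://ai.api.nvidia.com/v1/retrieval/nvidia/example/reranking",
    },
)
print(sorted(config.rerank_model_to_url))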

llama_stack/providers/remote/inference/nvidia/nvidia.py

@@ -5,8 +5,20 @@
 # the root directory of this source tree.
 
 
+from collections.abc import Iterable
+
+import aiohttp
+
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    RerankData,
+    RerankResponse,
+)
 
 from . import NVIDIAConfig
 from .utils import _is_nvidia_hosted
@@ -17,6 +29,8 @@ logger = get_logger(name=__name__, category="inference::nvidia")
 class NVIDIAInferenceAdapter(OpenAIMixin):
     config: NVIDIAConfig
 
+    provider_data_api_key_field: str = "nvidia_api_key"
+
     """
     NVIDIA Inference Adapter for Llama Stack.
     """
@@ -30,11 +44,11 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
     }
 
     async def initialize(self) -> None:
-        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
+        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.base_url})...")
 
         if _is_nvidia_hosted(self.config):
             if not self.config.auth_credential:
-                raise RuntimeError(
+                logger.error(
                     "API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
                 )
 
@@ -58,4 +72,102 @@
 
         :return: The NVIDIA API base URL
         """
-        return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
+        return str(self.config.base_url)
+
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        """
+        Return both dynamic model IDs and statically configured rerank model IDs.
+        """
+        dynamic_ids: Iterable[str] = []
+        try:
+            dynamic_ids = await super().list_provider_model_ids()
+        except Exception:
+            # If the dynamic listing fails, proceed with just configured rerank IDs
+            dynamic_ids = []
+
+        configured_rerank_ids = list(self.config.rerank_model_to_url.keys())
+        return list(dict.fromkeys(list(dynamic_ids) + configured_rerank_ids))  # remove duplicates
+
+    def construct_model_from_identifier(self, identifier: str) -> Model:
+        """
+        Classify rerank models from config; otherwise use the base behavior.
+        """
+        if identifier in self.config.rerank_model_to_url:
+            return Model(
+                provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+                provider_resource_id=identifier,
+                identifier=identifier,
+                model_type=ModelType.rerank,
+            )
+        return super().construct_model_from_identifier(identifier)
+
+    async def rerank(
+        self,
+        model: str,
+        query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
+        items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam],
+        max_num_results: int | None = None,
+    ) -> RerankResponse:
+        provider_model_id = await self._get_provider_model_id(model)
+
+        ranking_url = self.get_base_url()
+
+        if _is_nvidia_hosted(self.config) and provider_model_id in self.config.rerank_model_to_url:
+            ranking_url = self.config.rerank_model_to_url[provider_model_id]
+
+        logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}")
+
+        # Convert query to text format
+        if isinstance(query, str):
+            query_text = query
+        elif isinstance(query, OpenAIChatCompletionContentPartTextParam):
+            query_text = query.text
+        else:
+            raise ValueError("Query must be a string or text content part")
+
+        # Convert items to text format
+        passages = []
+        for item in items:
+            if isinstance(item, str):
+                passages.append({"text": item})
+            elif isinstance(item, OpenAIChatCompletionContentPartTextParam):
+                passages.append({"text": item.text})
+            else:
+                raise ValueError("Items must be strings or text content parts")
+
+        payload = {
+            "model": provider_model_id,
+            "query": {"text": query_text},
+            "passages": passages,
+        }
+
+        headers = {
+            "Authorization": f"Bearer {self.get_api_key()}",
+            "Content-Type": "application/json",
+        }
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(ranking_url, headers=headers, json=payload) as response:
+                    if response.status != 200:
+                        response_text = await response.text()
+                        raise ConnectionError(
+                            f"NVIDIA rerank API request failed with status {response.status}: {response_text}"
+                        )
+
+                    result = await response.json()
+                    rankings = result.get("rankings", [])
+
+                    # Convert to RerankData format
+                    rerank_data = []
+                    for ranking in rankings:
+                        rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"]))
+
+                    # Apply max_num_results limit
+                    if max_num_results is not None:
+                        rerank_data = rerank_data[:max_num_results]
+
+                    return RerankResponse(data=rerank_data)
+
+        except aiohttp.ClientError as e:
+            raise ConnectionError(f"Failed to connect to NVIDIA rerank API at {ranking_url}: {e}") from e
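
A usage sketch for the new rerank method, assuming an already-initialized NVIDIAInferenceAdapter with one of the configured rerank models registered; the response fields follow RerankData above, and the item texts are placeholders:

import asyncio

async def demo(adapter):  # adapter: an initialized NVIDIAInferenceAdapter
    response = await adapter.rerank(
        model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
        query="what is the GDP of France?",
        items=[
            "France's GDP was roughly $3 trillion in 2023.",
            "The Eiffel Tower is in Paris.",
        ],
        max_num_results=1,
    )
    for item in response.data:
        print(item.index, item.relevance_score)

# asyncio.run(demo(adapter))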

llama_stack/providers/remote/inference/nvidia/utils.py

@@ -8,4 +8,4 @@ from . import NVIDIAConfig
 
 
 def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
-    return "integrate.api.nvidia.com" in config.url
+    return "integrate.api.nvidia.com" in str(config.base_url)

llama_stack/providers/remote/inference/oci/__init__.py (new file)

@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack_api import InferenceProvider
+
+from .config import OCIConfig
+
+
+async def get_adapter_impl(config: OCIConfig, _deps) -> InferenceProvider:
+    from .oci import OCIInferenceAdapter
+
+    adapter = OCIInferenceAdapter(config=config)
+    await adapter.initialize()
+    return adapter

llama_stack/providers/remote/inference/oci/auth.py (new file)

@@ -0,0 +1,79 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import Generator, Mapping
+from typing import Any, override
+
+import httpx
+import oci
+import requests
+from oci.config import DEFAULT_LOCATION, DEFAULT_PROFILE
+
+OciAuthSigner = type[oci.signer.AbstractBaseSigner]
+
+
+class HttpxOciAuth(httpx.Auth):
+    """
+    Custom HTTPX authentication class that implements OCI request signing.
+
+    This class handles the authentication flow for HTTPX requests by signing them
+    using the OCI Signer, which adds the necessary authentication headers for
+    OCI API calls.
+
+    Attributes:
+        signer (oci.signer.Signer): The OCI signer instance used for request signing
+    """
+
+    def __init__(self, signer: OciAuthSigner):
+        self.signer = signer
+
+    @override
+    def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]:
+        # Read the request content to handle streaming requests properly
+        try:
+            content = request.content
+        except httpx.RequestNotRead:
+            # For streaming requests, we need to read the content first
+            content = request.read()
+
+        req = requests.Request(
+            method=request.method,
+            url=str(request.url),
+            headers=dict(request.headers),
+            data=content,
+        )
+        prepared_request = req.prepare()
+
+        # Sign the request using the OCI Signer
+        self.signer.do_request_sign(prepared_request)  # type: ignore
+
+        # Update the original HTTPX request with the signed headers
+        request.headers.update(prepared_request.headers)
+
+        yield request
+
+
+class OciInstancePrincipalAuth(HttpxOciAuth):
+    def __init__(self, **kwargs: Mapping[str, Any]):
+        self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner(**kwargs)
+
+
+class OciUserPrincipalAuth(HttpxOciAuth):
+    def __init__(self, config_file: str = DEFAULT_LOCATION, profile_name: str = DEFAULT_PROFILE):
+        config = oci.config.from_file(config_file, profile_name)
+        oci.config.validate_config(config)  # type: ignore
+        key_content = ""
+        with open(config["key_file"]) as f:
+            key_content = f.read()
+
+        self.signer = oci.signer.Signer(
+            tenancy=config["tenancy"],
+            user=config["user"],
+            fingerprint=config["fingerprint"],
+            private_key_file_location=config.get("key_file"),
+            pass_phrase="none",  # type: ignore
+            private_key_content=key_content,
+        )
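
HttpxOciAuth plugs OCI request signing into httpx's auth hook, so any httpx client can call signed OCI endpoints. A sketch, assuming a valid ~/.oci/config exists and using a region-specific Generative AI inference endpoint as the base URL:

import httpx

from llama_stack.providers.remote.inference.oci.auth import OciUserPrincipalAuth

auth = OciUserPrincipalAuth()  # reads the DEFAULT profile from the default location
client = httpx.Client(
    base_url="https://inference.generativeai.us-ashburn-1.oci.oraclecloud.com",
    auth=auth,  # every request passes through auth_flow and gets signed
)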

llama_stack/providers/remote/inference/oci/config.py (new file)

@@ -0,0 +1,75 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
+
+
+class OCIProviderDataValidator(BaseModel):
+    oci_auth_type: str = Field(
+        description="OCI authentication type (must be one of: instance_principal, config_file)",
+    )
+    oci_region: str = Field(
+        description="OCI region (e.g., us-ashburn-1)",
+    )
+    oci_compartment_id: str = Field(
+        description="OCI compartment ID for the Generative AI service",
+    )
+    oci_config_file_path: str | None = Field(
+        default="~/.oci/config",
+        description="OCI config file path (required if oci_auth_type is config_file)",
+    )
+    oci_config_profile: str | None = Field(
+        default="DEFAULT",
+        description="OCI config profile (required if oci_auth_type is config_file)",
+    )
+
+
+@json_schema_type
+class OCIConfig(RemoteInferenceProviderConfig):
+    oci_auth_type: str = Field(
+        description="OCI authentication type (must be one of: instance_principal, config_file)",
+        default_factory=lambda: os.getenv("OCI_AUTH_TYPE", "instance_principal"),
+    )
+    oci_region: str = Field(
+        default_factory=lambda: os.getenv("OCI_REGION", "us-ashburn-1"),
+        description="OCI region (e.g., us-ashburn-1)",
+    )
+    oci_compartment_id: str = Field(
+        default_factory=lambda: os.getenv("OCI_COMPARTMENT_OCID", ""),
+        description="OCI compartment ID for the Generative AI service",
+    )
+    oci_config_file_path: str = Field(
+        default_factory=lambda: os.getenv("OCI_CONFIG_FILE_PATH", "~/.oci/config"),
+        description="OCI config file path (required if oci_auth_type is config_file)",
+    )
+    oci_config_profile: str = Field(
+        default_factory=lambda: os.getenv("OCI_CLI_PROFILE", "DEFAULT"),
+        description="OCI config profile (required if oci_auth_type is config_file)",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        oci_auth_type: str = "${env.OCI_AUTH_TYPE:=instance_principal}",
+        oci_config_file_path: str = "${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}",
+        oci_config_profile: str = "${env.OCI_CLI_PROFILE:=DEFAULT}",
+        oci_region: str = "${env.OCI_REGION:=us-ashburn-1}",
+        oci_compartment_id: str = "${env.OCI_COMPARTMENT_OCID:=}",
+        **kwargs,
+    ) -> dict[str, Any]:
+        return {
+            "oci_auth_type": oci_auth_type,
+            "oci_config_file_path": oci_config_file_path,
+            "oci_config_profile": oci_config_profile,
+            "oci_region": oci_region,
+            "oci_compartment_id": oci_compartment_id,
+        }