llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/utils/responses/responses_store.py
@@ -4,25 +4,22 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.agents import (
-    Order,
-)
-from llama_stack.apis.agents.openai_responses import (
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
+from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
+from llama_stack.log import get_logger
+from llama_stack_api import (
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
     OpenAIDeleteResponseObject,
+    OpenAIMessageParam,
     OpenAIResponseInput,
     OpenAIResponseObject,
     OpenAIResponseObjectWithInput,
+    Order,
 )
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
-from llama_stack.log import get_logger
-
-from ..sqlstore.api import ColumnDefinition, ColumnType
-from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import sqlstore_impl
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
 
 logger = get_logger(name=__name__, category="openai_responses")
 
@@ -252,19 +249,12 @@ class ResponsesStore:
         # Serialize messages to dict format for JSON storage
         messages_data = [msg.model_dump() for msg in messages]
 
-        # Upsert: try insert first, update if exists
-        try:
-            await self.sql_store.insert(
-                table="conversation_messages",
-                data={"conversation_id": conversation_id, "messages": messages_data},
-            )
-        except Exception:
-            # If insert fails due to ID conflict, update existing record
-            await self.sql_store.update(
-                table="conversation_messages",
-                data={"messages": messages_data},
-                where={"conversation_id": conversation_id},
-            )
+        await self.sql_store.upsert(
+            table="conversation_messages",
+            data={"conversation_id": conversation_id, "messages": messages_data},
+            conflict_columns=["conversation_id"],
+            update_columns=["messages"],
+        )
 
         logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}")
 
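The hunk above replaces a racy insert-then-update fallback with a single atomic `upsert`. As a rough sketch of what a `conflict_columns`/`update_columns` upsert can compile down to, here is a minimal SQLAlchemy example using dialect-level ON CONFLICT support (illustrative only; the actual implementation in `sqlalchemy_sqlstore.py` may differ):

```python
# A rough sketch of an atomic upsert with SQLAlchemy's dialect-level ON CONFLICT
# support (SQLite shown; sqlalchemy.dialects.postgresql.insert is analogous).
# Illustrative only -- not necessarily how sqlalchemy_sqlstore.py implements upsert().
from sqlalchemy import Column, MetaData, String, Table, Text, create_engine
from sqlalchemy.dialects.sqlite import insert

metadata = MetaData()
conversation_messages = Table(
    "conversation_messages",
    metadata,
    Column("conversation_id", String, primary_key=True),
    Column("messages", Text),
)


def upsert(engine, data: dict, conflict_columns: list[str], update_columns: list[str]) -> None:
    stmt = insert(conversation_messages).values(**data)
    stmt = stmt.on_conflict_do_update(
        index_elements=conflict_columns,
        # "excluded" names the row that would have been inserted, so only the
        # requested columns are overwritten when the key already exists.
        set_={col: stmt.excluded[col] for col in update_columns},
    )
    with engine.begin() as conn:
        conn.execute(stmt)


engine = create_engine("sqlite:///:memory:")
metadata.create_all(engine)
upsert(engine, {"conversation_id": "c1", "messages": "[]"}, ["conversation_id"], ["messages"])
```

Pushing the conflict resolution into the database removes the window between the failed insert and the follow-up update that the old try/except version left open.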
llama_stack/providers/utils/scoring/aggregation_utils.py
@@ -6,8 +6,7 @@
 import statistics
 from typing import Any
 
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import AggregationFunctionType
+from llama_stack_api import AggregationFunctionType, ScoringResultRow
 
 
 def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]:
llama_stack/providers/utils/scoring/base_scoring_fn.py
@@ -6,9 +6,8 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-from llama_stack.apis.scoring import ScoringFnParams, ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFn
 from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
+from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow
 
 
 class BaseScoringFn(ABC):
llama_stack/providers/utils/tools/mcp.py
@@ -15,18 +15,55 @@ from mcp import types as mcp_types
 from mcp.client.sse import sse_client
 from mcp.client.streamable_http import streamablehttp_client
 
-from llama_stack.apis.common.content_types import ImageContentItem, InterleavedContentItem, TextContentItem
-from llama_stack.apis.tools import (
+from llama_stack.core.datatypes import AuthenticationRequiredError
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.ttl_dict import TTLDict
+from llama_stack_api import (
+    ImageContentItem,
+    InterleavedContentItem,
     ListToolDefsResponse,
+    TextContentItem,
     ToolDef,
     ToolInvocationResult,
+    _URLOrData,
 )
-from llama_stack.core.datatypes import AuthenticationRequiredError
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.tools.ttl_dict import TTLDict
 
 logger = get_logger(__name__, category="tools")
 
+
+def prepare_mcp_headers(base_headers: dict[str, str] | None, authorization: str | None) -> dict[str, str]:
+    """
+    Prepare headers for MCP requests with authorization support.
+
+    Args:
+        base_headers: Base headers dictionary (can be None)
+        authorization: OAuth access token (without "Bearer " prefix)
+
+    Returns:
+        Headers dictionary with Authorization header if token provided
+
+    Raises:
+        ValueError: If Authorization header is specified in the headers dict (security risk)
+    """
+    headers = dict(base_headers or {})
+
+    # Security check: reject any Authorization header in the headers dict
+    # Users must use the authorization parameter instead to avoid security risks
+    existing_keys_lower = {k.lower() for k in headers.keys()}
+    if "authorization" in existing_keys_lower:
+        raise ValueError(
+            "For security reasons, Authorization header cannot be passed via 'headers'. "
+            "Please use the 'authorization' parameter instead."
+        )
+
+    # Add Authorization header if token provided
+    if authorization:
+        # OAuth access token - add "Bearer " prefix
+        headers["Authorization"] = f"Bearer {authorization}"
+
+    return headers
+
+
 protocol_cache = TTLDict(ttl_seconds=3600)
 
 
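The helper's behavior follows directly from the code above; a quick illustrative check (header and token values are made up):

```python
# Derived directly from prepare_mcp_headers above; values are made up.
headers = prepare_mcp_headers({"X-Trace-Id": "abc"}, authorization="tok_123")
assert headers == {"X-Trace-Id": "abc", "Authorization": "Bearer tok_123"}

# Authorization supplied via the headers dict is rejected, case-insensitively:
try:
    prepare_mcp_headers({"authorization": "Bearer tok_123"}, authorization=None)
except ValueError as exc:
    print(exc)  # points the caller at the 'authorization' parameter instead
```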
@@ -49,7 +86,10 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
     try:
         client = streamablehttp_client
         if strategy == MCPProtol.SSE:
-            client = sse_client
+            # sse_client and streamablehttp_client have different signatures, but both
+            # are called the same way here, so we cast to Any to avoid type errors
+            client = cast(Any, sse_client)
+
         async with client(endpoint, headers=headers) as client_streams:
             async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
                 await session.initialize()
@@ -107,9 +147,29 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
             raise
 
 
-async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefsResponse:
+async def list_mcp_tools(
+    endpoint: str,
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
+) -> ListToolDefsResponse:
+    """List tools available from an MCP server.
+
+    Args:
+        endpoint: MCP server endpoint URL
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+
+    Returns:
+        List of tool definitions from the MCP server
+
+    Raises:
+        ValueError: If Authorization is found in the headers parameter
+    """
+    # Prepare headers with authorization handling
+    final_headers = prepare_mcp_headers(headers, authorization)
+
     tools = []
-    async with client_wrapper(endpoint, headers) as session:
+    async with client_wrapper(endpoint, final_headers) as session:
         tools_result = await session.list_tools()
         for tool in tools_result.tools:
             tools.append(
@@ -127,9 +187,31 @@ async def list_mcp_tools(endpoint: str, headers: dict[str, str]) -> ListToolDefs
 
 
 async def invoke_mcp_tool(
-    endpoint: str, headers: dict[str, str], tool_name: str, kwargs: dict[str, Any]
+    endpoint: str,
+    tool_name: str,
+    kwargs: dict[str, Any],
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
 ) -> ToolInvocationResult:
-    async with client_wrapper(endpoint, headers) as session:
+    """Invoke an MCP tool with the given arguments.
+
+    Args:
+        endpoint: MCP server endpoint URL
+        tool_name: Name of the tool to invoke
+        kwargs: Tool invocation arguments
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+
+    Returns:
+        Tool invocation result with content and error information
+
+    Raises:
+        ValueError: If Authorization header is found in the headers parameter
+    """
+    # Prepare headers with authorization handling
+    final_headers = prepare_mcp_headers(headers, authorization)
+
+    async with client_wrapper(endpoint, final_headers) as session:
        result = await session.call_tool(tool_name, kwargs)
 
         content: list[InterleavedContentItem] = []
@@ -137,7 +219,7 @@ async def invoke_mcp_tool(
             if isinstance(item, mcp_types.TextContent):
                 content.append(TextContentItem(text=item.text))
             elif isinstance(item, mcp_types.ImageContent):
-                content.append(ImageContentItem(image=item.data))
+                content.append(ImageContentItem(image=_URLOrData(data=item.data)))
             elif isinstance(item, mcp_types.EmbeddedResource):
                 logger.warning(f"EmbeddedResource is not supported: {item}")
             else:
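With the reworked signatures, callers pass a bare OAuth token via the new `authorization` parameter instead of assembling the header themselves. A hypothetical end-to-end call (endpoint, token, and tool arguments are placeholders):

```python
import asyncio

from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools


async def main() -> None:
    endpoint = "https://mcp.example.com/mcp"  # placeholder MCP server
    tools = await list_mcp_tools(endpoint, authorization="tok_123")
    # ListToolDefsResponse carries the tool definitions in its .data field
    result = await invoke_mcp_tool(
        endpoint,
        tool_name=tools.data[0].name,
        kwargs={"query": "weather in SF"},
        authorization="tok_123",  # sent as "Authorization: Bearer tok_123"
    )
    print(result.content)


asyncio.run(main())
```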
llama_stack/telemetry/constants.py (new file)
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+This file contains constants used for naming data captured for telemetry.
+
+This is used to ensure that the data captured for telemetry is consistent and can be used to
+identify and correlate data. If custom telemetry data is added to llama stack, please add
+constants for it here.
+"""
+
+llama_stack_prefix = "llama_stack"
+
+# Safety Attributes
+RUN_SHIELD_OPERATION_NAME = "run_shield"
+
+SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request"
+SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id"
+SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages"
+
+SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response"
+SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata"
+SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level"
+SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message"
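For reference, the f-strings above resolve to stable dotted attribute names:

```python
from llama_stack.telemetry.constants import (
    SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
    SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
)

assert SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE == "llama_stack.safety.request.shield_id"
assert SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE == "llama_stack.safety.response.violation.level"
```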
llama_stack/telemetry/helpers.py (new file)
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+from opentelemetry import trace
+
+from llama_stack_api import OpenAIMessageParam, RunShieldResponse
+
+from .constants import (
+    RUN_SHIELD_OPERATION_NAME,
+    SAFETY_REQUEST_MESSAGES_ATTRIBUTE,
+    SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
+    SAFETY_RESPONSE_METADATA_ATTRIBUTE,
+    SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE,
+    SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
+)
+
+
+def safety_span_name(shield_id: str) -> str:
+    return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}"
+
+
+# TODO: Consider using Wrapt to automatically instrument code
+# This is the industry standard way to package automatically instrumentation in python.
+def safety_request_span_attributes(
+    shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse
+) -> None:
+    span = trace.get_current_span()
+    span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id)
+    messages_json = json.dumps([msg.model_dump() for msg in messages])
+    span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json)
+
+    if response.violation:
+        if response.violation.metadata:
+            metadata_json = json.dumps(response.violation.metadata)
+            span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json)
+        if response.violation.user_message:
+            span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message)
+        span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value)
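A sketch of how a safety provider could pair these helpers with an OpenTelemetry tracer; `safety_impl` and its `run_shield` call stand in for the real provider wiring, which this diff does not show:

```python
from opentelemetry import trace

from llama_stack.telemetry.helpers import safety_request_span_attributes, safety_span_name
from llama_stack_api import OpenAIMessageParam, RunShieldResponse

tracer = trace.get_tracer(__name__)


async def run_shield_traced(
    safety_impl, shield_id: str, messages: list[OpenAIMessageParam]
) -> RunShieldResponse:
    # Hypothetical wrapper: open a span named "run_shield <shield_id>", invoke the
    # provider (safety_impl is a placeholder), then attach the request/response
    # attributes defined in constants.py to the current span.
    with tracer.start_as_current_span(safety_span_name(shield_id)):
        response = await safety_impl.run_shield(shield_id, messages)
        safety_request_span_attributes(shield_id, messages, response)
        return response
```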
llama_stack/testing/api_recorder.py
@@ -40,10 +40,12 @@ from openai.types.completion_choice import CompletionChoice
 from llama_stack.core.testing_context import get_test_context, is_debug_mode
 
 # update the "finish_reason" field, since its type definition is wrong (no None is accepted)
-CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
+CompletionChoice.model_fields["finish_reason"].annotation = cast(
+    type[Any] | None, Literal["stop", "length", "content_filter"] | None
+)
 CompletionChoice.model_rebuild()
 
-REPO_ROOT = Path(__file__).parent.parent.parent
+REPO_ROOT = Path(__file__).parent.parent.parent.parent
 DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/common"
 
 
@@ -154,7 +156,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
     }
 
     # Include test_id for isolation, except for shared infrastructure endpoints
-    if parsed.path not in ("/api/tags", "/v1/models"):
+    if parsed.path not in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
         normalized["test_id"] = test_id
 
     normalized_json = json.dumps(normalized, sort_keys=True)
@@ -428,7 +430,7 @@ class ResponseStorage:
 
         # For model-list endpoints, include digest in filename to distinguish different model sets
         endpoint = request.get("endpoint")
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
            digest = _model_identifiers_digest(endpoint, response)
            response_file = f"models-{request_hash}-{digest}.json"
 
@@ -552,13 +554,14 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
     Supported endpoints:
     - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
     - '/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
+    - '/v1/openai/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
     Returns a list of unique identifiers or None if structure doesn't match.
     """
     if "models" in response["body"]:
         # ollama
         items = response["body"]["models"]
     else:
-        # openai
+        # openai or openai-style endpoints
         items = response["body"]
     idents = [m.model if endpoint == "/api/tags" else m.id for m in items]
     return sorted(set(idents))
@@ -579,7 +582,7 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     seen: dict[str, dict[str, Any]] = {}
     for rec in records:
         body = rec["response"]["body"]
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             for m in body:
                 key = m.id
                 seen[key] = m
@@ -597,19 +600,23 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
     if endpoint == "/api/tags":
         from ollama import ListResponse
 
-        body = ListResponse(models=ordered)
+        # Both cast(Any, ...) and type: ignore are needed here:
+        # - cast(Any, ...) attempts to bypass type checking on the argument
+        # - type: ignore is still needed because mypy checks the call site independently
+        #   and reports arg-type mismatch even after casting
+        body = ListResponse(models=cast(Any, ordered))  # type: ignore[arg-type]
     return {"request": canonical_req, "response": {"body": body, "is_streaming": False}}
 
 
 async def _patched_tool_invoke_method(
-    original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any]
+    original_method, provider_name: str, self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
 ):
     """Patched version of tool runtime invoke_tool method for recording/replay."""
     global _current_mode, _current_storage
 
     if _current_mode == APIRecordingMode.LIVE or _current_storage is None:
         # Normal operation
-        return await original_method(self, tool_name, kwargs)
+        return await original_method(self, tool_name, kwargs, authorization=authorization)
 
     request_hash = normalize_tool_request(provider_name, tool_name, kwargs)
 
@@ -627,7 +634,7 @@
 
     if _current_mode in (APIRecordingMode.RECORD, APIRecordingMode.RECORD_IF_MISSING):
         # Make the tool call and record it
-        result = await original_method(self, tool_name, kwargs)
+        result = await original_method(self, tool_name, kwargs, authorization=authorization)
 
         request_data = {
             "test_id": get_test_context(),
@@ -659,7 +666,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     logger.info(f" Test context: {get_test_context()}")
 
     if mode == APIRecordingMode.LIVE or storage is None:
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             return original_method(self, *args, **kwargs)
         else:
             return await original_method(self, *args, **kwargs)
@@ -693,7 +700,7 @@
     recording = None
     if mode == APIRecordingMode.REPLAY or mode == APIRecordingMode.RECORD_IF_MISSING:
         # Special handling for model-list endpoints: merge all recordings with this hash
-        if endpoint in ("/api/tags", "/v1/models"):
+        if endpoint in ("/api/tags", "/v1/models", "/v1/openai/v1/models"):
             records = storage._model_list_responses(request_hash)
             recording = _combine_model_list_responses(endpoint, records)
         else:
@@ -733,13 +740,13 @@
         )
 
     if mode == APIRecordingMode.RECORD or (mode == APIRecordingMode.RECORD_IF_MISSING and not recording):
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = original_method(self, *args, **kwargs)
         else:
             response = await original_method(self, *args, **kwargs)
 
         # we want to store the result of the iterator, not the iterator itself
-        if endpoint == "/v1/models":
+        if endpoint in ("/v1/models", "/v1/openai/v1/models"):
             response = [m async for m in response]
 
         request_data = {
@@ -878,9 +885,11 @@ def patch_inference_clients():
     OllamaAsyncClient.list = patched_ollama_list
 
     # Create patched methods for tool runtimes
-    async def patched_tavily_invoke_tool(self, tool_name: str, kwargs: dict[str, Any]):
+    async def patched_tavily_invoke_tool(
+        self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
+    ):
         return await _patched_tool_invoke_method(
-            _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs
+            _original_methods["tavily_invoke_tool"], "tavily", self, tool_name, kwargs, authorization=authorization
        )
 
     # Apply tool runtime patches
{llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.3.5
+Version: 0.4.0
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -16,20 +16,20 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: PyYAML>=6.0
 Requires-Dist: aiohttp
 Requires-Dist: fastapi<1.0,>=0.115.0
 Requires-Dist: fire
 Requires-Dist: httpx
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: jsonschema
-Requires-Dist: llama-stack-client==0.3.5
-Requires-Dist: openai>=1.107
+Requires-Dist: llama-stack-api
+Requires-Dist: openai>=2.5.0
 Requires-Dist: prompt-toolkit
 Requires-Dist: python-dotenv
 Requires-Dist: pyjwt[crypto]>=2.10.0
 Requires-Dist: pydantic>=2.11.9
 Requires-Dist: rich
-Requires-Dist: starlette
 Requires-Dist: termcolor
 Requires-Dist: tiktoken
 Requires-Dist: pillow
@@ -42,11 +42,11 @@ Requires-Dist: aiosqlite>=0.21.0
 Requires-Dist: asyncpg
 Requires-Dist: sqlalchemy[asyncio]>=2.0.41
 Requires-Dist: starlette>=0.49.1
-Provides-Extra: ui
-Requires-Dist: streamlit; extra == "ui"
-Requires-Dist: pandas; extra == "ui"
-Requires-Dist: llama-stack-client==0.3.5; extra == "ui"
-Requires-Dist: streamlit-option-menu; extra == "ui"
+Requires-Dist: psycopg2-binary
+Requires-Dist: tornado>=6.5.3
+Requires-Dist: urllib3>=2.6.0
+Provides-Extra: client
+Requires-Dist: llama-stack-client==0.4.0; extra == "client"
 Dynamic: license-file
 
 # Llama Stack
@@ -71,9 +71,9 @@ curl -LsSf https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh
 
 ### Overview
 
-Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides
+Llama Stack defines and standardizes the core building blocks that simplify AI application development. It provides a unified set of APIs with implementations from leading service providers. More specifically, it provides:
 
-- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
+- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals.
 - **Plugin architecture** to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
 - **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment.
 - **Multiple developer interfaces** like CLI and SDKs for Python, Typescript, iOS, and Android.
@@ -88,76 +88,81 @@ Llama Stack standardizes the core building blocks that simplify AI application d
88
88
  />
89
89
  </div>
90
90
 
91
- ### Llama Stack Benefits
92
- - **Flexible Options**: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
91
+ #### Llama Stack Benefits
92
+
93
+ - **Flexibility**: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
93
94
  - **Consistent Experience**: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
94
- - **Robust Ecosystem**: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.
95
+ - **Robust Ecosystem**: Llama Stack is integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.
95
96
 
96
- By reducing friction and complexity, Llama Stack empowers developers to focus on what they do best: building transformative generative AI applications.
97
+ For more information, see the [Benefits of Llama Stack](https://llamastack.github.io/docs/latest/concepts/architecture#benefits-of-llama-stack) documentation.
97
98
 
98
99
  ### API Providers
99
100
  Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
100
101
  Please checkout for [full list](https://llamastack.github.io/docs/providers)
101
102
 
102
- | API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
- |:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
- | Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
- | SambaNova | Hosted | | ✅ | | ✅ | | | | |
- | Cerebras | Hosted | | ✅ | | | | | | |
- | Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
- | AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
- | Together | Hosted | ✅ | ✅ | | ✅ | | | | |
- | Groq | Hosted | | ✅ | | | | | | |
- | Ollama | Single Node | | ✅ | | | | | | |
- | TGI | Hosted/Single Node | | ✅ | | | | | | |
- | NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
- | ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
- | Milvus | Hosted/Single Node | | | ✅ | | | | | |
- | Qdrant | Hosted/Single Node | | | ✅ | | | | | |
- | Weaviate | Hosted/Single Node | | | ✅ | | | | | |
- | SQLite-vec | Single Node | | | ✅ | | | | | |
- | PG Vector | Single Node | | | ✅ | | | | | |
- | PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
- | vLLM | Single Node | | ✅ | | | | | | |
- | OpenAI | Hosted | | ✅ | | | | | | |
- | Anthropic | Hosted | | ✅ | | | | | | |
- | Gemini | Hosted | | ✅ | | | | | | |
- | WatsonX | Hosted | | ✅ | | | | | | |
- | HuggingFace | Single Node | | | | | | ✅ | | ✅ |
- | TorchTune | Single Node | | | | | | ✅ | | |
- | NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
- | NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |
+ | API Provider | Environments | Agents | Inference | VectorIO | Safety | Post Training | Eval | DatasetIO |
+ |:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:-------------:|:----:|:--------:|
+ | Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+ | SambaNova | Hosted | | ✅ | | ✅ | | | |
+ | Cerebras | Hosted | | ✅ | | | | | |
+ | Fireworks | Hosted | ✅ | ✅ | ✅ | | | | |
+ | AWS Bedrock | Hosted | | ✅ | | ✅ | | | |
+ | Together | Hosted | ✅ | ✅ | | ✅ | | | |
+ | Groq | Hosted | | ✅ | | | | | |
+ | Ollama | Single Node | | ✅ | | | | | |
+ | TGI | Hosted/Single Node | | ✅ | | | | | |
+ | NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | |
+ | ChromaDB | Hosted/Single Node | | | ✅ | | | | |
+ | Milvus | Hosted/Single Node | | | ✅ | | | | |
+ | Qdrant | Hosted/Single Node | | | ✅ | | | | |
+ | Weaviate | Hosted/Single Node | | | ✅ | | | | |
+ | SQLite-vec | Single Node | | | ✅ | | | | |
+ | PG Vector | Single Node | | | ✅ | | | | |
+ | PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | |
+ | vLLM | Single Node | | ✅ | | | | | |
+ | OpenAI | Hosted | | ✅ | | | | | |
+ | Anthropic | Hosted | | ✅ | | | | | |
+ | Gemini | Hosted | | ✅ | | | | | |
+ | WatsonX | Hosted | | ✅ | | | | | |
+ | HuggingFace | Single Node | | | | | ✅ | | ✅ |
+ | TorchTune | Single Node | | | | | ✅ | | |
+ | NVIDIA NEMO | Hosted | | ✅ | ✅ | | ✅ | ✅ | ✅ |
+ | NVIDIA | Hosted | | | | | ✅ | ✅ | ✅ |

  > **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/docs/providers/external) documentation.

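The matrix above is static documentation; a running stack can also report its own configuration. A minimal sketch, assuming a server on the default port 8321 and a `llama-stack-client` version that exposes the providers listing (field names follow the `ProviderInfo` schema and may vary across versions):

```python
# Minimal sketch: ask a running Llama Stack server which API providers
# it is configured with. Assumes the server listens on the default
# port 8321; the ProviderInfo field names may differ across versions.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

for provider in client.providers.list():
    print(f"{provider.api}: {provider.provider_id} ({provider.provider_type})")
```
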
  ### Distributions

- A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code.
+ A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario. For example, you can begin with a local setup of Ollama and seamlessly transition to production with Fireworks without changing your application code, as sketched below.
  Here are some of the distributions we support:

  | **Distribution** | **Llama Stack Docker** | Start This Distribution |
  |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
- | Starter Distribution | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | [Guide](https://llamastack.github.io/latest/distributions/self_hosted_distro/starter.html) |
- | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llamastack.github.io/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
+ | Starter Distribution | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | [Guide](https://llamastack.github.io/docs/distributions/self_hosted_distro/starter) |
+ | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llamastack.github.io/docs/distributions/self_hosted_distro/meta-reference-gpu) |
  | PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | |

+ For full documentation on the Llama Stack distributions, see the [Distributions Overview](https://llamastack.github.io/docs/distributions) page.
+
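To make that portability concrete, here is a minimal sketch: the application code is identical across distributions, and only the server URL and model id change between environments. It assumes the OpenAI-compatible chat completions endpoint exposed by the stack; the URL and model id below are illustrative placeholders.

```python
# Minimal sketch: the same client code runs against a local
# Ollama-backed distro or a hosted one; only the server URL and
# model id change. Both defaults below are illustrative placeholders,
# and the OpenAI-compatible chat completions endpoint is assumed.
import os

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(
    base_url=os.environ.get("LLAMA_STACK_URL", "http://localhost:8321"),
)

response = client.chat.completions.create(
    model=os.environ.get("LLAMA_STACK_MODEL", "ollama/llama3.2:3b"),
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.choices[0].message.content)
```
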
  ### Documentation

- Please checkout our [Documentation](https://llamastack.github.io/latest/index.html) page for more details.
+ Please check out our [Documentation](https://llamastack.github.io/docs) page for more details.

  * CLI references
- * [llama (server-side) CLI Reference](https://llamastack.github.io/latest/references/llama_cli_reference/index.html): Guide for using the `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution.
- * [llama (client-side) CLI Reference](https://llamastack.github.io/latest/references/llama_stack_client_cli_reference.html): Guide for using the `llama-stack-client` CLI, which allows you to query information about the distribution.
+ * [llama (server-side) CLI Reference](https://llamastack.github.io/docs/references/llama_cli_reference): Guide for using the `llama` CLI to work with Llama models (download, study prompts) and to build and start a Llama Stack distribution.
+ * [llama (client-side) CLI Reference](https://llamastack.github.io/docs/references/llama_stack_client_cli_reference): Guide for using the `llama-stack-client` CLI, which allows you to query information about the distribution.
  * Getting Started
- * [Quick guide to start a Llama Stack server](https://llamastack.github.io/latest/getting_started/index.html).
+ * [Quick guide to start a Llama Stack server](https://llamastack.github.io/docs/getting_started/quickstart).
  * [Jupyter notebook](./docs/getting_started.ipynb) to walk through how to use the llama_stack_client APIs for simple text and vision inference.
  * The complete Llama Stack lesson [Colab notebook](https://colab.research.google.com/drive/1dtVmxotBsI4cGZQNsJRYPrLiDeT0Wnwt) from the [Llama 3.2 course on Deeplearning.ai](https://learn.deeplearning.ai/courses/introducing-multimodal-llama-3-2/lesson/8/llama-stack).
  * A [Zero-to-Hero Guide](https://github.com/meta-llama/llama-stack/tree/main/docs/zero_to_hero_guide) that guides you through all the key components of Llama Stack with code samples.
  * [Contributing](CONTRIBUTING.md)
- * [Adding a new API Provider](https://llamastack.github.io/latest/contributing/new_api_provider.html) to walk-through how to add a new API provider.
+ * [Adding a new API Provider](https://llamastack.github.io/docs/contributing/new_api_provider): a walkthrough of how to add a new API provider.

  ### Llama Stack Client SDKs

+ Check out our client SDKs for connecting to a Llama Stack server in your preferred language; a minimal Python example follows the table below.
+
  | **Language** | **Client SDK** | **Package** |
  | :----: | :----: | :----: |
  | Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [![PyPI version](https://img.shields.io/pypi/v/llama_stack_client.svg)](https://pypi.org/project/llama_stack_client/)
@@ -165,11 +170,8 @@ Please checkout our [Documentation](https://llamastack.github.io/latest/index.ht
  | TypeScript | [llama-stack-client-typescript](https://github.com/meta-llama/llama-stack-client-typescript) | [![NPM version](https://img.shields.io/npm/v/llama-stack-client.svg)](https://npmjs.org/package/llama-stack-client)
  | Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) | [![Maven version](https://img.shields.io/maven-central/v/com.llama.llamastack/llama-stack-client-kotlin)](https://central.sonatype.com/artifact/com.llama.llamastack/llama-stack-client-kotlin)
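
For example, with the Python SDK (a minimal sketch, assuming a server running locally on the default port 8321):

```python
# Minimal sketch: connect to a locally running Llama Stack server
# with the Python SDK and list the models its distribution has
# registered. Assumes the server listens on the default port 8321.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

for model in client.models.list():
    print(model.identifier)
```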
 
- Check out our client SDKs for connecting to a Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications.
-
  You can find more example scripts that use the client SDKs to talk to a Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.

-
  ## 🌟 GitHub Star History