llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +7 -8
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.5.dist-info/RECORD +0 -625
  445. llama_stack-0.3.5.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -1,130 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the terms described in the LICENSE file in
5
- # the root directory of this source tree.
6
-
7
- import streamlit as st
8
-
9
- from llama_stack.core.ui.modules.api import llama_stack_api
10
-
11
- # Sidebar configurations
12
- with st.sidebar:
13
- st.header("Configuration")
14
- available_models = llama_stack_api.client.models.list()
15
- available_models = [model.identifier for model in available_models if model.model_type == "llm"]
16
- selected_model = st.selectbox(
17
- "Choose a model",
18
- available_models,
19
- index=0,
20
- )
21
-
22
- temperature = st.slider(
23
- "Temperature",
24
- min_value=0.0,
25
- max_value=1.0,
26
- value=0.0,
27
- step=0.1,
28
- help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
29
- )
30
-
31
- top_p = st.slider(
32
- "Top P",
33
- min_value=0.0,
34
- max_value=1.0,
35
- value=0.95,
36
- step=0.1,
37
- )
38
-
39
- max_tokens = st.slider(
40
- "Max Tokens",
41
- min_value=0,
42
- max_value=4096,
43
- value=512,
44
- step=1,
45
- help="The maximum number of tokens to generate",
46
- )
47
-
48
- repetition_penalty = st.slider(
49
- "Repetition Penalty",
50
- min_value=1.0,
51
- max_value=2.0,
52
- value=1.0,
53
- step=0.1,
54
- help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
55
- )
56
-
57
- stream = st.checkbox("Stream", value=True)
58
- system_prompt = st.text_area(
59
- "System Prompt",
60
- value="You are a helpful AI assistant.",
61
- help="Initial instructions given to the AI to set its behavior and context",
62
- )
63
-
64
- # Add clear chat button to sidebar
65
- if st.button("Clear Chat", use_container_width=True):
66
- st.session_state.messages = []
67
- st.rerun()
68
-
69
-
70
- # Main chat interface
71
- st.title("🦙 Chat")
72
-
73
-
74
- # Initialize chat history
75
- if "messages" not in st.session_state:
76
- st.session_state.messages = []
77
-
78
- # Display chat messages
79
- for message in st.session_state.messages:
80
- with st.chat_message(message["role"]):
81
- st.markdown(message["content"])
82
-
83
- # Chat input
84
- if prompt := st.chat_input("Example: What is Llama Stack?"):
85
- # Add user message to chat history
86
- st.session_state.messages.append({"role": "user", "content": prompt})
87
-
88
- # Display user message
89
- with st.chat_message("user"):
90
- st.markdown(prompt)
91
-
92
- # Display assistant response
93
- with st.chat_message("assistant"):
94
- message_placeholder = st.empty()
95
- full_response = ""
96
-
97
- if temperature > 0.0:
98
- strategy = {
99
- "type": "top_p",
100
- "temperature": temperature,
101
- "top_p": top_p,
102
- }
103
- else:
104
- strategy = {"type": "greedy"}
105
-
106
- response = llama_stack_api.client.inference.chat_completion(
107
- messages=[
108
- {"role": "system", "content": system_prompt},
109
- {"role": "user", "content": prompt},
110
- ],
111
- model_id=selected_model,
112
- stream=stream,
113
- sampling_params={
114
- "strategy": strategy,
115
- "max_tokens": max_tokens,
116
- "repetition_penalty": repetition_penalty,
117
- },
118
- )
119
-
120
- if stream:
121
- for chunk in response:
122
- if chunk.event.event_type == "progress":
123
- full_response += chunk.event.delta.text
124
- message_placeholder.markdown(full_response + "▌")
125
- message_placeholder.markdown(full_response)
126
- else:
127
- full_response = response.completion_message.content
128
- message_placeholder.markdown(full_response)
129
-
130
- st.session_state.messages.append({"role": "assistant", "content": full_response})
@@ -1,352 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the terms described in the LICENSE file in
5
- # the root directory of this source tree.
6
-
7
- import enum
8
- import json
9
- import uuid
10
-
11
- import streamlit as st
12
- from llama_stack_client import Agent
13
- from llama_stack_client.lib.agents.react.agent import ReActAgent
14
- from llama_stack_client.lib.agents.react.tool_parser import ReActOutput
15
-
16
- from llama_stack.core.ui.modules.api import llama_stack_api
17
-
18
-
19
- class AgentType(enum.Enum):
20
- REGULAR = "Regular"
21
- REACT = "ReAct"
22
-
23
-
24
- def tool_chat_page():
25
- st.title("🛠 Tools")
26
-
27
- client = llama_stack_api.client
28
- models = client.models.list()
29
- model_list = [model.identifier for model in models if model.api_model_type == "llm"]
30
-
31
- tool_groups = client.toolgroups.list()
32
- tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
33
- mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
34
- builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
35
- selected_vector_stores = []
36
-
37
- def reset_agent():
38
- st.session_state.clear()
39
- st.cache_resource.clear()
40
-
41
- with st.sidebar:
42
- st.title("Configuration")
43
- st.subheader("Model")
44
- model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed")
45
-
46
- st.subheader("Available ToolGroups")
47
-
48
- toolgroup_selection = st.pills(
49
- label="Built-in tools",
50
- options=builtin_tools_list,
51
- selection_mode="multi",
52
- on_change=reset_agent,
53
- format_func=lambda tool: "".join(tool.split("::")[1:]),
54
- help="List of built-in tools from your llama stack server.",
55
- )
56
-
57
- if "builtin::rag" in toolgroup_selection:
58
- vector_stores = llama_stack_api.client.vector_stores.list() or []
59
- if not vector_stores:
60
- st.info("No vector databases available for selection.")
61
- vector_stores = [vector_store.identifier for vector_store in vector_stores]
62
- selected_vector_stores = st.multiselect(
63
- label="Select Document Collections to use in RAG queries",
64
- options=vector_stores,
65
- on_change=reset_agent,
66
- )
67
-
68
- mcp_selection = st.pills(
69
- label="MCP Servers",
70
- options=mcp_tools_list,
71
- selection_mode="multi",
72
- on_change=reset_agent,
73
- format_func=lambda tool: "".join(tool.split("::")[1:]),
74
- help="List of MCP servers registered to your llama stack server.",
75
- )
76
-
77
- toolgroup_selection.extend(mcp_selection)
78
-
79
- grouped_tools = {}
80
- total_tools = 0
81
-
82
- for toolgroup_id in toolgroup_selection:
83
- tools = client.tools.list(toolgroup_id=toolgroup_id)
84
- grouped_tools[toolgroup_id] = [tool.name for tool in tools]
85
- total_tools += len(tools)
86
-
87
- st.markdown(f"Active Tools: 🛠 {total_tools}")
88
-
89
- for group_id, tools in grouped_tools.items():
90
- with st.expander(f"🔧 Tools from `{group_id}`"):
91
- for idx, tool in enumerate(tools, start=1):
92
- st.markdown(f"{idx}. `{tool.split(':')[-1]}`")
93
-
94
- st.subheader("Agent Configurations")
95
- st.subheader("Agent Type")
96
- agent_type = st.radio(
97
- label="Select Agent Type",
98
- options=["Regular", "ReAct"],
99
- on_change=reset_agent,
100
- )
101
-
102
- if agent_type == "ReAct":
103
- agent_type = AgentType.REACT
104
- else:
105
- agent_type = AgentType.REGULAR
106
-
107
- max_tokens = st.slider(
108
- "Max Tokens",
109
- min_value=0,
110
- max_value=4096,
111
- value=512,
112
- step=64,
113
- help="The maximum number of tokens to generate",
114
- on_change=reset_agent,
115
- )
116
-
117
- for i, tool_name in enumerate(toolgroup_selection):
118
- if tool_name == "builtin::rag":
119
- tool_dict = dict(
120
- name="builtin::rag",
121
- args={
122
- "vector_store_ids": list(selected_vector_stores),
123
- },
124
- )
125
- toolgroup_selection[i] = tool_dict
126
-
127
- @st.cache_resource
128
- def create_agent():
129
- if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT:
130
- return ReActAgent(
131
- client=client,
132
- model=model,
133
- tools=toolgroup_selection,
134
- response_format={
135
- "type": "json_schema",
136
- "json_schema": ReActOutput.model_json_schema(),
137
- },
138
- sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
139
- )
140
- else:
141
- return Agent(
142
- client,
143
- model=model,
144
- instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.",
145
- tools=toolgroup_selection,
146
- sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
147
- )
148
-
149
- st.session_state.agent_type = agent_type
150
-
151
- agent = create_agent()
152
-
153
- if "agent_session_id" not in st.session_state:
154
- st.session_state["agent_session_id"] = agent.create_session(session_name=f"tool_demo_{uuid.uuid4()}")
155
-
156
- session_id = st.session_state["agent_session_id"]
157
-
158
- if "messages" not in st.session_state:
159
- st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
160
-
161
- for msg in st.session_state.messages:
162
- with st.chat_message(msg["role"]):
163
- st.markdown(msg["content"])
164
-
165
- if prompt := st.chat_input(placeholder=""):
166
- with st.chat_message("user"):
167
- st.markdown(prompt)
168
-
169
- st.session_state.messages.append({"role": "user", "content": prompt})
170
-
171
- turn_response = agent.create_turn(
172
- session_id=session_id,
173
- messages=[{"role": "user", "content": prompt}],
174
- stream=True,
175
- )
176
-
177
- def response_generator(turn_response):
178
- if st.session_state.get("agent_type") == AgentType.REACT:
179
- return _handle_react_response(turn_response)
180
- else:
181
- return _handle_regular_response(turn_response)
182
-
183
- def _handle_react_response(turn_response):
184
- current_step_content = ""
185
- final_answer = None
186
- tool_results = []
187
-
188
- for response in turn_response:
189
- if not hasattr(response.event, "payload"):
190
- yield (
191
- "\n\n🚨 :red[_Llama Stack server Error:_]\n"
192
- "The response received is missing an expected `payload` attribute.\n"
193
- "This could indicate a malformed response or an internal issue within the server.\n\n"
194
- f"Error details: {response}"
195
- )
196
- return
197
-
198
- payload = response.event.payload
199
-
200
- if payload.event_type == "step_progress" and hasattr(payload.delta, "text"):
201
- current_step_content += payload.delta.text
202
- continue
203
-
204
- if payload.event_type == "step_complete":
205
- step_details = payload.step_details
206
-
207
- if step_details.step_type == "inference":
208
- yield from _process_inference_step(current_step_content, tool_results, final_answer)
209
- current_step_content = ""
210
- elif step_details.step_type == "tool_execution":
211
- tool_results = _process_tool_execution(step_details, tool_results)
212
- current_step_content = ""
213
- else:
214
- current_step_content = ""
215
-
216
- if not final_answer and tool_results:
217
- yield from _format_tool_results_summary(tool_results)
218
-
219
- def _process_inference_step(current_step_content, tool_results, final_answer):
220
- try:
221
- react_output_data = json.loads(current_step_content)
222
- thought = react_output_data.get("thought")
223
- action = react_output_data.get("action")
224
- answer = react_output_data.get("answer")
225
-
226
- if answer and answer != "null" and answer is not None:
227
- final_answer = answer
228
-
229
- if thought:
230
- with st.expander("🤔 Thinking...", expanded=False):
231
- st.markdown(f":grey[__{thought}__]")
232
-
233
- if action and isinstance(action, dict):
234
- tool_name = action.get("tool_name")
235
- tool_params = action.get("tool_params")
236
- with st.expander(f'🛠 Action: Using tool "{tool_name}"', expanded=False):
237
- st.json(tool_params)
238
-
239
- if answer and answer != "null" and answer is not None:
240
- yield f"\n\n✅ **Final Answer:**\n{answer}"
241
-
242
- except json.JSONDecodeError:
243
- yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```"
244
- except Exception as e:
245
- yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```"
246
-
247
- return final_answer
248
-
249
- def _process_tool_execution(step_details, tool_results):
250
- try:
251
- if hasattr(step_details, "tool_responses") and step_details.tool_responses:
252
- for tool_response in step_details.tool_responses:
253
- tool_name = tool_response.tool_name
254
- content = tool_response.content
255
- tool_results.append((tool_name, content))
256
- with st.expander(f'⚙️ Observation (Result from "{tool_name}")', expanded=False):
257
- try:
258
- parsed_content = json.loads(content)
259
- st.json(parsed_content)
260
- except json.JSONDecodeError:
261
- st.code(content, language=None)
262
- else:
263
- with st.expander("⚙️ Observation", expanded=False):
264
- st.markdown(":grey[_Tool execution step completed, but no response data found._]")
265
- except Exception as e:
266
- with st.expander("⚙️ Error in Tool Execution", expanded=False):
267
- st.markdown(f":red[_Error processing tool execution: {str(e)}_]")
268
-
269
- return tool_results
270
-
271
- def _format_tool_results_summary(tool_results):
272
- yield "\n\n**Here's what I found:**\n"
273
- for tool_name, content in tool_results:
274
- try:
275
- parsed_content = json.loads(content)
276
-
277
- if tool_name == "web_search" and "top_k" in parsed_content:
278
- yield from _format_web_search_results(parsed_content)
279
- elif "results" in parsed_content and isinstance(parsed_content["results"], list):
280
- yield from _format_results_list(parsed_content["results"])
281
- elif isinstance(parsed_content, dict) and len(parsed_content) > 0:
282
- yield from _format_dict_results(parsed_content)
283
- elif isinstance(parsed_content, list) and len(parsed_content) > 0:
284
- yield from _format_list_results(parsed_content)
285
- except json.JSONDecodeError:
286
- yield f"\n**{tool_name}** was used but returned complex data. Check the observation for details.\n"
287
- except (TypeError, AttributeError, KeyError, IndexError) as e:
288
- print(f"Error processing {tool_name} result: {type(e).__name__}: {e}")
289
-
290
- def _format_web_search_results(parsed_content):
291
- for i, result in enumerate(parsed_content["top_k"], 1):
292
- if i <= 3:
293
- title = result.get("title", "Untitled")
294
- url = result.get("url", "")
295
- content_text = result.get("content", "").strip()
296
- yield f"\n- **{title}**\n {content_text}\n [Source]({url})\n"
297
-
298
- def _format_results_list(results):
299
- for i, result in enumerate(results, 1):
300
- if i <= 3:
301
- if isinstance(result, dict):
302
- name = result.get("name", result.get("title", "Result " + str(i)))
303
- description = result.get("description", result.get("content", result.get("summary", "")))
304
- yield f"\n- **{name}**\n {description}\n"
305
- else:
306
- yield f"\n- {result}\n"
307
-
308
- def _format_dict_results(parsed_content):
309
- yield "\n```\n"
310
- for key, value in list(parsed_content.items())[:5]:
311
- if isinstance(value, str) and len(value) < 100:
312
- yield f"{key}: {value}\n"
313
- else:
314
- yield f"{key}: [Complex data]\n"
315
- yield "```\n"
316
-
317
- def _format_list_results(parsed_content):
318
- yield "\n"
319
- for _, item in enumerate(parsed_content[:3], 1):
320
- if isinstance(item, str):
321
- yield f"- {item}\n"
322
- elif isinstance(item, dict) and "text" in item:
323
- yield f"- {item['text']}\n"
324
- elif isinstance(item, dict) and len(item) > 0:
325
- first_value = next(iter(item.values()))
326
- if isinstance(first_value, str) and len(first_value) < 100:
327
- yield f"- {first_value}\n"
328
-
329
- def _handle_regular_response(turn_response):
330
- for response in turn_response:
331
- if hasattr(response.event, "payload"):
332
- print(response.event.payload)
333
- if response.event.payload.event_type == "step_progress":
334
- if hasattr(response.event.payload.delta, "text"):
335
- yield response.event.payload.delta.text
336
- if response.event.payload.event_type == "step_complete":
337
- if response.event.payload.step_details.step_type == "tool_execution":
338
- if response.event.payload.step_details.tool_calls:
339
- tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name)
340
- yield f'\n\n🛠 :grey[_Using "{tool_name}" tool:_]\n\n'
341
- else:
342
- yield "No tool_calls present in step_details"
343
- else:
344
- yield f"Error occurred in the Llama Stack Cluster: {response}"
345
-
346
- with st.chat_message("assistant"):
347
- response_content = st.write_stream(response_generator(turn_response))
348
-
349
- st.session_state.messages.append({"role": "assistant", "content": response_content})
350
-
351
-
352
- tool_chat_page()
@@ -1,33 +0,0 @@
1
- version: 2
2
- distribution_spec:
3
- description: Dell's distribution of Llama Stack. TGI inference via Dell's custom
4
- container
5
- providers:
6
- inference:
7
- - provider_type: remote::tgi
8
- - provider_type: inline::sentence-transformers
9
- vector_io:
10
- - provider_type: inline::faiss
11
- - provider_type: remote::chromadb
12
- - provider_type: remote::pgvector
13
- safety:
14
- - provider_type: inline::llama-guard
15
- agents:
16
- - provider_type: inline::meta-reference
17
- eval:
18
- - provider_type: inline::meta-reference
19
- datasetio:
20
- - provider_type: remote::huggingface
21
- - provider_type: inline::localfs
22
- scoring:
23
- - provider_type: inline::basic
24
- - provider_type: inline::llm-as-judge
25
- - provider_type: inline::braintrust
26
- tool_runtime:
27
- - provider_type: remote::brave-search
28
- - provider_type: remote::tavily-search
29
- - provider_type: inline::rag-runtime
30
- image_type: venv
31
- additional_pip_packages:
32
- - aiosqlite
33
- - sqlalchemy[asyncio]
@@ -1,32 +0,0 @@
1
- version: 2
2
- distribution_spec:
3
- description: Use Meta Reference for running LLM inference
4
- providers:
5
- inference:
6
- - provider_type: inline::meta-reference
7
- vector_io:
8
- - provider_type: inline::faiss
9
- - provider_type: remote::chromadb
10
- - provider_type: remote::pgvector
11
- safety:
12
- - provider_type: inline::llama-guard
13
- agents:
14
- - provider_type: inline::meta-reference
15
- eval:
16
- - provider_type: inline::meta-reference
17
- datasetio:
18
- - provider_type: remote::huggingface
19
- - provider_type: inline::localfs
20
- scoring:
21
- - provider_type: inline::basic
22
- - provider_type: inline::llm-as-judge
23
- - provider_type: inline::braintrust
24
- tool_runtime:
25
- - provider_type: remote::brave-search
26
- - provider_type: remote::tavily-search
27
- - provider_type: inline::rag-runtime
28
- - provider_type: remote::model-context-protocol
29
- image_type: venv
30
- additional_pip_packages:
31
- - aiosqlite
32
- - sqlalchemy[asyncio]
@@ -1,29 +0,0 @@
1
- version: 2
2
- distribution_spec:
3
- description: Use NVIDIA NIM for running LLM inference, evaluation and safety
4
- providers:
5
- inference:
6
- - provider_type: remote::nvidia
7
- vector_io:
8
- - provider_type: inline::faiss
9
- safety:
10
- - provider_type: remote::nvidia
11
- agents:
12
- - provider_type: inline::meta-reference
13
- eval:
14
- - provider_type: remote::nvidia
15
- post_training:
16
- - provider_type: remote::nvidia
17
- datasetio:
18
- - provider_type: inline::localfs
19
- - provider_type: remote::nvidia
20
- scoring:
21
- - provider_type: inline::basic
22
- tool_runtime:
23
- - provider_type: inline::rag-runtime
24
- files:
25
- - provider_type: inline::localfs
26
- image_type: venv
27
- additional_pip_packages:
28
- - aiosqlite
29
- - sqlalchemy[asyncio]
@@ -1,36 +0,0 @@
1
- version: 2
2
- distribution_spec:
3
- description: Distribution for running open benchmarks
4
- providers:
5
- inference:
6
- - provider_type: remote::openai
7
- - provider_type: remote::anthropic
8
- - provider_type: remote::gemini
9
- - provider_type: remote::groq
10
- - provider_type: remote::together
11
- vector_io:
12
- - provider_type: inline::sqlite-vec
13
- - provider_type: remote::chromadb
14
- - provider_type: remote::pgvector
15
- safety:
16
- - provider_type: inline::llama-guard
17
- agents:
18
- - provider_type: inline::meta-reference
19
- eval:
20
- - provider_type: inline::meta-reference
21
- datasetio:
22
- - provider_type: remote::huggingface
23
- - provider_type: inline::localfs
24
- scoring:
25
- - provider_type: inline::basic
26
- - provider_type: inline::llm-as-judge
27
- - provider_type: inline::braintrust
28
- tool_runtime:
29
- - provider_type: remote::brave-search
30
- - provider_type: remote::tavily-search
31
- - provider_type: inline::rag-runtime
32
- - provider_type: remote::model-context-protocol
33
- image_type: venv
34
- additional_pip_packages:
35
- - aiosqlite
36
- - sqlalchemy[asyncio]
@@ -1,7 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the terms described in the LICENSE file in
5
- # the root directory of this source tree.
6
-
7
- from .postgres_demo import get_distribution_template # noqa: F401
@@ -1,23 +0,0 @@
1
- version: 2
2
- distribution_spec:
3
- description: Quick start template for running Llama Stack with several popular providers
4
- providers:
5
- inference:
6
- - provider_type: remote::vllm
7
- - provider_type: inline::sentence-transformers
8
- vector_io:
9
- - provider_type: remote::chromadb
10
- safety:
11
- - provider_type: inline::llama-guard
12
- agents:
13
- - provider_type: inline::meta-reference
14
- tool_runtime:
15
- - provider_type: remote::brave-search
16
- - provider_type: remote::tavily-search
17
- - provider_type: inline::rag-runtime
18
- - provider_type: remote::model-context-protocol
19
- image_type: venv
20
- additional_pip_packages:
21
- - asyncpg
22
- - psycopg2-binary
23
- - sqlalchemy[asyncio]