llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458)
  1. llama_stack/__init__.py +0 -5
  2. llama_stack/cli/llama.py +3 -3
  3. llama_stack/cli/stack/_list_deps.py +12 -23
  4. llama_stack/cli/stack/list_stacks.py +37 -18
  5. llama_stack/cli/stack/run.py +121 -11
  6. llama_stack/cli/stack/utils.py +0 -127
  7. llama_stack/core/access_control/access_control.py +69 -28
  8. llama_stack/core/access_control/conditions.py +15 -5
  9. llama_stack/core/admin.py +267 -0
  10. llama_stack/core/build.py +6 -74
  11. llama_stack/core/client.py +1 -1
  12. llama_stack/core/configure.py +6 -6
  13. llama_stack/core/conversations/conversations.py +28 -25
  14. llama_stack/core/datatypes.py +271 -79
  15. llama_stack/core/distribution.py +15 -16
  16. llama_stack/core/external.py +3 -3
  17. llama_stack/core/inspect.py +98 -15
  18. llama_stack/core/library_client.py +73 -61
  19. llama_stack/core/prompts/prompts.py +12 -11
  20. llama_stack/core/providers.py +17 -11
  21. llama_stack/core/resolver.py +65 -56
  22. llama_stack/core/routers/__init__.py +8 -12
  23. llama_stack/core/routers/datasets.py +1 -4
  24. llama_stack/core/routers/eval_scoring.py +7 -4
  25. llama_stack/core/routers/inference.py +55 -271
  26. llama_stack/core/routers/safety.py +52 -24
  27. llama_stack/core/routers/tool_runtime.py +6 -48
  28. llama_stack/core/routers/vector_io.py +130 -51
  29. llama_stack/core/routing_tables/benchmarks.py +24 -20
  30. llama_stack/core/routing_tables/common.py +1 -4
  31. llama_stack/core/routing_tables/datasets.py +22 -22
  32. llama_stack/core/routing_tables/models.py +119 -6
  33. llama_stack/core/routing_tables/scoring_functions.py +7 -7
  34. llama_stack/core/routing_tables/shields.py +1 -2
  35. llama_stack/core/routing_tables/toolgroups.py +17 -7
  36. llama_stack/core/routing_tables/vector_stores.py +51 -16
  37. llama_stack/core/server/auth.py +5 -3
  38. llama_stack/core/server/auth_providers.py +36 -20
  39. llama_stack/core/server/fastapi_router_registry.py +84 -0
  40. llama_stack/core/server/quota.py +2 -2
  41. llama_stack/core/server/routes.py +79 -27
  42. llama_stack/core/server/server.py +102 -87
  43. llama_stack/core/stack.py +201 -58
  44. llama_stack/core/storage/datatypes.py +26 -3
  45. llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
  46. llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
  47. llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
  48. llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
  49. llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
  50. llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
  51. llama_stack/core/storage/sqlstore/__init__.py +17 -0
  52. llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
  53. llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
  54. llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
  55. llama_stack/core/store/registry.py +1 -1
  56. llama_stack/core/utils/config.py +8 -2
  57. llama_stack/core/utils/config_resolution.py +32 -29
  58. llama_stack/core/utils/context.py +4 -10
  59. llama_stack/core/utils/exec.py +9 -0
  60. llama_stack/core/utils/type_inspection.py +45 -0
  61. llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
  62. llama_stack/distributions/dell/dell.py +2 -2
  63. llama_stack/distributions/dell/run-with-safety.yaml +3 -2
  64. llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
  65. llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
  66. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
  67. llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
  68. llama_stack/distributions/nvidia/nvidia.py +1 -1
  69. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
  70. llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
  71. llama_stack/distributions/oci/config.yaml +134 -0
  72. llama_stack/distributions/oci/oci.py +108 -0
  73. llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
  74. llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
  75. llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
  76. llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
  77. llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
  78. llama_stack/distributions/starter/starter.py +8 -5
  79. llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
  80. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
  81. llama_stack/distributions/template.py +13 -69
  82. llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
  83. llama_stack/distributions/watsonx/watsonx.py +1 -1
  84. llama_stack/log.py +28 -11
  85. llama_stack/models/llama/checkpoint.py +6 -6
  86. llama_stack/models/llama/hadamard_utils.py +2 -0
  87. llama_stack/models/llama/llama3/generation.py +3 -1
  88. llama_stack/models/llama/llama3/interface.py +2 -5
  89. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
  90. llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
  91. llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
  92. llama_stack/models/llama/llama3/tool_utils.py +2 -1
  93. llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
  94. llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
  95. llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
  96. llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
  97. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
  98. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
  99. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
  100. llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
  101. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
  102. llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
  103. llama_stack/providers/inline/batches/reference/__init__.py +2 -4
  104. llama_stack/providers/inline/batches/reference/batches.py +78 -60
  105. llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
  106. llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
  107. llama_stack/providers/inline/files/localfs/files.py +37 -28
  108. llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
  109. llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
  110. llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
  111. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
  112. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
  113. llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
  114. llama_stack/providers/inline/post_training/common/validator.py +1 -5
  115. llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
  116. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
  117. llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
  118. llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
  119. llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
  120. llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
  121. llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
  122. llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
  123. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
  124. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
  125. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
  126. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
  127. llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
  128. llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
  129. llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
  130. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
  131. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
  132. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
  133. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
  134. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
  135. llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
  136. llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
  137. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
  138. llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
  139. llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
  140. llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
  141. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
  142. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
  143. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
  144. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
  145. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
  146. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
  147. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
  148. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
  149. llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
  150. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
  151. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
  152. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
  153. llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
  154. llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
  155. llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
  156. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
  157. llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
  158. llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
  159. llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
  160. llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
  161. llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
  162. llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
  163. llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
  164. llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
  165. llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
  166. llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
  167. llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
  168. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
  169. llama_stack/providers/registry/agents.py +7 -3
  170. llama_stack/providers/registry/batches.py +1 -1
  171. llama_stack/providers/registry/datasetio.py +1 -1
  172. llama_stack/providers/registry/eval.py +1 -1
  173. llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
  174. llama_stack/providers/registry/files.py +11 -2
  175. llama_stack/providers/registry/inference.py +22 -3
  176. llama_stack/providers/registry/post_training.py +1 -1
  177. llama_stack/providers/registry/safety.py +1 -1
  178. llama_stack/providers/registry/scoring.py +1 -1
  179. llama_stack/providers/registry/tool_runtime.py +2 -2
  180. llama_stack/providers/registry/vector_io.py +7 -7
  181. llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
  182. llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
  183. llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
  184. llama_stack/providers/remote/files/openai/__init__.py +19 -0
  185. llama_stack/providers/remote/files/openai/config.py +28 -0
  186. llama_stack/providers/remote/files/openai/files.py +253 -0
  187. llama_stack/providers/remote/files/s3/files.py +52 -30
  188. llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
  189. llama_stack/providers/remote/inference/anthropic/config.py +1 -1
  190. llama_stack/providers/remote/inference/azure/azure.py +1 -3
  191. llama_stack/providers/remote/inference/azure/config.py +8 -7
  192. llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
  193. llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
  194. llama_stack/providers/remote/inference/bedrock/config.py +24 -3
  195. llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
  196. llama_stack/providers/remote/inference/cerebras/config.py +12 -5
  197. llama_stack/providers/remote/inference/databricks/config.py +13 -6
  198. llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
  199. llama_stack/providers/remote/inference/fireworks/config.py +5 -5
  200. llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
  201. llama_stack/providers/remote/inference/gemini/config.py +1 -1
  202. llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
  203. llama_stack/providers/remote/inference/groq/config.py +5 -5
  204. llama_stack/providers/remote/inference/groq/groq.py +1 -1
  205. llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
  206. llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
  207. llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
  208. llama_stack/providers/remote/inference/nvidia/config.py +21 -11
  209. llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
  210. llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
  211. llama_stack/providers/remote/inference/oci/__init__.py +17 -0
  212. llama_stack/providers/remote/inference/oci/auth.py +79 -0
  213. llama_stack/providers/remote/inference/oci/config.py +75 -0
  214. llama_stack/providers/remote/inference/oci/oci.py +162 -0
  215. llama_stack/providers/remote/inference/ollama/config.py +7 -5
  216. llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
  217. llama_stack/providers/remote/inference/openai/config.py +4 -4
  218. llama_stack/providers/remote/inference/openai/openai.py +1 -1
  219. llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
  220. llama_stack/providers/remote/inference/passthrough/config.py +5 -10
  221. llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
  222. llama_stack/providers/remote/inference/runpod/config.py +12 -5
  223. llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
  224. llama_stack/providers/remote/inference/sambanova/config.py +5 -5
  225. llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
  226. llama_stack/providers/remote/inference/tgi/config.py +7 -6
  227. llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
  228. llama_stack/providers/remote/inference/together/config.py +5 -5
  229. llama_stack/providers/remote/inference/together/together.py +15 -12
  230. llama_stack/providers/remote/inference/vertexai/config.py +1 -1
  231. llama_stack/providers/remote/inference/vllm/config.py +5 -5
  232. llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
  233. llama_stack/providers/remote/inference/watsonx/config.py +4 -4
  234. llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
  235. llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
  236. llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
  237. llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
  238. llama_stack/providers/remote/safety/bedrock/config.py +1 -1
  239. llama_stack/providers/remote/safety/nvidia/config.py +1 -1
  240. llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
  241. llama_stack/providers/remote/safety/sambanova/config.py +1 -1
  242. llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
  243. llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
  244. llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
  245. llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
  246. llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
  247. llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
  248. llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
  249. llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
  250. llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
  251. llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
  252. llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
  253. llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
  254. llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
  255. llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
  256. llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
  257. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
  258. llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
  259. llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
  260. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
  261. llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
  262. llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
  263. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
  264. llama_stack/providers/utils/common/data_schema_validator.py +1 -5
  265. llama_stack/providers/utils/files/form_data.py +1 -1
  266. llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
  267. llama_stack/providers/utils/inference/inference_store.py +12 -21
  268. llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
  269. llama_stack/providers/utils/inference/model_registry.py +1 -3
  270. llama_stack/providers/utils/inference/openai_compat.py +44 -1171
  271. llama_stack/providers/utils/inference/openai_mixin.py +68 -42
  272. llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
  273. llama_stack/providers/utils/inference/stream_utils.py +23 -0
  274. llama_stack/providers/utils/memory/__init__.py +2 -0
  275. llama_stack/providers/utils/memory/file_utils.py +1 -1
  276. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
  277. llama_stack/providers/utils/memory/vector_store.py +39 -38
  278. llama_stack/providers/utils/pagination.py +1 -1
  279. llama_stack/providers/utils/responses/responses_store.py +15 -25
  280. llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
  281. llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
  282. llama_stack/providers/utils/tools/mcp.py +93 -11
  283. llama_stack/telemetry/constants.py +27 -0
  284. llama_stack/telemetry/helpers.py +43 -0
  285. llama_stack/testing/api_recorder.py +25 -16
  286. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
  287. llama_stack-0.4.0.dist-info/RECORD +588 -0
  288. llama_stack-0.4.0.dist-info/top_level.txt +2 -0
  289. llama_stack_api/__init__.py +945 -0
  290. llama_stack_api/admin/__init__.py +45 -0
  291. llama_stack_api/admin/api.py +72 -0
  292. llama_stack_api/admin/fastapi_routes.py +117 -0
  293. llama_stack_api/admin/models.py +113 -0
  294. llama_stack_api/agents.py +173 -0
  295. llama_stack_api/batches/__init__.py +40 -0
  296. llama_stack_api/batches/api.py +53 -0
  297. llama_stack_api/batches/fastapi_routes.py +113 -0
  298. llama_stack_api/batches/models.py +78 -0
  299. llama_stack_api/benchmarks/__init__.py +43 -0
  300. llama_stack_api/benchmarks/api.py +39 -0
  301. llama_stack_api/benchmarks/fastapi_routes.py +109 -0
  302. llama_stack_api/benchmarks/models.py +109 -0
  303. {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
  304. {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
  305. {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
  306. llama_stack_api/common/responses.py +77 -0
  307. {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
  308. {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
  309. llama_stack_api/connectors.py +146 -0
  310. {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
  311. {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
  312. llama_stack_api/datasets/__init__.py +61 -0
  313. llama_stack_api/datasets/api.py +35 -0
  314. llama_stack_api/datasets/fastapi_routes.py +104 -0
  315. llama_stack_api/datasets/models.py +152 -0
  316. {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
  317. {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
  318. llama_stack_api/file_processors/__init__.py +27 -0
  319. llama_stack_api/file_processors/api.py +64 -0
  320. llama_stack_api/file_processors/fastapi_routes.py +78 -0
  321. llama_stack_api/file_processors/models.py +42 -0
  322. llama_stack_api/files/__init__.py +35 -0
  323. llama_stack_api/files/api.py +51 -0
  324. llama_stack_api/files/fastapi_routes.py +124 -0
  325. llama_stack_api/files/models.py +107 -0
  326. {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
  327. llama_stack_api/inspect_api/__init__.py +37 -0
  328. llama_stack_api/inspect_api/api.py +25 -0
  329. llama_stack_api/inspect_api/fastapi_routes.py +76 -0
  330. llama_stack_api/inspect_api/models.py +28 -0
  331. {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
  332. llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
  333. llama_stack_api/internal/sqlstore.py +79 -0
  334. {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
  335. {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
  336. {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
  337. {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
  338. llama_stack_api/providers/__init__.py +33 -0
  339. llama_stack_api/providers/api.py +16 -0
  340. llama_stack_api/providers/fastapi_routes.py +57 -0
  341. llama_stack_api/providers/models.py +24 -0
  342. {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
  343. {llama_stack/apis → llama_stack_api}/resource.py +1 -1
  344. llama_stack_api/router_utils.py +160 -0
  345. {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
  346. {llama_stack → llama_stack_api}/schema_utils.py +94 -4
  347. {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
  348. {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
  349. {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
  350. {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
  351. {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
  352. {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
  353. llama_stack/apis/agents/agents.py +0 -894
  354. llama_stack/apis/batches/__init__.py +0 -9
  355. llama_stack/apis/batches/batches.py +0 -100
  356. llama_stack/apis/benchmarks/__init__.py +0 -7
  357. llama_stack/apis/benchmarks/benchmarks.py +0 -108
  358. llama_stack/apis/common/responses.py +0 -36
  359. llama_stack/apis/conversations/__init__.py +0 -31
  360. llama_stack/apis/datasets/datasets.py +0 -251
  361. llama_stack/apis/datatypes.py +0 -160
  362. llama_stack/apis/eval/__init__.py +0 -7
  363. llama_stack/apis/files/__init__.py +0 -7
  364. llama_stack/apis/files/files.py +0 -199
  365. llama_stack/apis/inference/__init__.py +0 -7
  366. llama_stack/apis/inference/event_logger.py +0 -43
  367. llama_stack/apis/inspect/__init__.py +0 -7
  368. llama_stack/apis/inspect/inspect.py +0 -94
  369. llama_stack/apis/models/__init__.py +0 -7
  370. llama_stack/apis/post_training/__init__.py +0 -7
  371. llama_stack/apis/prompts/__init__.py +0 -9
  372. llama_stack/apis/providers/__init__.py +0 -7
  373. llama_stack/apis/providers/providers.py +0 -69
  374. llama_stack/apis/safety/__init__.py +0 -7
  375. llama_stack/apis/scoring/__init__.py +0 -7
  376. llama_stack/apis/scoring_functions/__init__.py +0 -7
  377. llama_stack/apis/shields/__init__.py +0 -7
  378. llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
  379. llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
  380. llama_stack/apis/telemetry/__init__.py +0 -7
  381. llama_stack/apis/telemetry/telemetry.py +0 -423
  382. llama_stack/apis/tools/__init__.py +0 -8
  383. llama_stack/apis/vector_io/__init__.py +0 -7
  384. llama_stack/apis/vector_stores/__init__.py +0 -7
  385. llama_stack/core/server/tracing.py +0 -80
  386. llama_stack/core/ui/app.py +0 -55
  387. llama_stack/core/ui/modules/__init__.py +0 -5
  388. llama_stack/core/ui/modules/api.py +0 -32
  389. llama_stack/core/ui/modules/utils.py +0 -42
  390. llama_stack/core/ui/page/__init__.py +0 -5
  391. llama_stack/core/ui/page/distribution/__init__.py +0 -5
  392. llama_stack/core/ui/page/distribution/datasets.py +0 -18
  393. llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
  394. llama_stack/core/ui/page/distribution/models.py +0 -18
  395. llama_stack/core/ui/page/distribution/providers.py +0 -27
  396. llama_stack/core/ui/page/distribution/resources.py +0 -48
  397. llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
  398. llama_stack/core/ui/page/distribution/shields.py +0 -19
  399. llama_stack/core/ui/page/evaluations/__init__.py +0 -5
  400. llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
  401. llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
  402. llama_stack/core/ui/page/playground/__init__.py +0 -5
  403. llama_stack/core/ui/page/playground/chat.py +0 -130
  404. llama_stack/core/ui/page/playground/tools.py +0 -352
  405. llama_stack/distributions/dell/build.yaml +0 -33
  406. llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
  407. llama_stack/distributions/nvidia/build.yaml +0 -29
  408. llama_stack/distributions/open-benchmark/build.yaml +0 -36
  409. llama_stack/distributions/postgres-demo/__init__.py +0 -7
  410. llama_stack/distributions/postgres-demo/build.yaml +0 -23
  411. llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
  412. llama_stack/distributions/starter/build.yaml +0 -61
  413. llama_stack/distributions/starter-gpu/build.yaml +0 -61
  414. llama_stack/distributions/watsonx/build.yaml +0 -33
  415. llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
  416. llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
  417. llama_stack/providers/inline/telemetry/__init__.py +0 -5
  418. llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
  419. llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
  420. llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
  421. llama_stack/providers/remote/inference/bedrock/models.py +0 -29
  422. llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
  423. llama_stack/providers/utils/sqlstore/__init__.py +0 -5
  424. llama_stack/providers/utils/sqlstore/api.py +0 -128
  425. llama_stack/providers/utils/telemetry/__init__.py +0 -5
  426. llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
  427. llama_stack/providers/utils/telemetry/tracing.py +0 -384
  428. llama_stack/strong_typing/__init__.py +0 -19
  429. llama_stack/strong_typing/auxiliary.py +0 -228
  430. llama_stack/strong_typing/classdef.py +0 -440
  431. llama_stack/strong_typing/core.py +0 -46
  432. llama_stack/strong_typing/deserializer.py +0 -877
  433. llama_stack/strong_typing/docstring.py +0 -409
  434. llama_stack/strong_typing/exception.py +0 -23
  435. llama_stack/strong_typing/inspection.py +0 -1085
  436. llama_stack/strong_typing/mapping.py +0 -40
  437. llama_stack/strong_typing/name.py +0 -182
  438. llama_stack/strong_typing/schema.py +0 -792
  439. llama_stack/strong_typing/serialization.py +0 -97
  440. llama_stack/strong_typing/serializer.py +0 -500
  441. llama_stack/strong_typing/slots.py +0 -27
  442. llama_stack/strong_typing/topological.py +0 -89
  443. llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
  444. llama_stack-0.3.4.dist-info/RECORD +0 -625
  445. llama_stack-0.3.4.dist-info/top_level.txt +0 -1
  446. /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
  447. /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
  448. /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
  449. /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
  450. /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
  451. /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
  452. /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
  453. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
  454. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
  455. {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
  456. {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
  457. {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
  458. {llama_stack/apis → llama_stack_api}/version.py +0 -0
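Note on the renames above: the public API modules move from llama_stack/apis/* into a new top-level llama_stack_api package, and the kvstore/sqlstore utilities move from llama_stack/providers/utils/* into llama_stack/core/storage/*. As a hedged illustration only (the exported symbols are not visible in this listing, so only modules are imported, and the shim below is an assumption rather than part of either package), downstream code could bridge the two layouts like this:

    # Hypothetical compatibility shim for the 0.3.4 -> 0.4.0 module reorganization.
    # Module paths are taken from the rename entries above; no symbol names are assumed.
    try:
        from llama_stack_api import safety  # 0.4.0 layout: llama_stack_api/safety.py
    except ImportError:
        from llama_stack.apis.safety import safety  # 0.3.4 layout: llama_stack/apis/safety/safety.py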
--- llama_stack/core/ui/modules/api.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import os
-
-from llama_stack_client import LlamaStackClient
-
-
-class LlamaStackApi:
-    def __init__(self):
-        self.client = LlamaStackClient(
-            base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
-            provider_data={
-                "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""),
-                "together_api_key": os.environ.get("TOGETHER_API_KEY", ""),
-                "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""),
-                "openai_api_key": os.environ.get("OPENAI_API_KEY", ""),
-                "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""),
-            },
-        )
-
-    def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None):
-        """Run scoring on a single row"""
-        if not scoring_params:
-            scoring_params = dict.fromkeys(scoring_function_ids)
-        return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)
-
-
-llama_stack_api = LlamaStackApi()
--- llama_stack/core/ui/modules/utils.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import base64
-import os
-
-import pandas as pd
-import streamlit as st
-
-
-def process_dataset(file):
-    if file is None:
-        return "No file uploaded", None
-
-    try:
-        # Determine file type and read accordingly
-        file_ext = os.path.splitext(file.name)[1].lower()
-        if file_ext == ".csv":
-            df = pd.read_csv(file)
-        elif file_ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(file)
-        else:
-            return "Unsupported file format. Please upload a CSV or Excel file.", None
-
-        return df
-
-    except Exception as e:
-        st.error(f"Error processing file: {str(e)}")
-        return None
-
-
-def data_url_from_file(file) -> str:
-    file_content = file.getvalue()
-    base64_content = base64.b64encode(file_content).decode("utf-8")
-    mime_type = file.type
-
-    data_url = f"data:{mime_type};base64,{base64_content}"
-
-    return data_url
--- llama_stack/core/ui/page/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
--- llama_stack/core/ui/page/distribution/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
--- llama_stack/core/ui/page/distribution/datasets.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def datasets():
-    st.header("Datasets")
-
-    datasets_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()}
-    if len(datasets_info) > 0:
-        selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
-        st.json(datasets_info[selected_dataset], expanded=True)
--- llama_stack/core/ui/page/distribution/eval_tasks.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def benchmarks():
-    # Benchmarks Section
-    st.header("Benchmarks")
-
-    benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()}
-
-    if len(benchmarks_info) > 0:
-        selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect")
-        st.json(benchmarks_info[selected_benchmark], expanded=True)
--- llama_stack/core/ui/page/distribution/models.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def models():
-    # Models Section
-    st.header("Models")
-    models_info = {m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()}
-
-    selected_model = st.selectbox("Select a model", list(models_info.keys()))
-    st.json(models_info[selected_model])
--- llama_stack/core/ui/page/distribution/providers.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def providers():
-    st.header("🔍 API Providers")
-    apis_providers_lst = llama_stack_api.client.providers.list()
-    api_to_providers = {}
-    for api_provider in apis_providers_lst:
-        if api_provider.api in api_to_providers:
-            api_to_providers[api_provider.api].append(api_provider)
-        else:
-            api_to_providers[api_provider.api] = [api_provider]
-
-    for api in api_to_providers.keys():
-        st.markdown(f"###### {api}")
-        st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500)
-
-
-providers()
--- llama_stack/core/ui/page/distribution/resources.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from streamlit_option_menu import option_menu
-
-from llama_stack.core.ui.page.distribution.datasets import datasets
-from llama_stack.core.ui.page.distribution.eval_tasks import benchmarks
-from llama_stack.core.ui.page.distribution.models import models
-from llama_stack.core.ui.page.distribution.scoring_functions import scoring_functions
-from llama_stack.core.ui.page.distribution.shields import shields
-
-
-def resources_page():
-    options = [
-        "Models",
-        "Shields",
-        "Scoring Functions",
-        "Datasets",
-        "Benchmarks",
-    ]
-    icons = ["magic", "shield", "file-bar-graph", "database", "list-task"]
-    selected_resource = option_menu(
-        None,
-        options,
-        icons=icons,
-        orientation="horizontal",
-        styles={
-            "nav-link": {
-                "font-size": "12px",
-            },
-        },
-    )
-    if selected_resource == "Benchmarks":
-        benchmarks()
-    elif selected_resource == "Datasets":
-        datasets()
-    elif selected_resource == "Models":
-        models()
-    elif selected_resource == "Scoring Functions":
-        scoring_functions()
-    elif selected_resource == "Shields":
-        shields()
-
-
-resources_page()
--- llama_stack/core/ui/page/distribution/scoring_functions.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def scoring_functions():
-    st.header("Scoring Functions")
-
-    scoring_functions_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.scoring_functions.list()}
-
-    selected_scoring_function = st.selectbox("Select a scoring function", list(scoring_functions_info.keys()))
-    st.json(scoring_functions_info[selected_scoring_function], expanded=True)
--- llama_stack/core/ui/page/distribution/shields.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def shields():
-    # Shields Section
-    st.header("Shields")
-
-    shields_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()}
-
-    selected_shield = st.selectbox("Select a shield", list(shields_info.keys()))
-    st.json(shields_info[selected_shield])
--- llama_stack/core/ui/page/evaluations/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
--- llama_stack/core/ui/page/evaluations/app_eval.py
+++ /dev/null
@@ -1,143 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-
-import pandas as pd
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-from llama_stack.core.ui.modules.utils import process_dataset
-
-
-def application_evaluation_page():
-    st.set_page_config(page_title="Evaluations (Scoring)", page_icon="🦙")
-    st.title("📊 Evaluations (Scoring)")
-
-    # File uploader
-    uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])
-
-    if uploaded_file is None:
-        st.error("No file uploaded")
-        return
-
-    # Process uploaded file
-    df = process_dataset(uploaded_file)
-    if df is None:
-        st.error("Error processing file")
-        return
-
-    # Display dataset information
-    st.success("Dataset loaded successfully!")
-
-    # Display dataframe preview
-    st.subheader("Dataset Preview")
-    st.dataframe(df)
-
-    # Select Scoring Functions to Run Evaluation On
-    st.subheader("Select Scoring Functions")
-    scoring_functions = llama_stack_api.client.scoring_functions.list()
-    scoring_functions = {sf.identifier: sf for sf in scoring_functions}
-    scoring_functions_names = list(scoring_functions.keys())
-    selected_scoring_functions = st.multiselect(
-        "Choose one or more scoring functions",
-        options=scoring_functions_names,
-        help="Choose one or more scoring functions.",
-    )
-
-    available_models = llama_stack_api.client.models.list()
-    available_models = [m.identifier for m in available_models]
-
-    scoring_params = {}
-    if selected_scoring_functions:
-        st.write("Selected:")
-        for scoring_fn_id in selected_scoring_functions:
-            scoring_fn = scoring_functions[scoring_fn_id]
-            st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
-            new_params = None
-            if scoring_fn.params:
-                new_params = {}
-                for param_name, param_value in scoring_fn.params.to_dict().items():
-                    if param_name == "type":
-                        new_params[param_name] = param_value
-                        continue
-
-                    if param_name == "judge_model":
-                        value = st.selectbox(
-                            f"Select **{param_name}** for {scoring_fn_id}",
-                            options=available_models,
-                            index=0,
-                            key=f"{scoring_fn_id}_{param_name}",
-                        )
-                        new_params[param_name] = value
-                    else:
-                        value = st.text_area(
-                            f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
-                            value=json.dumps(param_value, indent=2),
-                            height=80,
-                        )
-                        try:
-                            new_params[param_name] = json.loads(value)
-                        except json.JSONDecodeError:
-                            st.error(f"Invalid JSON for **{param_name}** in {scoring_fn_id}")
-
-                st.json(new_params)
-            scoring_params[scoring_fn_id] = new_params
-
-    # Add run evaluation button & slider
-    total_rows = len(df)
-    num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)
-
-    if st.button("Run Evaluation"):
-        progress_text = "Running evaluation..."
-        progress_bar = st.progress(0, text=progress_text)
-        rows = df.to_dict(orient="records")
-        if num_rows < total_rows:
-            rows = rows[:num_rows]
-
-        # Create separate containers for progress text and results
-        progress_text_container = st.empty()
-        results_container = st.empty()
-        output_res = {}
-        for i, r in enumerate(rows):
-            # Update progress
-            progress = i / len(rows)
-            progress_bar.progress(progress, text=progress_text)
-
-            # Run evaluation for current row
-            score_res = llama_stack_api.run_scoring(
-                r,
-                scoring_function_ids=selected_scoring_functions,
-                scoring_params=scoring_params,
-            )
-
-            for k in r.keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(r[k])
-
-            for fn_id in selected_scoring_functions:
-                if fn_id not in output_res:
-                    output_res[fn_id] = []
-                output_res[fn_id].append(score_res.results[fn_id].score_rows[0])
-
-            # Display current row results using separate containers
-            progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})")
-            results_container.json(
-                score_res.to_json(),
-                expanded=2,
-            )
-
-        progress_bar.progress(1.0, text="Evaluation complete!")
-
-        # Display results in dataframe
-        if output_res:
-            output_df = pd.DataFrame(output_res)
-            st.subheader("Evaluation Results")
-            st.dataframe(output_df)
-
-
-application_evaluation_page()
--- llama_stack/core/ui/page/evaluations/native_eval.py
+++ /dev/null
@@ -1,253 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-
-import pandas as pd
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def select_benchmark_1():
-    # Select Benchmarks
-    st.subheader("1. Choose An Eval Task")
-    benchmarks = llama_stack_api.client.benchmarks.list()
-    benchmarks = {et.identifier: et for et in benchmarks}
-    benchmarks_names = list(benchmarks.keys())
-    selected_benchmark = st.selectbox(
-        "Choose an eval task.",
-        options=benchmarks_names,
-        help="Choose an eval task. Each eval task is parameterized by a dataset, and list of scoring functions.",
-    )
-    with st.expander("View Eval Task"):
-        st.json(benchmarks[selected_benchmark], expanded=True)
-
-    st.session_state["selected_benchmark"] = selected_benchmark
-    st.session_state["benchmarks"] = benchmarks
-    if st.button("Confirm", key="confirm_1"):
-        st.session_state["selected_benchmark_1_next"] = True
-
-
-def define_eval_candidate_2():
-    if not st.session_state.get("selected_benchmark_1_next", None):
-        return
-
-    st.subheader("2. Define Eval Candidate")
-    st.info(
-        """
-        Define the configurations for the evaluation candidate model or agent used for generation.
-        Select "model" if you want to run generation with inference API, or "agent" if you want to run generation with agent API through specifying AgentConfig.
-        """
-    )
-    with st.expander("Define Eval Candidate", expanded=True):
-        # Define Eval Candidate
-        candidate_type = st.radio("Candidate Type", ["model", "agent"])
-
-        available_models = llama_stack_api.client.models.list()
-        available_models = [model.identifier for model in available_models]
-        selected_model = st.selectbox(
-            "Choose a model",
-            available_models,
-            index=0,
-        )
-
-        # Sampling Parameters
-        st.markdown("##### Sampling Parameters")
-        temperature = st.slider(
-            "Temperature",
-            min_value=0.0,
-            max_value=1.0,
-            value=0.0,
-            step=0.1,
-            help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
-        )
-        top_p = st.slider(
-            "Top P",
-            min_value=0.0,
-            max_value=1.0,
-            value=0.95,
-            step=0.1,
-        )
-        max_tokens = st.slider(
-            "Max Tokens",
-            min_value=0,
-            max_value=4096,
-            value=512,
-            step=1,
-            help="The maximum number of tokens to generate",
-        )
-        repetition_penalty = st.slider(
-            "Repetition Penalty",
-            min_value=1.0,
-            max_value=2.0,
-            value=1.0,
-            step=0.1,
-            help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
-        )
-        if candidate_type == "model":
-            if temperature > 0.0:
-                strategy = {
-                    "type": "top_p",
-                    "temperature": temperature,
-                    "top_p": top_p,
-                }
-            else:
-                strategy = {"type": "greedy"}
-
-            eval_candidate = {
-                "type": "model",
-                "model": selected_model,
-                "sampling_params": {
-                    "strategy": strategy,
-                    "max_tokens": max_tokens,
-                    "repetition_penalty": repetition_penalty,
-                },
-            }
-        elif candidate_type == "agent":
-            system_prompt = st.text_area(
-                "System Prompt",
-                value="You are a helpful AI assistant.",
-                help="Initial instructions given to the AI to set its behavior and context",
-            )
-            tools_json = st.text_area(
-                "Tools Configuration (JSON)",
-                value=json.dumps(
-                    [
-                        {
-                            "type": "brave_search",
-                            "engine": "brave",
-                            "api_key": "ENTER_BRAVE_API_KEY_HERE",
-                        }
-                    ]
-                ),
-                help="Enter tool configurations in JSON format. Each tool should have a name, description, and parameters.",
-                height=200,
-            )
-            try:
-                tools = json.loads(tools_json)
-            except json.JSONDecodeError:
-                st.error("Invalid JSON format for tools configuration")
-                tools = []
-            eval_candidate = {
-                "type": "agent",
-                "config": {
-                    "model": selected_model,
-                    "instructions": system_prompt,
-                    "tools": tools,
-                    "tool_choice": "auto",
-                    "tool_prompt_format": "json",
-                    "input_shields": [],
-                    "output_shields": [],
-                    "enable_session_persistence": False,
-                },
-            }
-        st.session_state["eval_candidate"] = eval_candidate
-
-    if st.button("Confirm", key="confirm_2"):
-        st.session_state["selected_eval_candidate_2_next"] = True
-
-
-def run_evaluation_3():
-    if not st.session_state.get("selected_eval_candidate_2_next", None):
-        return
-
-    st.subheader("3. Run Evaluation")
-    # Add info box to explain configurations being used
-    st.info(
-        """
-        Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button.
-        """
-    )
-    selected_benchmark = st.session_state["selected_benchmark"]
-    benchmarks = st.session_state["benchmarks"]
-    eval_candidate = st.session_state["eval_candidate"]
-
-    dataset_id = benchmarks[selected_benchmark].dataset_id
-    rows = llama_stack_api.client.datasets.iterrows(
-        dataset_id=dataset_id,
-    )
-    total_rows = len(rows.data)
-    # Add number of examples control
-    num_rows = st.number_input(
-        "Number of Examples to Evaluate",
-        min_value=1,
-        max_value=total_rows,
-        value=5,
-        help="Number of examples from the dataset to evaluate. ",
-    )
-
-    benchmark_config = {
-        "type": "benchmark",
-        "eval_candidate": eval_candidate,
-        "scoring_params": {},
-    }
-
-    with st.expander("View Evaluation Task", expanded=True):
-        st.json(benchmarks[selected_benchmark], expanded=True)
-    with st.expander("View Evaluation Task Configuration", expanded=True):
-        st.json(benchmark_config, expanded=True)
-
-    # Add run button and handle evaluation
-    if st.button("Run Evaluation"):
-        progress_text = "Running evaluation..."
-        progress_bar = st.progress(0, text=progress_text)
-        rows = rows.data
-        if num_rows < total_rows:
-            rows = rows[:num_rows]
-
-        # Create separate containers for progress text and results
-        progress_text_container = st.empty()
-        results_container = st.empty()
-        output_res = {}
-        for i, r in enumerate(rows):
-            # Update progress
-            progress = i / len(rows)
-            progress_bar.progress(progress, text=progress_text)
-            # Run evaluation for current row
-            eval_res = llama_stack_api.client.eval.evaluate_rows(
-                benchmark_id=selected_benchmark,
-                input_rows=[r],
-                scoring_functions=benchmarks[selected_benchmark].scoring_functions,
-                benchmark_config=benchmark_config,
-            )
-
-            for k in r.keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(r[k])
-
-            for k in eval_res.generations[0].keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(eval_res.generations[0][k])
-
-            for scoring_fn in benchmarks[selected_benchmark].scoring_functions:
-                if scoring_fn not in output_res:
-                    output_res[scoring_fn] = []
-                output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0])
-
-            progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})")
-            results_container.json(eval_res, expanded=2)
-
-        progress_bar.progress(1.0, text="Evaluation complete!")
-        # Display results in dataframe
-        if output_res:
-            output_df = pd.DataFrame(output_res)
-            st.subheader("Evaluation Results")
-            st.dataframe(output_df)
-
-
-def native_evaluation_page():
-    st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙")
-    st.title("📊 Evaluations (Generation + Scoring)")
-
-    select_benchmark_1()
-    define_eval_candidate_2()
-    run_evaluation_3()
-
-
-native_evaluation_page()
--- llama_stack/core/ui/page/playground/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.