llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
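The headline structural change in this release is visible in the rename entries above: the API definitions move out of `llama_stack/apis/*` into a new top-level `llama_stack_api` package (note the new `top_level.txt` with 2 entries), and the kvstore/sqlstore utilities move from `llama_stack/providers/utils/` into `llama_stack/core/storage/`. As a rough, hedged sketch only (inferred from the rename entries; the authoritative export list is the new `llama_stack_api/__init__.py`, which is not shown in this diff), downstream imports would migrate along these lines:

```python
# Hedged compatibility sketch, inferred from the rename entries above
# (e.g. "{llama_stack/apis/inference -> llama_stack_api}/inference.py").
# Only the package path is assumed to change; consult the installed 0.4.1
# package for the actual public exports.
try:
    import llama_stack_api.inference as inference_api  # 0.4.x layout
except ImportError:
    import llama_stack.apis.inference.inference as inference_api  # 0.3.x layout
```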

--- llama_stack/core/ui/page/playground/chat.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-# Sidebar configurations
-with st.sidebar:
-    st.header("Configuration")
-    available_models = llama_stack_api.client.models.list()
-    available_models = [model.identifier for model in available_models if model.model_type == "llm"]
-    selected_model = st.selectbox(
-        "Choose a model",
-        available_models,
-        index=0,
-    )
-
-    temperature = st.slider(
-        "Temperature",
-        min_value=0.0,
-        max_value=1.0,
-        value=0.0,
-        step=0.1,
-        help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
-    )
-
-    top_p = st.slider(
-        "Top P",
-        min_value=0.0,
-        max_value=1.0,
-        value=0.95,
-        step=0.1,
-    )
-
-    max_tokens = st.slider(
-        "Max Tokens",
-        min_value=0,
-        max_value=4096,
-        value=512,
-        step=1,
-        help="The maximum number of tokens to generate",
-    )
-
-    repetition_penalty = st.slider(
-        "Repetition Penalty",
-        min_value=1.0,
-        max_value=2.0,
-        value=1.0,
-        step=0.1,
-        help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
-    )
-
-    stream = st.checkbox("Stream", value=True)
-    system_prompt = st.text_area(
-        "System Prompt",
-        value="You are a helpful AI assistant.",
-        help="Initial instructions given to the AI to set its behavior and context",
-    )
-
-    # Add clear chat button to sidebar
-    if st.button("Clear Chat", use_container_width=True):
-        st.session_state.messages = []
-        st.rerun()
-
-
-# Main chat interface
-st.title("🦙 Chat")
-
-
-# Initialize chat history
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-
-# Display chat messages
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
-
-# Chat input
-if prompt := st.chat_input("Example: What is Llama Stack?"):
-    # Add user message to chat history
-    st.session_state.messages.append({"role": "user", "content": prompt})
-
-    # Display user message
-    with st.chat_message("user"):
-        st.markdown(prompt)
-
-    # Display assistant response
-    with st.chat_message("assistant"):
-        message_placeholder = st.empty()
-        full_response = ""
-
-        if temperature > 0.0:
-            strategy = {
-                "type": "top_p",
-                "temperature": temperature,
-                "top_p": top_p,
-            }
-        else:
-            strategy = {"type": "greedy"}
-
-        response = llama_stack_api.client.inference.chat_completion(
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": prompt},
-            ],
-            model_id=selected_model,
-            stream=stream,
-            sampling_params={
-                "strategy": strategy,
-                "max_tokens": max_tokens,
-                "repetition_penalty": repetition_penalty,
-            },
-        )
-
-        if stream:
-            for chunk in response:
-                if chunk.event.event_type == "progress":
-                    full_response += chunk.event.delta.text
-                message_placeholder.markdown(full_response + "▌")
-            message_placeholder.markdown(full_response)
-        else:
-            full_response = response.completion_message.content
-            message_placeholder.markdown(full_response)
-
-        st.session_state.messages.append({"role": "assistant", "content": full_response})

--- llama_stack/core/ui/page/playground/tools.py
+++ /dev/null
@@ -1,352 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import enum
-import json
-import uuid
-
-import streamlit as st
-from llama_stack_client import Agent
-from llama_stack_client.lib.agents.react.agent import ReActAgent
-from llama_stack_client.lib.agents.react.tool_parser import ReActOutput
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-class AgentType(enum.Enum):
-    REGULAR = "Regular"
-    REACT = "ReAct"
-
-
-def tool_chat_page():
-    st.title("🛠 Tools")
-
-    client = llama_stack_api.client
-    models = client.models.list()
-    model_list = [model.identifier for model in models if model.api_model_type == "llm"]
-
-    tool_groups = client.toolgroups.list()
-    tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
-    mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
-    builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
-    selected_vector_stores = []
-
-    def reset_agent():
-        st.session_state.clear()
-        st.cache_resource.clear()
-
-    with st.sidebar:
-        st.title("Configuration")
-        st.subheader("Model")
-        model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed")
-
-        st.subheader("Available ToolGroups")
-
-        toolgroup_selection = st.pills(
-            label="Built-in tools",
-            options=builtin_tools_list,
-            selection_mode="multi",
-            on_change=reset_agent,
-            format_func=lambda tool: "".join(tool.split("::")[1:]),
-            help="List of built-in tools from your llama stack server.",
-        )
-
-        if "builtin::rag" in toolgroup_selection:
-            vector_stores = llama_stack_api.client.vector_stores.list() or []
-            if not vector_stores:
-                st.info("No vector databases available for selection.")
-            vector_stores = [vector_store.identifier for vector_store in vector_stores]
-            selected_vector_stores = st.multiselect(
-                label="Select Document Collections to use in RAG queries",
-                options=vector_stores,
-                on_change=reset_agent,
-            )
-
-        mcp_selection = st.pills(
-            label="MCP Servers",
-            options=mcp_tools_list,
-            selection_mode="multi",
-            on_change=reset_agent,
-            format_func=lambda tool: "".join(tool.split("::")[1:]),
-            help="List of MCP servers registered to your llama stack server.",
-        )
-
-        toolgroup_selection.extend(mcp_selection)
-
-        grouped_tools = {}
-        total_tools = 0
-
-        for toolgroup_id in toolgroup_selection:
-            tools = client.tools.list(toolgroup_id=toolgroup_id)
-            grouped_tools[toolgroup_id] = [tool.name for tool in tools]
-            total_tools += len(tools)
-
-        st.markdown(f"Active Tools: 🛠 {total_tools}")
-
-        for group_id, tools in grouped_tools.items():
-            with st.expander(f"🔧 Tools from `{group_id}`"):
-                for idx, tool in enumerate(tools, start=1):
-                    st.markdown(f"{idx}. `{tool.split(':')[-1]}`")
-
-        st.subheader("Agent Configurations")
-        st.subheader("Agent Type")
-        agent_type = st.radio(
-            label="Select Agent Type",
-            options=["Regular", "ReAct"],
-            on_change=reset_agent,
-        )
-
-        if agent_type == "ReAct":
-            agent_type = AgentType.REACT
-        else:
-            agent_type = AgentType.REGULAR
-
-        max_tokens = st.slider(
-            "Max Tokens",
-            min_value=0,
-            max_value=4096,
-            value=512,
-            step=64,
-            help="The maximum number of tokens to generate",
-            on_change=reset_agent,
-        )
-
-    for i, tool_name in enumerate(toolgroup_selection):
-        if tool_name == "builtin::rag":
-            tool_dict = dict(
-                name="builtin::rag",
-                args={
-                    "vector_store_ids": list(selected_vector_stores),
-                },
-            )
-            toolgroup_selection[i] = tool_dict
-
-    @st.cache_resource
-    def create_agent():
-        if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT:
-            return ReActAgent(
-                client=client,
-                model=model,
-                tools=toolgroup_selection,
-                response_format={
-                    "type": "json_schema",
-                    "json_schema": ReActOutput.model_json_schema(),
-                },
-                sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
-            )
-        else:
-            return Agent(
-                client,
-                model=model,
-                instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.",
-                tools=toolgroup_selection,
-                sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens},
-            )
-
-    st.session_state.agent_type = agent_type
-
-    agent = create_agent()
-
-    if "agent_session_id" not in st.session_state:
-        st.session_state["agent_session_id"] = agent.create_session(session_name=f"tool_demo_{uuid.uuid4()}")
-
-    session_id = st.session_state["agent_session_id"]
-
-    if "messages" not in st.session_state:
-        st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
-
-    for msg in st.session_state.messages:
-        with st.chat_message(msg["role"]):
-            st.markdown(msg["content"])
-
-    if prompt := st.chat_input(placeholder=""):
-        with st.chat_message("user"):
-            st.markdown(prompt)
-
-        st.session_state.messages.append({"role": "user", "content": prompt})
-
-        turn_response = agent.create_turn(
-            session_id=session_id,
-            messages=[{"role": "user", "content": prompt}],
-            stream=True,
-        )
-
-        def response_generator(turn_response):
-            if st.session_state.get("agent_type") == AgentType.REACT:
-                return _handle_react_response(turn_response)
-            else:
-                return _handle_regular_response(turn_response)
-
-        def _handle_react_response(turn_response):
-            current_step_content = ""
-            final_answer = None
-            tool_results = []
-
-            for response in turn_response:
-                if not hasattr(response.event, "payload"):
-                    yield (
-                        "\n\n🚨 :red[_Llama Stack server Error:_]\n"
-                        "The response received is missing an expected `payload` attribute.\n"
-                        "This could indicate a malformed response or an internal issue within the server.\n\n"
-                        f"Error details: {response}"
-                    )
-                    return
-
-                payload = response.event.payload
-
-                if payload.event_type == "step_progress" and hasattr(payload.delta, "text"):
-                    current_step_content += payload.delta.text
-                    continue
-
-                if payload.event_type == "step_complete":
-                    step_details = payload.step_details
-
-                    if step_details.step_type == "inference":
-                        yield from _process_inference_step(current_step_content, tool_results, final_answer)
-                        current_step_content = ""
-                    elif step_details.step_type == "tool_execution":
-                        tool_results = _process_tool_execution(step_details, tool_results)
-                        current_step_content = ""
-                    else:
-                        current_step_content = ""
-
-            if not final_answer and tool_results:
-                yield from _format_tool_results_summary(tool_results)
-
-        def _process_inference_step(current_step_content, tool_results, final_answer):
-            try:
-                react_output_data = json.loads(current_step_content)
-                thought = react_output_data.get("thought")
-                action = react_output_data.get("action")
-                answer = react_output_data.get("answer")
-
-                if answer and answer != "null" and answer is not None:
-                    final_answer = answer
-
-                if thought:
-                    with st.expander("🤔 Thinking...", expanded=False):
-                        st.markdown(f":grey[__{thought}__]")
-
-                if action and isinstance(action, dict):
-                    tool_name = action.get("tool_name")
-                    tool_params = action.get("tool_params")
-                    with st.expander(f'🛠 Action: Using tool "{tool_name}"', expanded=False):
-                        st.json(tool_params)
-
-                if answer and answer != "null" and answer is not None:
-                    yield f"\n\n✅ **Final Answer:**\n{answer}"
-
-            except json.JSONDecodeError:
-                yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```"
-            except Exception as e:
-                yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```"
-
-            return final_answer
-
-        def _process_tool_execution(step_details, tool_results):
-            try:
-                if hasattr(step_details, "tool_responses") and step_details.tool_responses:
-                    for tool_response in step_details.tool_responses:
-                        tool_name = tool_response.tool_name
-                        content = tool_response.content
-                        tool_results.append((tool_name, content))
-                        with st.expander(f'⚙️ Observation (Result from "{tool_name}")', expanded=False):
-                            try:
-                                parsed_content = json.loads(content)
-                                st.json(parsed_content)
-                            except json.JSONDecodeError:
-                                st.code(content, language=None)
-                else:
-                    with st.expander("⚙️ Observation", expanded=False):
-                        st.markdown(":grey[_Tool execution step completed, but no response data found._]")
-            except Exception as e:
-                with st.expander("⚙️ Error in Tool Execution", expanded=False):
-                    st.markdown(f":red[_Error processing tool execution: {str(e)}_]")
-
-            return tool_results
-
-        def _format_tool_results_summary(tool_results):
-            yield "\n\n**Here's what I found:**\n"
-            for tool_name, content in tool_results:
-                try:
-                    parsed_content = json.loads(content)
-
-                    if tool_name == "web_search" and "top_k" in parsed_content:
-                        yield from _format_web_search_results(parsed_content)
-                    elif "results" in parsed_content and isinstance(parsed_content["results"], list):
-                        yield from _format_results_list(parsed_content["results"])
-                    elif isinstance(parsed_content, dict) and len(parsed_content) > 0:
-                        yield from _format_dict_results(parsed_content)
-                    elif isinstance(parsed_content, list) and len(parsed_content) > 0:
-                        yield from _format_list_results(parsed_content)
-                except json.JSONDecodeError:
-                    yield f"\n**{tool_name}** was used but returned complex data. Check the observation for details.\n"
-                except (TypeError, AttributeError, KeyError, IndexError) as e:
-                    print(f"Error processing {tool_name} result: {type(e).__name__}: {e}")
-
-        def _format_web_search_results(parsed_content):
-            for i, result in enumerate(parsed_content["top_k"], 1):
-                if i <= 3:
-                    title = result.get("title", "Untitled")
-                    url = result.get("url", "")
-                    content_text = result.get("content", "").strip()
-                    yield f"\n- **{title}**\n {content_text}\n [Source]({url})\n"
-
-        def _format_results_list(results):
-            for i, result in enumerate(results, 1):
-                if i <= 3:
-                    if isinstance(result, dict):
-                        name = result.get("name", result.get("title", "Result " + str(i)))
-                        description = result.get("description", result.get("content", result.get("summary", "")))
-                        yield f"\n- **{name}**\n {description}\n"
-                    else:
-                        yield f"\n- {result}\n"
-
-        def _format_dict_results(parsed_content):
-            yield "\n```\n"
-            for key, value in list(parsed_content.items())[:5]:
-                if isinstance(value, str) and len(value) < 100:
-                    yield f"{key}: {value}\n"
-                else:
-                    yield f"{key}: [Complex data]\n"
-            yield "```\n"
-
-        def _format_list_results(parsed_content):
-            yield "\n"
-            for _, item in enumerate(parsed_content[:3], 1):
-                if isinstance(item, str):
-                    yield f"- {item}\n"
-                elif isinstance(item, dict) and "text" in item:
-                    yield f"- {item['text']}\n"
-                elif isinstance(item, dict) and len(item) > 0:
-                    first_value = next(iter(item.values()))
-                    if isinstance(first_value, str) and len(first_value) < 100:
-                        yield f"- {first_value}\n"
-
-        def _handle_regular_response(turn_response):
-            for response in turn_response:
-                if hasattr(response.event, "payload"):
-                    print(response.event.payload)
-                    if response.event.payload.event_type == "step_progress":
-                        if hasattr(response.event.payload.delta, "text"):
-                            yield response.event.payload.delta.text
-                    if response.event.payload.event_type == "step_complete":
-                        if response.event.payload.step_details.step_type == "tool_execution":
-                            if response.event.payload.step_details.tool_calls:
-                                tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name)
-                                yield f'\n\n🛠 :grey[_Using "{tool_name}" tool:_]\n\n'
-                            else:
-                                yield "No tool_calls present in step_details"
-                else:
-                    yield f"Error occurred in the Llama Stack Cluster: {response}"
-
-        with st.chat_message("assistant"):
-            response_content = st.write_stream(response_generator(turn_response))
-
-        st.session_state.messages.append({"role": "assistant", "content": response_content})
-
-
-tool_chat_page()

--- llama_stack/distributions/dell/build.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-version: 2
-distribution_spec:
-  description: Dell's distribution of Llama Stack. TGI inference via Dell's custom
-    container
-  providers:
-    inference:
-    - provider_type: remote::tgi
-    - provider_type: inline::sentence-transformers
-    vector_io:
-    - provider_type: inline::faiss
-    - provider_type: remote::chromadb
-    - provider_type: remote::pgvector
-    safety:
-    - provider_type: inline::llama-guard
-    agents:
-    - provider_type: inline::meta-reference
-    eval:
-    - provider_type: inline::meta-reference
-    datasetio:
-    - provider_type: remote::huggingface
-    - provider_type: inline::localfs
-    scoring:
-    - provider_type: inline::basic
-    - provider_type: inline::llm-as-judge
-    - provider_type: inline::braintrust
-    tool_runtime:
-    - provider_type: remote::brave-search
-    - provider_type: remote::tavily-search
-    - provider_type: inline::rag-runtime
-image_type: venv
-additional_pip_packages:
-- aiosqlite
-- sqlalchemy[asyncio]

--- llama_stack/distributions/meta-reference-gpu/build.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-version: 2
-distribution_spec:
-  description: Use Meta Reference for running LLM inference
-  providers:
-    inference:
-    - provider_type: inline::meta-reference
-    vector_io:
-    - provider_type: inline::faiss
-    - provider_type: remote::chromadb
-    - provider_type: remote::pgvector
-    safety:
-    - provider_type: inline::llama-guard
-    agents:
-    - provider_type: inline::meta-reference
-    eval:
-    - provider_type: inline::meta-reference
-    datasetio:
-    - provider_type: remote::huggingface
-    - provider_type: inline::localfs
-    scoring:
-    - provider_type: inline::basic
-    - provider_type: inline::llm-as-judge
-    - provider_type: inline::braintrust
-    tool_runtime:
-    - provider_type: remote::brave-search
-    - provider_type: remote::tavily-search
-    - provider_type: inline::rag-runtime
-    - provider_type: remote::model-context-protocol
-image_type: venv
-additional_pip_packages:
-- aiosqlite
-- sqlalchemy[asyncio]

--- llama_stack/distributions/nvidia/build.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-version: 2
-distribution_spec:
-  description: Use NVIDIA NIM for running LLM inference, evaluation and safety
-  providers:
-    inference:
-    - provider_type: remote::nvidia
-    vector_io:
-    - provider_type: inline::faiss
-    safety:
-    - provider_type: remote::nvidia
-    agents:
-    - provider_type: inline::meta-reference
-    eval:
-    - provider_type: remote::nvidia
-    post_training:
-    - provider_type: remote::nvidia
-    datasetio:
-    - provider_type: inline::localfs
-    - provider_type: remote::nvidia
-    scoring:
-    - provider_type: inline::basic
-    tool_runtime:
-    - provider_type: inline::rag-runtime
-    files:
-    - provider_type: inline::localfs
-image_type: venv
-additional_pip_packages:
-- aiosqlite
-- sqlalchemy[asyncio]

--- llama_stack/distributions/open-benchmark/build.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-version: 2
-distribution_spec:
-  description: Distribution for running open benchmarks
-  providers:
-    inference:
-    - provider_type: remote::openai
-    - provider_type: remote::anthropic
-    - provider_type: remote::gemini
-    - provider_type: remote::groq
-    - provider_type: remote::together
-    vector_io:
-    - provider_type: inline::sqlite-vec
-    - provider_type: remote::chromadb
-    - provider_type: remote::pgvector
-    safety:
-    - provider_type: inline::llama-guard
-    agents:
-    - provider_type: inline::meta-reference
-    eval:
-    - provider_type: inline::meta-reference
-    datasetio:
-    - provider_type: remote::huggingface
-    - provider_type: inline::localfs
-    scoring:
-    - provider_type: inline::basic
-    - provider_type: inline::llm-as-judge
-    - provider_type: inline::braintrust
-    tool_runtime:
-    - provider_type: remote::brave-search
-    - provider_type: remote::tavily-search
-    - provider_type: inline::rag-runtime
-    - provider_type: remote::model-context-protocol
-image_type: venv
-additional_pip_packages:
-- aiosqlite
-- sqlalchemy[asyncio]

--- llama_stack/distributions/postgres-demo/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .postgres_demo import get_distribution_template  # noqa: F401

--- llama_stack/distributions/postgres-demo/build.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-version: 2
-distribution_spec:
-  description: Quick start template for running Llama Stack with several popular providers
-  providers:
-    inference:
-    - provider_type: remote::vllm
-    - provider_type: inline::sentence-transformers
-    vector_io:
-    - provider_type: remote::chromadb
-    safety:
-    - provider_type: inline::llama-guard
-    agents:
-    - provider_type: inline::meta-reference
-    tool_runtime:
-    - provider_type: remote::brave-search
-    - provider_type: remote::tavily-search
-    - provider_type: inline::rag-runtime
-    - provider_type: remote::model-context-protocol
-image_type: venv
-additional_pip_packages:
-- asyncpg
-- psycopg2-binary
-- sqlalchemy[asyncio]