llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0

--- a/llama_stack/core/ui/modules/api.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import os
-
-from llama_stack_client import LlamaStackClient
-
-
-class LlamaStackApi:
-    def __init__(self):
-        self.client = LlamaStackClient(
-            base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
-            provider_data={
-                "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""),
-                "together_api_key": os.environ.get("TOGETHER_API_KEY", ""),
-                "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""),
-                "openai_api_key": os.environ.get("OPENAI_API_KEY", ""),
-                "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""),
-            },
-        )
-
-    def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None):
-        """Run scoring on a single row"""
-        if not scoring_params:
-            scoring_params = dict.fromkeys(scoring_function_ids)
-        return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)
-
-
-llama_stack_api = LlamaStackApi()

--- a/llama_stack/core/ui/modules/utils.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import base64
-import os
-
-import pandas as pd
-import streamlit as st
-
-
-def process_dataset(file):
-    if file is None:
-        return "No file uploaded", None
-
-    try:
-        # Determine file type and read accordingly
-        file_ext = os.path.splitext(file.name)[1].lower()
-        if file_ext == ".csv":
-            df = pd.read_csv(file)
-        elif file_ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(file)
-        else:
-            return "Unsupported file format. Please upload a CSV or Excel file.", None
-
-        return df
-
-    except Exception as e:
-        st.error(f"Error processing file: {str(e)}")
-        return None
-
-
-def data_url_from_file(file) -> str:
-    file_content = file.getvalue()
-    base64_content = base64.b64encode(file_content).decode("utf-8")
-    mime_type = file.type
-
-    data_url = f"data:{mime_type};base64,{base64_content}"
-
-    return data_url

--- a/llama_stack/core/ui/page/distribution/datasets.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def datasets():
-    st.header("Datasets")
-
-    datasets_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()}
-    if len(datasets_info) > 0:
-        selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
-        st.json(datasets_info[selected_dataset], expanded=True)

--- a/llama_stack/core/ui/page/distribution/eval_tasks.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def benchmarks():
-    # Benchmarks Section
-    st.header("Benchmarks")
-
-    benchmarks_info = {d.identifier: d.to_dict() for d in llama_stack_api.client.benchmarks.list()}
-
-    if len(benchmarks_info) > 0:
-        selected_benchmark = st.selectbox("Select an eval task", list(benchmarks_info.keys()), key="benchmark_inspect")
-        st.json(benchmarks_info[selected_benchmark], expanded=True)

--- a/llama_stack/core/ui/page/distribution/models.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def models():
-    # Models Section
-    st.header("Models")
-    models_info = {m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()}
-
-    selected_model = st.selectbox("Select a model", list(models_info.keys()))
-    st.json(models_info[selected_model])

--- a/llama_stack/core/ui/page/distribution/providers.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def providers():
-    st.header("🔍 API Providers")
-    apis_providers_lst = llama_stack_api.client.providers.list()
-    api_to_providers = {}
-    for api_provider in apis_providers_lst:
-        if api_provider.api in api_to_providers:
-            api_to_providers[api_provider.api].append(api_provider)
-        else:
-            api_to_providers[api_provider.api] = [api_provider]
-
-    for api in api_to_providers.keys():
-        st.markdown(f"###### {api}")
-        st.dataframe([x.to_dict() for x in api_to_providers[api]], width=500)
-
-
-providers()

--- a/llama_stack/core/ui/page/distribution/resources.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from streamlit_option_menu import option_menu
-
-from llama_stack.core.ui.page.distribution.datasets import datasets
-from llama_stack.core.ui.page.distribution.eval_tasks import benchmarks
-from llama_stack.core.ui.page.distribution.models import models
-from llama_stack.core.ui.page.distribution.scoring_functions import scoring_functions
-from llama_stack.core.ui.page.distribution.shields import shields
-
-
-def resources_page():
-    options = [
-        "Models",
-        "Shields",
-        "Scoring Functions",
-        "Datasets",
-        "Benchmarks",
-    ]
-    icons = ["magic", "shield", "file-bar-graph", "database", "list-task"]
-    selected_resource = option_menu(
-        None,
-        options,
-        icons=icons,
-        orientation="horizontal",
-        styles={
-            "nav-link": {
-                "font-size": "12px",
-            },
-        },
-    )
-    if selected_resource == "Benchmarks":
-        benchmarks()
-    elif selected_resource == "Datasets":
-        datasets()
-    elif selected_resource == "Models":
-        models()
-    elif selected_resource == "Scoring Functions":
-        scoring_functions()
-    elif selected_resource == "Shields":
-        shields()
-
-
-resources_page()

--- a/llama_stack/core/ui/page/distribution/scoring_functions.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def scoring_functions():
-    st.header("Scoring Functions")
-
-    scoring_functions_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.scoring_functions.list()}
-
-    selected_scoring_function = st.selectbox("Select a scoring function", list(scoring_functions_info.keys()))
-    st.json(scoring_functions_info[selected_scoring_function], expanded=True)

--- a/llama_stack/core/ui/page/distribution/shields.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def shields():
-    # Shields Section
-    st.header("Shields")
-
-    shields_info = {s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()}
-
-    selected_shield = st.selectbox("Select a shield", list(shields_info.keys()))
-    st.json(shields_info[selected_shield])

--- a/llama_stack/core/ui/page/evaluations/app_eval.py
+++ /dev/null
@@ -1,143 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-
-import pandas as pd
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-from llama_stack.core.ui.modules.utils import process_dataset
-
-
-def application_evaluation_page():
-    st.set_page_config(page_title="Evaluations (Scoring)", page_icon="🦙")
-    st.title("📊 Evaluations (Scoring)")
-
-    # File uploader
-    uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])
-
-    if uploaded_file is None:
-        st.error("No file uploaded")
-        return
-
-    # Process uploaded file
-    df = process_dataset(uploaded_file)
-    if df is None:
-        st.error("Error processing file")
-        return
-
-    # Display dataset information
-    st.success("Dataset loaded successfully!")
-
-    # Display dataframe preview
-    st.subheader("Dataset Preview")
-    st.dataframe(df)
-
-    # Select Scoring Functions to Run Evaluation On
-    st.subheader("Select Scoring Functions")
-    scoring_functions = llama_stack_api.client.scoring_functions.list()
-    scoring_functions = {sf.identifier: sf for sf in scoring_functions}
-    scoring_functions_names = list(scoring_functions.keys())
-    selected_scoring_functions = st.multiselect(
-        "Choose one or more scoring functions",
-        options=scoring_functions_names,
-        help="Choose one or more scoring functions.",
-    )
-
-    available_models = llama_stack_api.client.models.list()
-    available_models = [m.identifier for m in available_models]
-
-    scoring_params = {}
-    if selected_scoring_functions:
-        st.write("Selected:")
-        for scoring_fn_id in selected_scoring_functions:
-            scoring_fn = scoring_functions[scoring_fn_id]
-            st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
-            new_params = None
-            if scoring_fn.params:
-                new_params = {}
-                for param_name, param_value in scoring_fn.params.to_dict().items():
-                    if param_name == "type":
-                        new_params[param_name] = param_value
-                        continue
-
-                    if param_name == "judge_model":
-                        value = st.selectbox(
-                            f"Select **{param_name}** for {scoring_fn_id}",
-                            options=available_models,
-                            index=0,
-                            key=f"{scoring_fn_id}_{param_name}",
-                        )
-                        new_params[param_name] = value
-                    else:
-                        value = st.text_area(
-                            f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
-                            value=json.dumps(param_value, indent=2),
-                            height=80,
-                        )
-                        try:
-                            new_params[param_name] = json.loads(value)
-                        except json.JSONDecodeError:
-                            st.error(f"Invalid JSON for **{param_name}** in {scoring_fn_id}")
-
-                st.json(new_params)
-            scoring_params[scoring_fn_id] = new_params
-
-    # Add run evaluation button & slider
-    total_rows = len(df)
-    num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)
-
-    if st.button("Run Evaluation"):
-        progress_text = "Running evaluation..."
-        progress_bar = st.progress(0, text=progress_text)
-        rows = df.to_dict(orient="records")
-        if num_rows < total_rows:
-            rows = rows[:num_rows]
-
-        # Create separate containers for progress text and results
-        progress_text_container = st.empty()
-        results_container = st.empty()
-        output_res = {}
-        for i, r in enumerate(rows):
-            # Update progress
-            progress = i / len(rows)
-            progress_bar.progress(progress, text=progress_text)
-
-            # Run evaluation for current row
-            score_res = llama_stack_api.run_scoring(
-                r,
-                scoring_function_ids=selected_scoring_functions,
-                scoring_params=scoring_params,
-            )
-
-            for k in r.keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(r[k])
-
-            for fn_id in selected_scoring_functions:
-                if fn_id not in output_res:
-                    output_res[fn_id] = []
-                output_res[fn_id].append(score_res.results[fn_id].score_rows[0])
-
-            # Display current row results using separate containers
-            progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})")
-            results_container.json(
-                score_res.to_json(),
-                expanded=2,
-            )
-
-        progress_bar.progress(1.0, text="Evaluation complete!")
-
-        # Display results in dataframe
-        if output_res:
-            output_df = pd.DataFrame(output_res)
-            st.subheader("Evaluation Results")
-            st.dataframe(output_df)
-
-
-application_evaluation_page()

--- a/llama_stack/core/ui/page/evaluations/native_eval.py
+++ /dev/null
@@ -1,253 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-
-import pandas as pd
-import streamlit as st
-
-from llama_stack.core.ui.modules.api import llama_stack_api
-
-
-def select_benchmark_1():
-    # Select Benchmarks
-    st.subheader("1. Choose An Eval Task")
-    benchmarks = llama_stack_api.client.benchmarks.list()
-    benchmarks = {et.identifier: et for et in benchmarks}
-    benchmarks_names = list(benchmarks.keys())
-    selected_benchmark = st.selectbox(
-        "Choose an eval task.",
-        options=benchmarks_names,
-        help="Choose an eval task. Each eval task is parameterized by a dataset, and list of scoring functions.",
-    )
-    with st.expander("View Eval Task"):
-        st.json(benchmarks[selected_benchmark], expanded=True)
-
-    st.session_state["selected_benchmark"] = selected_benchmark
-    st.session_state["benchmarks"] = benchmarks
-    if st.button("Confirm", key="confirm_1"):
-        st.session_state["selected_benchmark_1_next"] = True
-
-
-def define_eval_candidate_2():
-    if not st.session_state.get("selected_benchmark_1_next", None):
-        return
-
-    st.subheader("2. Define Eval Candidate")
-    st.info(
-        """
-        Define the configurations for the evaluation candidate model or agent used for generation.
-        Select "model" if you want to run generation with inference API, or "agent" if you want to run generation with agent API through specifying AgentConfig.
-        """
-    )
-    with st.expander("Define Eval Candidate", expanded=True):
-        # Define Eval Candidate
-        candidate_type = st.radio("Candidate Type", ["model", "agent"])
-
-        available_models = llama_stack_api.client.models.list()
-        available_models = [model.identifier for model in available_models]
-        selected_model = st.selectbox(
-            "Choose a model",
-            available_models,
-            index=0,
-        )
-
-        # Sampling Parameters
-        st.markdown("##### Sampling Parameters")
-        temperature = st.slider(
-            "Temperature",
-            min_value=0.0,
-            max_value=1.0,
-            value=0.0,
-            step=0.1,
-            help="Controls the randomness of the response. Higher values make the output more creative and unexpected, lower values make it more conservative and predictable",
-        )
-        top_p = st.slider(
-            "Top P",
-            min_value=0.0,
-            max_value=1.0,
-            value=0.95,
-            step=0.1,
-        )
-        max_tokens = st.slider(
-            "Max Tokens",
-            min_value=0,
-            max_value=4096,
-            value=512,
-            step=1,
-            help="The maximum number of tokens to generate",
-        )
-        repetition_penalty = st.slider(
-            "Repetition Penalty",
-            min_value=1.0,
-            max_value=2.0,
-            value=1.0,
-            step=0.1,
-            help="Controls the likelihood for generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty, 2 will strongly discourage model to repeat words or phrases.",
-        )
-        if candidate_type == "model":
-            if temperature > 0.0:
-                strategy = {
-                    "type": "top_p",
-                    "temperature": temperature,
-                    "top_p": top_p,
-                }
-            else:
-                strategy = {"type": "greedy"}
-
-            eval_candidate = {
-                "type": "model",
-                "model": selected_model,
-                "sampling_params": {
-                    "strategy": strategy,
-                    "max_tokens": max_tokens,
-                    "repetition_penalty": repetition_penalty,
-                },
-            }
-        elif candidate_type == "agent":
-            system_prompt = st.text_area(
-                "System Prompt",
-                value="You are a helpful AI assistant.",
-                help="Initial instructions given to the AI to set its behavior and context",
-            )
-            tools_json = st.text_area(
-                "Tools Configuration (JSON)",
-                value=json.dumps(
-                    [
-                        {
-                            "type": "brave_search",
-                            "engine": "brave",
-                            "api_key": "ENTER_BRAVE_API_KEY_HERE",
-                        }
-                    ]
-                ),
-                help="Enter tool configurations in JSON format. Each tool should have a name, description, and parameters.",
-                height=200,
-            )
-            try:
-                tools = json.loads(tools_json)
-            except json.JSONDecodeError:
-                st.error("Invalid JSON format for tools configuration")
-                tools = []
-            eval_candidate = {
-                "type": "agent",
-                "config": {
-                    "model": selected_model,
-                    "instructions": system_prompt,
-                    "tools": tools,
-                    "tool_choice": "auto",
-                    "tool_prompt_format": "json",
-                    "input_shields": [],
-                    "output_shields": [],
-                    "enable_session_persistence": False,
-                },
-            }
-        st.session_state["eval_candidate"] = eval_candidate
-
-    if st.button("Confirm", key="confirm_2"):
-        st.session_state["selected_eval_candidate_2_next"] = True
-
-
-def run_evaluation_3():
-    if not st.session_state.get("selected_eval_candidate_2_next", None):
-        return
-
-    st.subheader("3. Run Evaluation")
-    # Add info box to explain configurations being used
-    st.info(
-        """
-        Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button.
-        """
-    )
-    selected_benchmark = st.session_state["selected_benchmark"]
-    benchmarks = st.session_state["benchmarks"]
-    eval_candidate = st.session_state["eval_candidate"]
-
-    dataset_id = benchmarks[selected_benchmark].dataset_id
-    rows = llama_stack_api.client.datasets.iterrows(
-        dataset_id=dataset_id,
-    )
-    total_rows = len(rows.data)
-    # Add number of examples control
-    num_rows = st.number_input(
-        "Number of Examples to Evaluate",
-        min_value=1,
-        max_value=total_rows,
-        value=5,
-        help="Number of examples from the dataset to evaluate. ",
-    )
-
-    benchmark_config = {
-        "type": "benchmark",
-        "eval_candidate": eval_candidate,
-        "scoring_params": {},
-    }
-
-    with st.expander("View Evaluation Task", expanded=True):
-        st.json(benchmarks[selected_benchmark], expanded=True)
-    with st.expander("View Evaluation Task Configuration", expanded=True):
-        st.json(benchmark_config, expanded=True)
-
-    # Add run button and handle evaluation
-    if st.button("Run Evaluation"):
-        progress_text = "Running evaluation..."
-        progress_bar = st.progress(0, text=progress_text)
-        rows = rows.data
-        if num_rows < total_rows:
-            rows = rows[:num_rows]
-
-        # Create separate containers for progress text and results
-        progress_text_container = st.empty()
-        results_container = st.empty()
-        output_res = {}
-        for i, r in enumerate(rows):
-            # Update progress
-            progress = i / len(rows)
-            progress_bar.progress(progress, text=progress_text)
-            # Run evaluation for current row
-            eval_res = llama_stack_api.client.eval.evaluate_rows(
-                benchmark_id=selected_benchmark,
-                input_rows=[r],
-                scoring_functions=benchmarks[selected_benchmark].scoring_functions,
-                benchmark_config=benchmark_config,
-            )
-
-            for k in r.keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(r[k])
-
-            for k in eval_res.generations[0].keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(eval_res.generations[0][k])
-
-            for scoring_fn in benchmarks[selected_benchmark].scoring_functions:
-                if scoring_fn not in output_res:
-                    output_res[scoring_fn] = []
-                output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0])
-
-            progress_text_container.write(f"Expand to see current processed result ({i + 1} / {len(rows)})")
-            results_container.json(eval_res, expanded=2)
-
-        progress_bar.progress(1.0, text="Evaluation complete!")
-        # Display results in dataframe
-        if output_res:
-            output_df = pd.DataFrame(output_res)
-            st.subheader("Evaluation Results")
-            st.dataframe(output_df)
-
-
-def native_evaluation_page():
-    st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙")
-    st.title("📊 Evaluations (Generation + Scoring)")
-
-    select_benchmark_1()
-    define_eval_candidate_2()
-    run_evaluation_3()
-
-
-native_evaluation_page()