llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
--- /dev/null
+++ llama_stack_api/batches/fastapi_routes.py
@@ -0,0 +1,113 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""FastAPI router for the Batches API.
+
+This module defines the FastAPI router for the Batches API using standard
+FastAPI route decorators. The router is defined in the API package to keep
+all API-related code together.
+"""
+
+from typing import Annotated
+
+from fastapi import APIRouter, Body, Depends
+
+from llama_stack_api.batches.models import (
+    CancelBatchRequest,
+    CreateBatchRequest,
+    ListBatchesRequest,
+    RetrieveBatchRequest,
+)
+from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
+from llama_stack_api.version import LLAMA_STACK_API_V1
+
+from .api import Batches
+from .models import BatchObject, ListBatchesResponse
+
+# Automatically generate dependency functions from Pydantic models
+# This ensures the models are the single source of truth for descriptions
+get_retrieve_batch_request = create_path_dependency(RetrieveBatchRequest)
+get_cancel_batch_request = create_path_dependency(CancelBatchRequest)
+
+
+# Automatically generate dependency function from Pydantic model
+# This ensures the model is the single source of truth for descriptions and defaults
+get_list_batches_request = create_query_dependency(ListBatchesRequest)
+
+
+def create_router(impl: Batches) -> APIRouter:
+    """Create a FastAPI router for the Batches API.
+
+    Args:
+        impl: The Batches implementation instance
+
+    Returns:
+        APIRouter configured for the Batches API
+    """
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1}",
+        tags=["Batches"],
+        responses=standard_responses,
+    )
+
+    @router.post(
+        "/batches",
+        response_model=BatchObject,
+        summary="Create a new batch for processing multiple API requests.",
+        description="Create a new batch for processing multiple API requests.",
+        responses={
+            200: {"description": "The created batch object."},
+            409: {"description": "Conflict: The idempotency key was previously used with different parameters."},
+        },
+    )
+    async def create_batch(
+        request: Annotated[CreateBatchRequest, Body(...)],
+    ) -> BatchObject:
+        return await impl.create_batch(request)
+
+    @router.get(
+        "/batches/{batch_id}",
+        response_model=BatchObject,
+        summary="Retrieve information about a specific batch.",
+        description="Retrieve information about a specific batch.",
+        responses={
+            200: {"description": "The batch object."},
+        },
+    )
+    async def retrieve_batch(
+        request: Annotated[RetrieveBatchRequest, Depends(get_retrieve_batch_request)],
+    ) -> BatchObject:
+        return await impl.retrieve_batch(request)
+
+    @router.post(
+        "/batches/{batch_id}/cancel",
+        response_model=BatchObject,
+        summary="Cancel a batch that is in progress.",
+        description="Cancel a batch that is in progress.",
+        responses={
+            200: {"description": "The updated batch object."},
+        },
+    )
+    async def cancel_batch(
+        request: Annotated[CancelBatchRequest, Depends(get_cancel_batch_request)],
+    ) -> BatchObject:
+        return await impl.cancel_batch(request)
+
+    @router.get(
+        "/batches",
+        response_model=ListBatchesResponse,
+        summary="List all batches for the current user.",
+        description="List all batches for the current user.",
+        responses={
+            200: {"description": "A list of batch objects."},
+        },
+    )
+    async def list_batches(
+        request: Annotated[ListBatchesRequest, Depends(get_list_batches_request)],
+    ) -> ListBatchesResponse:
+        return await impl.list_batches(request)
+
+    return router
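For orientation (this example is not part of the diff), the factory above is meant to be mounted on a FastAPI application by whatever server owns the Batches implementation; `impl` below stands in for any object satisfying the Batches protocol, and the routes land under the LLAMA_STACK_API_V1 prefix.

from fastapi import FastAPI

from llama_stack_api.batches.fastapi_routes import create_router


def build_batches_app(impl) -> FastAPI:
    """Illustrative sketch: mount the generated Batches router on a fresh app."""
    app = FastAPI()
    # create_router() registers the create/retrieve/cancel/list batch routes
    # shown above under the LLAMA_STACK_API_V1 prefix.
    app.include_router(create_router(impl))
    return app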
--- /dev/null
+++ llama_stack_api/batches/models.py
@@ -0,0 +1,78 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Pydantic models for Batches API requests and responses.
+
+This module defines the request and response models for the Batches API
+using Pydantic with Field descriptions for OpenAPI schema generation.
+"""
+
+from typing import Literal
+
+from openai.types import Batch as BatchObject
+from pydantic import BaseModel, Field
+
+from llama_stack_api.schema_utils import json_schema_type
+
+
+@json_schema_type
+class CreateBatchRequest(BaseModel):
+    """Request model for creating a batch."""
+
+    input_file_id: str = Field(..., description="The ID of an uploaded file containing requests for the batch.")
+    endpoint: str = Field(..., description="The endpoint to be used for all requests in the batch.")
+    completion_window: Literal["24h"] = Field(
+        ..., description="The time window within which the batch should be processed."
+    )
+    metadata: dict[str, str] | None = Field(default=None, description="Optional metadata for the batch.")
+    idempotency_key: str | None = Field(
+        default=None, description="Optional idempotency key. When provided, enables idempotent behavior."
+    )
+
+
+@json_schema_type
+class ListBatchesRequest(BaseModel):
+    """Request model for listing batches."""
+
+    after: str | None = Field(
+        default=None, description="Optional cursor for pagination. Returns batches after this ID."
+    )
+    limit: int = Field(default=20, description="Maximum number of batches to return. Defaults to 20.")
+
+
+@json_schema_type
+class RetrieveBatchRequest(BaseModel):
+    """Request model for retrieving a batch."""
+
+    batch_id: str = Field(..., description="The ID of the batch to retrieve.")
+
+
+@json_schema_type
+class CancelBatchRequest(BaseModel):
+    """Request model for canceling a batch."""
+
+    batch_id: str = Field(..., description="The ID of the batch to cancel.")
+
+
+@json_schema_type
+class ListBatchesResponse(BaseModel):
+    """Response containing a list of batch objects."""
+
+    object: Literal["list"] = "list"
+    data: list[BatchObject] = Field(..., description="List of batch objects")
+    first_id: str | None = Field(default=None, description="ID of the first batch in the list")
+    last_id: str | None = Field(default=None, description="ID of the last batch in the list")
+    has_more: bool = Field(default=False, description="Whether there are more batches available")
+
+
+__all__ = [
+    "CreateBatchRequest",
+    "ListBatchesRequest",
+    "RetrieveBatchRequest",
+    "CancelBatchRequest",
+    "ListBatchesResponse",
+    "BatchObject",
+]
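As an illustration (not part of the diff), the request models above validate payloads on their own, so a batch creation body can be constructed and serialized directly; the file ID and endpoint values here are hypothetical.

from llama_stack_api.batches.models import CreateBatchRequest, ListBatchesRequest

req = CreateBatchRequest(
    input_file_id="file-abc123",       # hypothetical uploaded-file ID
    endpoint="/v1/chat/completions",   # hypothetical target endpoint
    completion_window="24h",           # only "24h" is accepted (Literal field)
    metadata={"project": "nightly-eval"},
)
print(req.model_dump_json())

# ListBatchesRequest carries the pagination defaults used by the list route.
assert ListBatchesRequest().limit == 20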
--- /dev/null
+++ llama_stack_api/benchmarks/__init__.py
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Benchmarks API protocol and models.
+
+This module contains the Benchmarks protocol definition.
+Pydantic models are defined in llama_stack_api.benchmarks.models.
+The FastAPI router is defined in llama_stack_api.benchmarks.fastapi_routes.
+"""
+
+# Import fastapi_routes for router factory access
+from . import fastapi_routes
+
+# Import protocol for re-export
+from .api import Benchmarks
+
+# Import models for re-export
+from .models import (
+    Benchmark,
+    BenchmarkInput,
+    CommonBenchmarkFields,
+    GetBenchmarkRequest,
+    ListBenchmarksRequest,
+    ListBenchmarksResponse,
+    RegisterBenchmarkRequest,
+    UnregisterBenchmarkRequest,
+)
+
+__all__ = [
+    "Benchmarks",
+    "Benchmark",
+    "BenchmarkInput",
+    "CommonBenchmarkFields",
+    "ListBenchmarksResponse",
+    "ListBenchmarksRequest",
+    "GetBenchmarkRequest",
+    "RegisterBenchmarkRequest",
+    "UnregisterBenchmarkRequest",
+    "fastapi_routes",
+]
--- /dev/null
+++ llama_stack_api/benchmarks/api.py
@@ -0,0 +1,39 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Protocol, runtime_checkable
+
+from .models import (
+    Benchmark,
+    GetBenchmarkRequest,
+    ListBenchmarksRequest,
+    ListBenchmarksResponse,
+    RegisterBenchmarkRequest,
+    UnregisterBenchmarkRequest,
+)
+
+
+@runtime_checkable
+class Benchmarks(Protocol):
+    async def list_benchmarks(
+        self,
+        request: ListBenchmarksRequest,
+    ) -> ListBenchmarksResponse: ...
+
+    async def get_benchmark(
+        self,
+        request: GetBenchmarkRequest,
+    ) -> Benchmark: ...
+
+    async def register_benchmark(
+        self,
+        request: RegisterBenchmarkRequest,
+    ) -> None: ...
+
+    async def unregister_benchmark(
+        self,
+        request: UnregisterBenchmarkRequest,
+    ) -> None: ...
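Since Benchmarks is declared @runtime_checkable, any object exposing these four async methods satisfies the protocol structurally. A minimal stand-in (illustrative only, not the package's actual routing-table implementation) looks like this:

from llama_stack_api.benchmarks.api import Benchmarks
from llama_stack_api.benchmarks.models import (
    Benchmark,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)


class NullBenchmarks:
    """Hypothetical stub: matching method names is all the runtime check requires."""

    async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
        return ListBenchmarksResponse(data=[])

    async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
        raise KeyError(request.benchmark_id)

    async def register_benchmark(self, request: RegisterBenchmarkRequest) -> None:
        return None

    async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
        return None


# runtime_checkable protocols verify method presence, not signatures or return types.
assert isinstance(NullBenchmarks(), Benchmarks)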
--- /dev/null
+++ llama_stack_api/benchmarks/fastapi_routes.py
@@ -0,0 +1,109 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""FastAPI router for the Benchmarks API.
+
+This module defines the FastAPI router for the Benchmarks API using standard
+FastAPI route decorators. The router is defined in the API package to keep
+all API-related code together.
+"""
+
+from typing import Annotated
+
+from fastapi import APIRouter, Body, Depends
+
+from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
+from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
+
+from .api import Benchmarks
+from .models import (
+    Benchmark,
+    GetBenchmarkRequest,
+    ListBenchmarksRequest,
+    ListBenchmarksResponse,
+    RegisterBenchmarkRequest,
+    UnregisterBenchmarkRequest,
+)
+
+# Automatically generate dependency functions from Pydantic models
+# This ensures the models are the single source of truth for descriptions
+get_list_benchmarks_request = create_query_dependency(ListBenchmarksRequest)
+get_get_benchmark_request = create_path_dependency(GetBenchmarkRequest)
+get_unregister_benchmark_request = create_path_dependency(UnregisterBenchmarkRequest)
+
+
+def create_router(impl: Benchmarks) -> APIRouter:
+    """Create a FastAPI router for the Benchmarks API.
+
+    Args:
+        impl: The Benchmarks implementation instance
+
+    Returns:
+        APIRouter configured for the Benchmarks API
+    """
+    router = APIRouter(
+        prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
+        tags=["Benchmarks"],
+        responses=standard_responses,
+    )
+
+    @router.get(
+        "/eval/benchmarks",
+        response_model=ListBenchmarksResponse,
+        summary="List all benchmarks.",
+        description="List all benchmarks.",
+        responses={
+            200: {"description": "A ListBenchmarksResponse."},
+        },
+    )
+    async def list_benchmarks(
+        request: Annotated[ListBenchmarksRequest, Depends(get_list_benchmarks_request)],
+    ) -> ListBenchmarksResponse:
+        return await impl.list_benchmarks(request)
+
+    @router.get(
+        "/eval/benchmarks/{benchmark_id}",
+        response_model=Benchmark,
+        summary="Get a benchmark by its ID.",
+        description="Get a benchmark by its ID.",
+        responses={
+            200: {"description": "A Benchmark."},
+        },
+    )
+    async def get_benchmark(
+        request: Annotated[GetBenchmarkRequest, Depends(get_get_benchmark_request)],
+    ) -> Benchmark:
+        return await impl.get_benchmark(request)
+
+    @router.post(
+        "/eval/benchmarks",
+        summary="Register a benchmark.",
+        description="Register a benchmark.",
+        responses={
+            200: {"description": "The benchmark was successfully registered."},
+        },
+        deprecated=True,
+    )
+    async def register_benchmark(
+        request: Annotated[RegisterBenchmarkRequest, Body(...)],
+    ) -> None:
+        return await impl.register_benchmark(request)
+
+    @router.delete(
+        "/eval/benchmarks/{benchmark_id}",
+        summary="Unregister a benchmark.",
+        description="Unregister a benchmark.",
+        responses={
+            200: {"description": "The benchmark was successfully unregistered."},
+        },
+        deprecated=True,
+    )
+    async def unregister_benchmark(
+        request: Annotated[UnregisterBenchmarkRequest, Depends(get_unregister_benchmark_request)],
+    ) -> None:
+        return await impl.unregister_benchmark(request)
+
+    return router
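The create_path_dependency and create_query_dependency helpers imported from llama_stack_api.router_utils are not shown in this diff; presumably they turn a Pydantic request model into a FastAPI dependency so field descriptions and defaults feed the OpenAPI schema from a single place. A rough sketch of that general technique (an assumption about the mechanism, not the package's actual helper) is:

import inspect
from collections.abc import Callable

from fastapi import Query
from pydantic import BaseModel


def sketch_query_dependency(model_cls: type[BaseModel]) -> Callable:
    """Hypothetical illustration: expose each model field as a query parameter
    and rebuild the model so route handlers receive one validated object."""

    def dependency(**field_values):
        return model_cls(**field_values)

    parameters = []
    for name, field in model_cls.model_fields.items():
        parameters.append(
            inspect.Parameter(
                name,
                inspect.Parameter.KEYWORD_ONLY,
                default=Query(
                    default=... if field.is_required() else field.default,
                    description=field.description,
                ),
                annotation=field.annotation,
            )
        )
    # FastAPI reads __signature__ to discover the query parameters.
    dependency.__signature__ = inspect.Signature(parameters)
    return dependency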
--- /dev/null
+++ llama_stack_api/benchmarks/models.py
@@ -0,0 +1,109 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""Pydantic models for Benchmarks API requests and responses.
+
+This module defines the request and response models for the Benchmarks API
+using Pydantic with Field descriptions for OpenAPI schema generation.
+"""
+
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+from llama_stack_api.resource import Resource, ResourceType
+from llama_stack_api.schema_utils import json_schema_type
+
+
+@json_schema_type
+class ListBenchmarksRequest(BaseModel):
+    """Request model for listing benchmarks."""
+
+    pass
+
+
+@json_schema_type
+class GetBenchmarkRequest(BaseModel):
+    """Request model for getting a benchmark."""
+
+    benchmark_id: str = Field(..., description="The ID of the benchmark to get.")
+
+
+@json_schema_type
+class RegisterBenchmarkRequest(BaseModel):
+    """Request model for registering a benchmark."""
+
+    benchmark_id: str = Field(..., description="The ID of the benchmark to register.")
+    dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.")
+    scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.")
+    provider_benchmark_id: str | None = Field(
+        default=None, description="The ID of the provider benchmark to use for the benchmark."
+    )
+    provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
+    metadata: dict[str, Any] | None = Field(default=None, description="The metadata to use for the benchmark.")
+
+
+@json_schema_type
+class UnregisterBenchmarkRequest(BaseModel):
+    """Request model for unregistering a benchmark."""
+
+    benchmark_id: str = Field(..., description="The ID of the benchmark to unregister.")
+
+
+class CommonBenchmarkFields(BaseModel):
+    dataset_id: str = Field(..., description="Identifier of the dataset to use for the benchmark evaluation.")
+    scoring_functions: list[str] = Field(
+        ..., description="List of scoring function identifiers to apply during evaluation."
+    )
+    metadata: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Metadata for this evaluation task.",
+    )
+
+
+@json_schema_type
+class Benchmark(CommonBenchmarkFields, Resource):
+    """A benchmark resource for evaluating model performance."""
+
+    type: Literal[ResourceType.benchmark] = Field(
+        default=ResourceType.benchmark,
+        description="The resource type, always benchmark.",
+    )
+
+    @property
+    def benchmark_id(self) -> str:
+        return self.identifier
+
+    @property
+    def provider_benchmark_id(self) -> str | None:
+        return self.provider_resource_id
+
+
+class BenchmarkInput(CommonBenchmarkFields, BaseModel):
+    benchmark_id: str = Field(..., description="The ID of the benchmark.")
+    provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
+    provider_benchmark_id: str | None = Field(
+        default=None, description="The ID of the provider benchmark to use for the benchmark."
+    )
+
+
+@json_schema_type
+class ListBenchmarksResponse(BaseModel):
+    """Response containing a list of benchmark objects."""
+
+    data: list[Benchmark] = Field(..., description="List of benchmark objects.")
+
+
+__all__ = [
+    "ListBenchmarksRequest",
+    "GetBenchmarkRequest",
+    "RegisterBenchmarkRequest",
+    "UnregisterBenchmarkRequest",
+    "CommonBenchmarkFields",
+    "Benchmark",
+    "BenchmarkInput",
+    "ListBenchmarksResponse",
+]
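For illustration (not part of the diff), a registration payload for the deprecated POST /eval/benchmarks route maps directly onto RegisterBenchmarkRequest; the identifiers below are hypothetical.

from llama_stack_api.benchmarks.models import RegisterBenchmarkRequest

req = RegisterBenchmarkRequest(
    benchmark_id="my-mmlu-eval",            # hypothetical benchmark ID
    dataset_id="mmlu-test-split",           # hypothetical dataset ID
    scoring_functions=["basic::subset_of"], # hypothetical scoring function ID
    metadata={"split": "test"},
)
# provider_id and provider_benchmark_id are optional and default to None.
assert req.provider_id is None and req.provider_benchmark_id is None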
--- llama_stack/apis/common/content_types.py
+++ llama_stack_api/common/content_types.py
@@ -4,13 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
 from typing import Annotated, Literal
 
 from pydantic import BaseModel, Field, model_validator
 
-from
-from llama_stack.schema_utils import json_schema_type, register_schema
+from llama_stack_api.schema_utils import json_schema_type, register_schema
 
 
 @json_schema_type
@@ -101,43 +99,3 @@ class ImageDelta(BaseModel):
 
     type: Literal["image"] = "image"
     image: bytes
-
-
-class ToolCallParseStatus(Enum):
-    """Status of tool call parsing during streaming.
-    :cvar started: Tool call parsing has begun
-    :cvar in_progress: Tool call parsing is ongoing
-    :cvar failed: Tool call parsing failed
-    :cvar succeeded: Tool call parsing completed successfully
-    """
-
-    started = "started"
-    in_progress = "in_progress"
-    failed = "failed"
-    succeeded = "succeeded"
-
-
-@json_schema_type
-class ToolCallDelta(BaseModel):
-    """A tool call content delta for streaming responses.
-
-    :param type: Discriminator type of the delta. Always "tool_call"
-    :param tool_call: Either an in-progress tool call string or the final parsed tool call
-    :param parse_status: Current parsing status of the tool call
-    """
-
-    type: Literal["tool_call"] = "tool_call"
-
-    # you either send an in-progress tool call so the client can stream a long
-    # code generation or you send the final parsed tool call at the end of the
-    # stream
-    tool_call: str | ToolCall
-    parse_status: ToolCallParseStatus
-
-
-# streaming completions send a stream of ContentDeltas
-ContentDelta = Annotated[
-    TextDelta | ImageDelta | ToolCallDelta,
-    Field(discriminator="type"),
-]
-register_schema(ContentDelta, name="ContentDelta")
--- llama_stack/apis/common/errors.py
+++ llama_stack_api/common/errors.py
@@ -56,14 +56,6 @@ class ToolGroupNotFoundError(ResourceNotFoundError):
         super().__init__(toolgroup_name, "Tool Group", "client.toolgroups.list()")
 
 
-class SessionNotFoundError(ValueError):
-    """raised when Llama Stack cannot find a referenced session or access is denied"""
-
-    def __init__(self, session_name: str) -> None:
-        message = f"Session '{session_name}' not found or access denied."
-        super().__init__(message)
-
-
 class ModelTypeError(TypeError):
     """raised when a model is present but not the correct type"""
 
--- /dev/null
+++ llama_stack_api/common/responses.py
@@ -0,0 +1,77 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel
+
+from llama_stack_api.schema_utils import json_schema_type
+
+
+class Order(Enum):
+    """Sort order for paginated responses.
+    :cvar asc: Ascending order
+    :cvar desc: Descending order
+    """
+
+    asc = "asc"
+    desc = "desc"
+
+
+@json_schema_type
+class PaginatedResponse(BaseModel):
+    """A generic paginated response that follows a simple format.
+
+    :param data: The list of items for the current page
+    :param has_more: Whether there are more items available after this set
+    :param url: The URL for accessing this list
+    """
+
+    data: list[dict[str, Any]]
+    has_more: bool
+    url: str | None = None
+
+
+# This is a short term solution to allow inference API to return metrics
+# The ideal way to do this is to have a way for all response types to include metrics
+# and all metric events logged to the telemetry API to be included with the response
+# To do this, we will need to augment all response types with a metrics field.
+# We have hit a blocker from stainless SDK that prevents us from doing this.
+# The blocker is that if we were to augment the response types that have a data field
+# in them like so
+# class ListModelsResponse(BaseModel):
+#     metrics: Optional[List[MetricEvent]] = None
+#     data: List[Models]
+#     ...
+# The client SDK will need to access the data by using a .data field, which is not
+# ergonomic. Stainless SDK does support unwrapping the response type, but it
+# requires that the response type to only have a single field.
+
+# We will need a way in the client SDK to signal that the metrics are needed
+# and if they are needed, the client SDK has to return the full response type
+# without unwrapping it.
+
+
+@json_schema_type
+class MetricInResponse(BaseModel):
+    """A metric value included in API responses.
+    :param metric: The name of the metric
+    :param value: The numeric value of the metric
+    :param unit: (Optional) The unit of measurement for the metric value
+    """
+
+    metric: str
+    value: int | float
+    unit: str | None = None
+
+
+class MetricResponseMixin(BaseModel):
+    """Mixin class for API responses that can include metrics.
+    :param metrics: (Optional) List of metrics associated with the API response
+    """
+
+    metrics: list[MetricInResponse] | None = None