llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
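The renames above move the public API surface out of `llama_stack/apis/*` into a new top-level `llama_stack_api` package, and the KV/SQL store utilities out of `llama_stack/providers/utils/*` into `llama_stack/core/storage/*`. A minimal sketch of what that implies for downstream imports; the exact set of symbols re-exported by `llama_stack_api` in 0.4.0 is an assumption here, inferred from the `core/datatypes.py` diff below:

```python
# Hypothetical migration sketch; symbol names follow the datatypes.py diff below.

# 0.3.5 layout: API datatypes lived under llama_stack.apis.*
# from llama_stack.apis.datasets import Dataset, DatasetInput
# from llama_stack.apis.inference import Inference

# 0.4.0 layout: the same symbols are imported from the new llama_stack_api package
from llama_stack_api import Dataset, DatasetInput, Inference

# 0.3.5 layout: storage helpers lived under llama_stack.providers.utils.*
# from llama_stack.providers.utils.kvstore import kvstore
# 0.4.0 layout: storage helpers moved under llama_stack.core.storage.*
from llama_stack.core.storage.kvstore import kvstore  # module path per the file moves above
```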
llama_stack/core/datatypes.py
CHANGED

@@ -11,27 +11,40 @@ from urllib.parse import urlparse
 
 from pydantic import BaseModel, Field, field_validator, model_validator
 
-from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset, DatasetInput
-from llama_stack.apis.eval import Eval
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.models import Model, ModelInput
-from llama_stack.apis.resource import Resource
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.scoring import Scoring
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
-from llama_stack.apis.shields import Shield, ShieldInput
-from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
 from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.core.storage.datatypes import (
     KVStoreReference,
     StorageBackendType,
     StorageConfig,
 )
-from llama_stack.
+from llama_stack.log import LoggingConfig
+from llama_stack_api import (
+    Api,
+    Benchmark,
+    BenchmarkInput,
+    ConnectorInput,
+    Dataset,
+    DatasetInput,
+    DatasetIO,
+    Eval,
+    Inference,
+    Model,
+    ModelInput,
+    ProviderSpec,
+    Resource,
+    Safety,
+    Scoring,
+    ScoringFn,
+    ScoringFnInput,
+    Shield,
+    ShieldInput,
+    ToolGroup,
+    ToolGroupInput,
+    ToolRuntime,
+    VectorIO,
+    VectorStore,
+    VectorStoreInput,
+)
 
 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
 LLAMA_STACK_RUN_CONFIG_VERSION = 2
@@ -179,30 +192,6 @@ class DistributionSpec(BaseModel):
     )
 
 
-class TelemetryConfig(BaseModel):
-    """
-    Configuration for telemetry.
-
-    Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
-    for env variables to configure the OpenTelemetry SDK.
-
-    Example:
-    ```bash
-    OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
-    ```
-    """
-
-    enabled: bool = Field(default=False, description="enable or disable telemetry")
-
-
-class LoggingConfig(BaseModel):
-    category_levels: dict[str, str] = Field(
-        default_factory=dict,
-        description="""
-Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
-    )
-
-
 class OAuth2JWKSConfig(BaseModel):
     # The JWKS URI for collecting public keys
     uri: str
@@ -361,6 +350,201 @@ class QualifiedModel(BaseModel):
     model_id: str
 
 
+class RewriteQueryParams(BaseModel):
+    """Parameters for query rewriting/expansion."""
+
+    model: QualifiedModel | None = Field(
+        default=None,
+        description="LLM model for query rewriting/expansion in vector search.",
+    )
+    prompt: str = Field(
+        default="Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:",
+        description="Prompt template for query rewriting. Use {query} as placeholder for the original query.",
+    )
+    max_tokens: int = Field(
+        default=100,
+        description="Maximum number of tokens for query expansion responses.",
+    )
+    temperature: float = Field(
+        default=0.3,
+        description="Temperature for query expansion model (0.0 = deterministic, 1.0 = creative).",
+    )
+
+    @field_validator("prompt")
+    @classmethod
+    def validate_prompt(cls, v: str) -> str:
+        if "{query}" not in v:
+            raise ValueError("prompt must contain {query} placeholder")
+        return v
+
+    @field_validator("max_tokens")
+    @classmethod
+    def validate_max_tokens(cls, v: int) -> int:
+        if v <= 0:
+            raise ValueError("max_tokens must be positive")
+        if v > 4096:
+            raise ValueError("max_tokens should not exceed 4096")
+        return v
+
+    @field_validator("temperature")
+    @classmethod
+    def validate_temperature(cls, v: float) -> float:
+        if v < 0.0 or v > 2.0:
+            raise ValueError("temperature must be between 0.0 and 2.0")
+        return v
+
+
+class FileSearchParams(BaseModel):
+    """Configuration for file search tool output formatting."""
+
+    header_template: str = Field(
+        default="knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n",
+        description="Template for the header text shown before search results. Available placeholders: {num_chunks} number of chunks found.",
+    )
+    footer_template: str = Field(
+        default="END of knowledge_search tool results.\n",
+        description="Template for the footer text shown after search results.",
+    )
+
+    @field_validator("header_template")
+    @classmethod
+    def validate_header_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("header_template must not be empty")
+        if "{num_chunks}" not in v:
+            raise ValueError("header_template must contain {num_chunks} placeholder")
+        if "knowledge_search" not in v.lower():
+            raise ValueError(
+                "header_template must contain 'knowledge_search' keyword to ensure proper tool identification"
+            )
+        return v
+
+
+class ContextPromptParams(BaseModel):
+    """Configuration for LLM prompt content and chunk formatting."""
+
+    chunk_annotation_template: str = Field(
+        default="Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
+        description="Template for formatting individual chunks in search results. Available placeholders: {index} 1-based chunk index, {chunk.content} chunk content, {metadata} chunk metadata dict.",
+    )
+    context_template: str = Field(
+        default='The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query. {annotation_instruction}\n',
+        description="Template for explaining the search results to the model. Available placeholders: {query} user's query, {num_chunks} number of chunks.",
+    )
+
+    @field_validator("chunk_annotation_template")
+    @classmethod
+    def validate_chunk_annotation_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("chunk_annotation_template must not be empty")
+        if "{chunk.content}" not in v:
+            raise ValueError("chunk_annotation_template must contain {chunk.content} placeholder")
+        if "{index}" not in v:
+            raise ValueError("chunk_annotation_template must contain {index} placeholder")
+        return v
+
+    @field_validator("context_template")
+    @classmethod
+    def validate_context_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("context_template must not be empty")
+        if "{query}" not in v:
+            raise ValueError("context_template must contain {query} placeholder")
+        return v
+
+
+class AnnotationPromptParams(BaseModel):
+    """Configuration for source annotation and attribution features."""
+
+    enable_annotations: bool = Field(
+        default=True,
+        description="Whether to include annotation information in results.",
+    )
+    annotation_instruction_template: str = Field(
+        default="Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.",
+        description="Instructions for how the model should cite sources. Used when enable_annotations is True.",
+    )
+    chunk_annotation_template: str = Field(
+        default="[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n",
+        description="Template for chunks with annotation information. Available placeholders: {index} 1-based chunk index, {metadata_text} formatted metadata, {file_id} document identifier, {chunk_text} chunk content.",
+    )
+
+    @field_validator("chunk_annotation_template")
+    @classmethod
+    def validate_chunk_annotation_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("chunk_annotation_template must not be empty")
+        if "{index}" not in v:
+            raise ValueError("chunk_annotation_template must contain {index} placeholder")
+        if "{chunk_text}" not in v:
+            raise ValueError("chunk_annotation_template must contain {chunk_text} placeholder")
+        if "{file_id}" not in v:
+            raise ValueError("chunk_annotation_template must contain {file_id} placeholder")
+        return v
+
+    @field_validator("annotation_instruction_template")
+    @classmethod
+    def validate_annotation_instruction_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("annotation_instruction_template must not be empty")
+        return v
+
+
+class FileIngestionParams(BaseModel):
+    """Configuration for file processing during ingestion."""
+
+    default_chunk_size_tokens: int = Field(
+        default=512,
+        description="Default chunk size for RAG tool operations when not specified",
+    )
+    default_chunk_overlap_tokens: int = Field(
+        default=128,
+        description="Default overlap in tokens between chunks (original default: 512 // 4 = 128)",
+    )
+
+
+class ChunkRetrievalParams(BaseModel):
+    """Configuration for chunk retrieval and ranking during search."""
+
+    chunk_multiplier: int = Field(
+        default=5,
+        description="Multiplier for OpenAI API over-retrieval (affects all providers)",
+    )
+    max_tokens_in_context: int = Field(
+        default=4000,
+        description="Maximum tokens allowed in RAG context before truncation",
+    )
+    default_reranker_strategy: str = Field(
+        default="rrf",
+        description="Default reranker when not specified: 'rrf', 'weighted', or 'normalized'",
+    )
+    rrf_impact_factor: float = Field(
+        default=60.0,
+        description="Impact factor for RRF (Reciprocal Rank Fusion) reranking",
+    )
+    weighted_search_alpha: float = Field(
+        default=0.5,
+        description="Alpha weight for weighted search reranking (0.0-1.0)",
+    )
+
+
+class FileBatchParams(BaseModel):
+    """Configuration for file batch processing."""
+
+    max_concurrent_files_per_batch: int = Field(
+        default=3,
+        description="Maximum files processed concurrently in file batches",
+    )
+    file_batch_chunk_size: int = Field(
+        default=10,
+        description="Number of files to process in each batch chunk",
+    )
+    cleanup_interval_seconds: int = Field(
+        default=86400,  # 24 hours
+        description="Interval for cleaning up expired file batches (seconds)",
+    )
+
+
 class VectorStoresConfig(BaseModel):
     """Configuration for vector stores in the stack."""
 
@@ -372,6 +556,44 @@ class VectorStoresConfig(BaseModel):
         default=None,
         description="Default embedding model configuration for vector stores.",
     )
+    rewrite_query_params: RewriteQueryParams | None = Field(
+        default=None,
+        description="Parameters for query rewriting/expansion. None disables query rewriting.",
+    )
+    file_search_params: FileSearchParams = Field(
+        default_factory=FileSearchParams,
+        description="Configuration for file search tool output formatting.",
+    )
+    context_prompt_params: ContextPromptParams = Field(
+        default_factory=ContextPromptParams,
+        description="Configuration for LLM prompt content and chunk formatting.",
+    )
+    annotation_prompt_params: AnnotationPromptParams = Field(
+        default_factory=AnnotationPromptParams,
+        description="Configuration for source annotation and attribution features.",
+    )
+
+    file_ingestion_params: FileIngestionParams = Field(
+        default_factory=FileIngestionParams,
+        description="Configuration for file processing during ingestion.",
+    )
+    chunk_retrieval_params: ChunkRetrievalParams = Field(
+        default_factory=ChunkRetrievalParams,
+        description="Configuration for chunk retrieval and ranking during search.",
+    )
+    file_batch_params: FileBatchParams = Field(
+        default_factory=FileBatchParams,
+        description="Configuration for file batch processing.",
+    )
+
+
+class SafetyConfig(BaseModel):
+    """Configuration for default moderations model."""
+
+    default_shield_id: str | None = Field(
+        default=None,
+        description="ID of the shield to use for when `model` is not specified in the `moderations` API request.",
+    )
 
 
 class QuotaPeriod(StrEnum):
@@ -432,6 +654,7 @@ class RegisteredResources(BaseModel):
     scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
     benchmarks: list[BenchmarkInput] = Field(default_factory=list)
     tool_groups: list[ToolGroupInput] = Field(default_factory=list)
+    connectors: list[ConnectorInput] = Field(default_factory=list)
 
 
 class ServerConfig(BaseModel):
@@ -477,7 +700,7 @@ class ServerConfig(BaseModel):
     )
 
 
-class
+class StackConfig(BaseModel):
     version: int = LLAMA_STACK_RUN_CONFIG_VERSION
 
     image_name: str = Field(
@@ -504,6 +727,7 @@ can be instantiated multiple times (with different configs) if necessary.
""",
     )
     storage: StorageConfig = Field(
+        default_factory=StorageConfig,
        description="Catalog of named storage backends and references available to the stack",
     )
 
@@ -514,8 +738,6 @@ can be instantiated multiple times (with different configs) if necessary.
 
     logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
 
-    telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
-
     server: ServerConfig = Field(
         default_factory=ServerConfig,
         description="Configuration for the HTTP(S) server",
@@ -536,6 +758,11 @@ can be instantiated multiple times (with different configs) if necessary.
         description="Configuration for vector stores, including default embedding model",
     )
 
+    safety: SafetyConfig | None = Field(
+        default=None,
+        description="Configuration for default moderations model",
+    )
+
     @field_validator("external_providers_dir")
     @classmethod
     def validate_external_providers_dir(cls, v):
@@ -546,7 +773,7 @@ can be instantiated multiple times (with different configs) if necessary.
         return v
 
     @model_validator(mode="after")
-    def validate_server_stores(self) -> "
+    def validate_server_stores(self) -> "StackConfig":
         backend_map = self.storage.backends
         stores = self.storage.stores
         kv_backends = {
@@ -586,40 +813,5 @@ can be instantiated multiple times (with different configs) if necessary.
         _ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
         _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
         _ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
+        _ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts")
         return self
-
-
-class BuildConfig(BaseModel):
-    version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
-
-    distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ")
-    image_type: str = Field(
-        default="venv",
-        description="Type of package to build (container | venv)",
-    )
-    image_name: str | None = Field(
-        default=None,
-        description="Name of the distribution to build",
-    )
-    external_providers_dir: Path | None = Field(
-        default=None,
-        description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. "
-        "pip_packages MUST contain the provider package name.",
-    )
-    additional_pip_packages: list[str] = Field(
-        default_factory=list,
-        description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.",
-    )
-    external_apis_dir: Path | None = Field(
-        default=None,
-        description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
-    )
-
-    @field_validator("external_providers_dir")
-    @classmethod
-    def validate_external_providers_dir(cls, v):
-        if v is None:
-            return None
-        if isinstance(v, str):
-            return Path(v)
-        return v
llama_stack/core/distribution.py
CHANGED

@@ -12,10 +12,10 @@ from typing import Any
 import yaml
 from pydantic import BaseModel
 
-from llama_stack.core.datatypes import
+from llama_stack.core.datatypes import StackConfig
 from llama_stack.core.external import load_external_apis
 from llama_stack.log import get_logger
-from
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
@@ -25,7 +25,7 @@ from llama_stack.providers.datatypes import (
 logger = get_logger(name=__name__, category="core")
 
 
-INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations, Api.
+INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations, Api.connectors, Api.admin}
 
 
 def stack_apis() -> list[Api]:
@@ -85,7 +85,9 @@ def _load_inline_provider_spec(spec_data: dict[str, Any], api: Api, provider_nam
     return spec
 
 
-def get_provider_registry(
+def get_provider_registry(
+    config: StackConfig | None = None, listing: bool = False
+) -> dict[Api, dict[str, ProviderSpec]]:
     """Get the provider registry, optionally including external providers.
 
     This function loads both built-in providers and external providers from YAML files or from their provided modules.
@@ -109,13 +111,13 @@ def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
             safety/
                 llama-guard.yaml
 
-    This method is overloaded in that it can be called from a variety of places: during
-    So when
+    This method is overloaded in that it can be called from a variety of places: during list-deps, during run, during stack construction.
+    So when listing external providers from a module, there are scenarios where the pip package required to import the module might not be available yet.
     There is special handling for all of the potential cases this method can be called from.
 
     Args:
         config: Optional object containing the external providers directory path
-
+        listing: Optional bool delineating whether or not this is being called from a list-deps process
 
     Returns:
         A dictionary mapping APIs to their available providers
@@ -161,7 +163,7 @@ def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
         registry = get_external_providers_from_module(
             registry=registry,
             config=config,
-
+            listing=listing,
         )
 
     return registry
@@ -220,13 +222,10 @@ def get_external_providers_from_dir(
 
 
 def get_external_providers_from_module(
-    registry: dict[Api, dict[str, ProviderSpec]], config,
+    registry: dict[Api, dict[str, ProviderSpec]], config, listing: bool
 ) -> dict[Api, dict[str, ProviderSpec]]:
     provider_list = None
-
-        provider_list = config.distribution_spec.providers.items()
-    else:
-        provider_list = config.providers.items()
+    provider_list = config.providers.items()
     if provider_list is None:
         logger.warning("Could not get list of providers from config")
         return registry
@@ -236,14 +235,14 @@ def get_external_providers_from_module(
                 continue
             # get provider using module
             try:
-                if not
+                if not listing:
                     package_name = provider.module.split("==")[0]
                     module = importlib.import_module(f"{package_name}.provider")
                     # if config class is wrong you will get an error saying module could not be imported
                     spec = module.get_provider_spec()
                 else:
-                    # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon
-                    # in the case we are
+                    # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon list-deps and run
+                    # in the case we are listing we CANNOT import this module of course because it has not been installed.
                     spec = ProviderSpec(
                         api=Api(provider_api),
                         provider_type=provider.provider_type,
llama_stack/core/external.py
CHANGED

@@ -7,14 +7,14 @@
 
 import yaml
 
-from llama_stack.
-from llama_stack.core.datatypes import BuildConfig, StackRunConfig
+from llama_stack.core.datatypes import StackConfig
 from llama_stack.log import get_logger
+from llama_stack_api import Api, ExternalApiSpec
 
 logger = get_logger(name=__name__, category="core")
 
 
-def load_external_apis(config:
+def load_external_apis(config: StackConfig | None) -> dict[Api, ExternalApiSpec]:
     """Load external API specifications from the configured directory.
 
     Args: