llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +12 -21
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.4.dist-info/RECORD +0 -625
- llama_stack-0.3.4.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -15,7 +15,7 @@ class User(Protocol):
|
|
|
15
15
|
class ProtectedResource(Protocol):
|
|
16
16
|
type: str
|
|
17
17
|
identifier: str
|
|
18
|
-
owner: User
|
|
18
|
+
owner: User | None
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class Condition(Protocol):
|
|
@@ -38,13 +38,13 @@ class UserInOwnersList:
|
|
|
38
38
|
return None
|
|
39
39
|
|
|
40
40
|
def matches(self, resource: ProtectedResource, user: User) -> bool:
|
|
41
|
-
|
|
42
|
-
if not
|
|
43
|
-
return
|
|
41
|
+
defined = self.owners_values(resource)
|
|
42
|
+
if not defined:
|
|
43
|
+
return False
|
|
44
44
|
if not user.attributes or self.name not in user.attributes or not user.attributes[self.name]:
|
|
45
45
|
return False
|
|
46
46
|
user_values = user.attributes[self.name]
|
|
47
|
-
for value in
|
|
47
|
+
for value in defined:
|
|
48
48
|
if value in user_values:
|
|
49
49
|
return True
|
|
50
50
|
return False
|
|
@@ -106,6 +106,14 @@ class UserIsNotOwner:
|
|
|
106
106
|
return "user is not owner"
|
|
107
107
|
|
|
108
108
|
|
|
109
|
+
class ResourceIsUnowned:
|
|
110
|
+
def matches(self, resource: ProtectedResource, user: User) -> bool:
|
|
111
|
+
return not resource.owner
|
|
112
|
+
|
|
113
|
+
def __repr__(self):
|
|
114
|
+
return "resource is unowned"
|
|
115
|
+
|
|
116
|
+
|
|
109
117
|
def parse_condition(condition: str) -> Condition:
|
|
110
118
|
words = condition.split()
|
|
111
119
|
match words:
|
|
@@ -121,6 +129,8 @@ def parse_condition(condition: str) -> Condition:
|
|
|
121
129
|
return UserInOwnersList(name)
|
|
122
130
|
case ["user", "not", "in", "owners", name]:
|
|
123
131
|
return UserNotInOwnersList(name)
|
|
132
|
+
case ["resource", "is", "unowned"]:
|
|
133
|
+
return ResourceIsUnowned()
|
|
124
134
|
case _:
|
|
125
135
|
raise ValueError(f"Invalid condition: {condition}")
|
|
126
136
|
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
from importlib.metadata import version
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
|
|
13
|
+
from llama_stack.core.datatypes import StackConfig
|
|
14
|
+
from llama_stack.core.external import load_external_apis
|
|
15
|
+
from llama_stack.core.server.fastapi_router_registry import (
|
|
16
|
+
_ROUTER_FACTORIES,
|
|
17
|
+
build_fastapi_router,
|
|
18
|
+
get_router_routes,
|
|
19
|
+
)
|
|
20
|
+
from llama_stack.core.server.routes import get_all_api_routes
|
|
21
|
+
from llama_stack.core.utils.config import redact_sensitive_fields
|
|
22
|
+
from llama_stack.log import get_logger
|
|
23
|
+
from llama_stack_api import (
|
|
24
|
+
Admin,
|
|
25
|
+
Api,
|
|
26
|
+
HealthInfo,
|
|
27
|
+
HealthResponse,
|
|
28
|
+
HealthStatus,
|
|
29
|
+
InspectProviderRequest,
|
|
30
|
+
ListProvidersResponse,
|
|
31
|
+
ListRoutesRequest,
|
|
32
|
+
ListRoutesResponse,
|
|
33
|
+
ProviderInfo,
|
|
34
|
+
RouteInfo,
|
|
35
|
+
VersionInfo,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
logger = get_logger(name=__name__, category="core")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Configuration object handed to AdminImpl.
# It wraps the stack configuration, which AdminImpl reads via `self.config.config`.
class AdminImplConfig(BaseModel):
    # The stack configuration (providers, etc.) the admin endpoints report on.
    config: StackConfig
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def get_provider_impl(config, deps):
    """Create an ``AdminImpl`` from ``config`` and ``deps``, initialize it, and return it."""
    admin = AdminImpl(config, deps)
    await admin.initialize()
    return admin
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class AdminImpl(Admin):
    """Implementation of the ``Admin`` API: provider listing/health, route listing, health and version."""

    def __init__(self, config: AdminImplConfig, deps):
        """Store the admin configuration and the dependency implementations.

        Args:
            config: Wrapper whose ``config`` attribute is the stack configuration.
            deps: Mapping of implementations; only ``deps.values()`` is used (for health checks).
        """
        self.config = config
        self.deps = deps

    async def initialize(self) -> None:
        """No-op; nothing needs to be set up."""
        pass

    async def shutdown(self) -> None:
        """Log the shutdown at debug level; there is nothing to release."""
        logger.debug("AdminImpl.shutdown")

    # Provider management methods
    async def list_providers(self) -> ListProvidersResponse:
        """List every configured provider together with its health status.

        The stack configuration is passed through ``redact_sensitive_fields`` before
        provider configs are copied into the response. Providers whose ``provider_id``
        is ``None`` are skipped.

        Returns:
            ListProvidersResponse: one ``ProviderInfo`` per provider with a provider_id.
        """
        config = self.config.config
        safe_config = StackConfig(**redact_sensitive_fields(config.model_dump()))
        providers_health = await self.get_providers_health()

        ret = []
        for api, providers in safe_config.providers.items():
            # Health entries recorded for this API (empty when none were collected).
            api_health = providers_health.get(api, {})
            for p in providers:
                # Skip providers that are not enabled
                if p.provider_id is None:
                    continue
                ret.append(
                    ProviderInfo(
                        api=api,
                        provider_id=p.provider_id,
                        provider_type=p.provider_type,
                        config=p.config,
                        # Fall back to NOT_IMPLEMENTED when no health entry exists for this provider.
                        health=api_health.get(
                            p.provider_id,
                            HealthResponse(
                                status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check"
                            ),
                        ),
                    )
                )

        return ListProvidersResponse(data=ret)

    async def inspect_provider(self, request: InspectProviderRequest) -> ProviderInfo:
        """Return the first listed provider whose ``provider_id`` matches the request.

        Raises:
            ValueError: if no listed provider has the requested ``provider_id``.
        """
        all_providers = await self.list_providers()
        for p in all_providers.data:
            if p.provider_id == request.provider_id:
                return p

        raise ValueError(f"Provider {request.provider_id} not found")

    async def get_providers_health(self) -> dict[str, dict[str, HealthResponse]]:
        """Get health status for all providers.

        Every implementation in ``self.deps`` that carries a ``__provider_spec__`` is
        checked concurrently; each check is bounded by a timeout.

        Returns:
            Dict[str, Dict[str, HealthResponse]]: A dictionary mapping API names to provider health statuses.
                Each API maps to a dictionary of provider IDs to their health responses.
        """
        providers_health: dict[str, dict[str, HealthResponse]] = {}

        # The timeout has to be long enough to allow all the providers to be checked, especially in
        # the case of the inference router health check since it checks all registered inference
        # providers.
        # The timeout must not be equal to the one set by health method for a given implementation,
        # otherwise we will miss some providers.
        timeout = 3.0

        async def check_provider_health(impl: Any) -> tuple[str, HealthResponse] | None:
            """Return ``(api_name, health)`` for ``impl``, or ``None`` if it has no provider spec."""
            # Skip special implementations (inspect/providers/admin) that don't have provider specs
            if not hasattr(impl, "__provider_spec__"):
                return None
            api_name = impl.__provider_spec__.api.name
            if not hasattr(impl, "health"):
                return (
                    api_name,
                    HealthResponse(
                        status=HealthStatus.NOT_IMPLEMENTED, message="Provider does not implement health check"
                    ),
                )

            try:
                health = await asyncio.wait_for(impl.health(), timeout=timeout)
                return api_name, health
            except TimeoutError:
                return (
                    api_name,
                    HealthResponse(
                        status=HealthStatus.ERROR, message=f"Health check timed out after {timeout} seconds"
                    ),
                )
            except Exception as e:
                # Best-effort: a failing health check is reported as an ERROR entry, not raised.
                return (
                    api_name,
                    HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}"),
                )

        # Create tasks for all providers
        tasks = [check_provider_health(impl) for impl in self.deps.values()]

        # Wait for all health checks to complete
        results = await asyncio.gather(*tasks)

        # Organize results by API and provider ID
        for result in results:
            if result is None:  # Skip special implementations
                continue
            api_name, health_response = result
            # NOTE(review): whatever impl.health() returned is stored directly under the API name.
            # The annotated nested mapping presumably relies on health() returning a
            # {provider_id: HealthResponse} mapping -- confirm against the implementations.
            providers_health[api_name] = health_response

        return providers_health

    # Inspect methods
    async def list_routes(self, request: ListRoutesRequest) -> ListRoutesResponse:
        """List the routes served by the stack, filtered by ``request.api_filter``.

        Filter semantics:
            * ``None``: only non-deprecated routes.
            * ``"deprecated"``: only deprecated routes, regardless of level.
            * anything else: non-deprecated routes whose level equals the filter.

        Routes are collected first from APIs registered in ``_ROUTER_FACTORIES`` and then
        from the remaining (webmethod-based) APIs. For each route the first non-``HEAD``
        method is reported; routes without any non-``HEAD`` method are skipped.
        """
        config: StackConfig = self.config.config
        api_filter = request.api_filter
        # These APIs don't have "real" providers - they're internal to the stack
        internal_apis = ("providers", "inspect", "admin")

        # Helper function to determine if a route should be included based on api_filter
        # TODO: remove this once we've migrated all APIs to FastAPI routers
        def should_include_route(webmethod) -> bool:
            if api_filter is None:
                # Default: only non-deprecated APIs
                return not webmethod.deprecated
            elif api_filter == "deprecated":
                # Special filter: show deprecated routes regardless of their actual level
                return bool(webmethod.deprecated)
            else:
                # Filter by API level (non-deprecated routes only)
                return not webmethod.deprecated and webmethod.level == api_filter

        # Helper function to get provider types for an API
        def _get_provider_types(api: Api) -> list[str]:
            if api.value in internal_apis:
                return []
            providers = config.providers.get(api.value, [])
            return [p.provider_type for p in providers] if providers else []

        # Helper function to determine if a router route should be included based on api_filter
        def _should_include_router_route(route, router_prefix: str | None) -> bool:
            """Check if a router-based route should be included based on api_filter."""
            # Check deprecated status
            route_deprecated = getattr(route, "deprecated", False) or False

            if api_filter is None:
                # Default: only non-deprecated routes
                return not route_deprecated
            elif api_filter == "deprecated":
                # Special filter: show deprecated routes regardless of their actual level
                return route_deprecated
            else:
                # Filter by API level (non-deprecated routes only)
                # Extract level from router prefix (e.g., "/v1" -> "v1")
                if router_prefix:
                    prefix_level = router_prefix.lstrip("/")
                    return not route_deprecated and prefix_level == api_filter
                return not route_deprecated

        # Helper function to pick the method reported for a route.
        def _first_non_head_method(methods) -> str | None:
            """Return the first method other than HEAD, or None when there is none."""
            # A default is passed to next() so that an empty/HEAD-only method set does not raise
            # StopIteration (which would surface as RuntimeError inside this coroutine).
            return next((m for m in methods if m != "HEAD"), None)

        ret = []
        external_apis = load_external_apis(config)
        all_endpoints = get_all_api_routes(external_apis)

        # Process routes from APIs with FastAPI routers
        for api_name in _ROUTER_FACTORIES.keys():
            api = Api(api_name)
            router = build_fastapi_router(api, None)  # we don't need the impl here, just the routes
            if not router:
                continue
            for route in get_router_routes(router):
                if not _should_include_router_route(route, router.prefix):
                    continue
                if route.methods is None:
                    continue
                method = _first_non_head_method(route.methods)
                if method is None:
                    continue
                ret.append(
                    RouteInfo(
                        route=route.path,
                        method=method,
                        provider_types=_get_provider_types(api),
                    )
                )

        # Process routes from legacy webmethod-based APIs
        for api, endpoints in all_endpoints.items():
            # Skip APIs that have routers (already processed above)
            if api.value in _ROUTER_FACTORIES:
                continue

            # Always include provider, inspect, and admin APIs, filter others based on run config
            if api.value in internal_apis:
                provider_types: list[str] = []
            else:
                providers = config.providers.get(api.value, [])
                if not providers:  # Only process if there are providers for this API
                    continue
                provider_types = [p.provider_type for p in providers]

            for e, webmethod in endpoints:
                if e.methods is None or not should_include_route(webmethod):
                    continue
                method = _first_non_head_method(e.methods)
                if method is None:
                    # Same guard as the router path: nothing to report for HEAD-only endpoints.
                    continue
                ret.append(
                    RouteInfo(
                        route=e.path,
                        method=method,
                        # Fresh list per route so entries never share one mutable list.
                        provider_types=list(provider_types),
                    )
                )

        return ListRoutesResponse(data=ret)

    async def health(self) -> HealthInfo:
        """Report the admin service itself as healthy (always ``HealthStatus.OK``)."""
        return HealthInfo(status=HealthStatus.OK)

    async def version(self) -> VersionInfo:
        """Return the installed ``llama-stack`` distribution version from package metadata."""
        return VersionInfo(version=version("llama-stack"))
|
llama_stack/core/build.py
CHANGED
|
@@ -4,20 +4,16 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
import importlib.resources
|
|
8
7
|
import sys
|
|
9
8
|
|
|
10
9
|
from pydantic import BaseModel
|
|
11
10
|
from termcolor import cprint
|
|
12
11
|
|
|
13
|
-
from llama_stack.core.datatypes import
|
|
12
|
+
from llama_stack.core.datatypes import StackConfig
|
|
14
13
|
from llama_stack.core.distribution import get_provider_registry
|
|
15
|
-
from llama_stack.core.external import load_external_apis
|
|
16
|
-
from llama_stack.core.utils.exec import run_command
|
|
17
|
-
from llama_stack.core.utils.image_types import LlamaStackImageType
|
|
18
14
|
from llama_stack.distributions.template import DistributionTemplate
|
|
19
15
|
from llama_stack.log import get_logger
|
|
20
|
-
from
|
|
16
|
+
from llama_stack_api import Api
|
|
21
17
|
|
|
22
18
|
log = get_logger(name=__name__, category="core")
|
|
23
19
|
|
|
@@ -40,18 +36,17 @@ class ApiInput(BaseModel):
|
|
|
40
36
|
|
|
41
37
|
|
|
42
38
|
def get_provider_dependencies(
|
|
43
|
-
config:
|
|
39
|
+
config: StackConfig,
|
|
44
40
|
) -> tuple[list[str], list[str], list[str]]:
|
|
45
41
|
"""Get normal and special dependencies from provider configuration."""
|
|
46
42
|
if isinstance(config, DistributionTemplate):
|
|
47
43
|
config = config.build_config()
|
|
48
44
|
|
|
49
|
-
providers = config.
|
|
50
|
-
additional_pip_packages = config.additional_pip_packages
|
|
45
|
+
providers = config.providers
|
|
51
46
|
|
|
52
47
|
deps = []
|
|
53
48
|
external_provider_deps = []
|
|
54
|
-
registry = get_provider_registry(config)
|
|
49
|
+
registry = get_provider_registry(config=config, listing=True)
|
|
55
50
|
for api_str, provider_or_providers in providers.items():
|
|
56
51
|
providers_for_api = registry[Api(api_str)]
|
|
57
52
|
|
|
@@ -85,12 +80,10 @@ def get_provider_dependencies(
|
|
|
85
80
|
else:
|
|
86
81
|
normal_deps.append(package)
|
|
87
82
|
|
|
88
|
-
normal_deps.extend(additional_pip_packages or [])
|
|
89
|
-
|
|
90
83
|
return list(set(normal_deps)), list(set(special_deps)), list(set(external_provider_deps))
|
|
91
84
|
|
|
92
85
|
|
|
93
|
-
def print_pip_install_help(config:
|
|
86
|
+
def print_pip_install_help(config: StackConfig):
|
|
94
87
|
normal_deps, special_deps, _ = get_provider_dependencies(config)
|
|
95
88
|
|
|
96
89
|
cprint(
|
|
@@ -101,64 +94,3 @@ def print_pip_install_help(config: BuildConfig):
|
|
|
101
94
|
for special_dep in special_deps:
|
|
102
95
|
cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr)
|
|
103
96
|
print()
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def build_image(
|
|
107
|
-
build_config: BuildConfig,
|
|
108
|
-
image_name: str,
|
|
109
|
-
distro_or_config: str,
|
|
110
|
-
run_config: str | None = None,
|
|
111
|
-
):
|
|
112
|
-
container_base = build_config.distribution_spec.container_image or "python:3.12-slim"
|
|
113
|
-
|
|
114
|
-
normal_deps, special_deps, external_provider_deps = get_provider_dependencies(build_config)
|
|
115
|
-
normal_deps += SERVER_DEPENDENCIES
|
|
116
|
-
if build_config.external_apis_dir:
|
|
117
|
-
external_apis = load_external_apis(build_config)
|
|
118
|
-
if external_apis:
|
|
119
|
-
for _, api_spec in external_apis.items():
|
|
120
|
-
normal_deps.extend(api_spec.pip_packages)
|
|
121
|
-
|
|
122
|
-
if build_config.image_type == LlamaStackImageType.CONTAINER.value:
|
|
123
|
-
script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
|
|
124
|
-
args = [
|
|
125
|
-
script,
|
|
126
|
-
"--distro-or-config",
|
|
127
|
-
distro_or_config,
|
|
128
|
-
"--image-name",
|
|
129
|
-
image_name,
|
|
130
|
-
"--container-base",
|
|
131
|
-
container_base,
|
|
132
|
-
"--normal-deps",
|
|
133
|
-
" ".join(normal_deps),
|
|
134
|
-
]
|
|
135
|
-
# When building from a config file (not a template), include the run config path in the
|
|
136
|
-
# build arguments
|
|
137
|
-
if run_config is not None:
|
|
138
|
-
args.extend(["--run-config", run_config])
|
|
139
|
-
else:
|
|
140
|
-
script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh")
|
|
141
|
-
args = [
|
|
142
|
-
script,
|
|
143
|
-
"--env-name",
|
|
144
|
-
str(image_name),
|
|
145
|
-
"--normal-deps",
|
|
146
|
-
" ".join(normal_deps),
|
|
147
|
-
]
|
|
148
|
-
|
|
149
|
-
# Always pass both arguments, even if empty, to maintain consistent positional arguments
|
|
150
|
-
if special_deps:
|
|
151
|
-
args.extend(["--optional-deps", "#".join(special_deps)])
|
|
152
|
-
if external_provider_deps:
|
|
153
|
-
args.extend(
|
|
154
|
-
["--external-provider-deps", "#".join(external_provider_deps)]
|
|
155
|
-
) # the script will install external provider module, get its deps, and install those too.
|
|
156
|
-
|
|
157
|
-
return_code = run_command(args)
|
|
158
|
-
|
|
159
|
-
if return_code != 0:
|
|
160
|
-
log.error(
|
|
161
|
-
f"Failed to build target {image_name} with return code {return_code}",
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
return return_code
|
llama_stack/core/client.py
CHANGED
llama_stack/core/configure.py
CHANGED
|
@@ -10,7 +10,7 @@ from llama_stack.core.datatypes import (
|
|
|
10
10
|
LLAMA_STACK_RUN_CONFIG_VERSION,
|
|
11
11
|
DistributionSpec,
|
|
12
12
|
Provider,
|
|
13
|
-
|
|
13
|
+
StackConfig,
|
|
14
14
|
)
|
|
15
15
|
from llama_stack.core.distribution import (
|
|
16
16
|
builtin_automatically_routed_apis,
|
|
@@ -20,7 +20,7 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
|
|
|
20
20
|
from llama_stack.core.utils.dynamic import instantiate_class_type
|
|
21
21
|
from llama_stack.core.utils.prompt_for_config import prompt_for_config
|
|
22
22
|
from llama_stack.log import get_logger
|
|
23
|
-
from
|
|
23
|
+
from llama_stack_api import Api, ProviderSpec
|
|
24
24
|
|
|
25
25
|
logger = get_logger(name=__name__, category="core")
|
|
26
26
|
|
|
@@ -44,7 +44,7 @@ def configure_single_provider(registry: dict[str, ProviderSpec], provider: Provi
|
|
|
44
44
|
)
|
|
45
45
|
|
|
46
46
|
|
|
47
|
-
def configure_api_providers(config:
|
|
47
|
+
def configure_api_providers(config: StackConfig, build_spec: DistributionSpec) -> StackConfig:
|
|
48
48
|
is_nux = len(config.providers) == 0
|
|
49
49
|
|
|
50
50
|
if is_nux:
|
|
@@ -63,7 +63,7 @@ def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec
|
|
|
63
63
|
if config.apis:
|
|
64
64
|
apis_to_serve = config.apis
|
|
65
65
|
else:
|
|
66
|
-
apis_to_serve = [a.value for a in Api if a not in (Api.inspect, Api.providers)]
|
|
66
|
+
apis_to_serve = [a.value for a in Api if a not in (Api.inspect, Api.providers, Api.admin)]
|
|
67
67
|
|
|
68
68
|
for api_str in apis_to_serve:
|
|
69
69
|
api = Api(api_str)
|
|
@@ -192,7 +192,7 @@ def upgrade_from_routing_table(
|
|
|
192
192
|
return config_dict
|
|
193
193
|
|
|
194
194
|
|
|
195
|
-
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) ->
|
|
195
|
+
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackConfig:
|
|
196
196
|
if "routing_table" in config_dict:
|
|
197
197
|
logger.info("Upgrading config...")
|
|
198
198
|
config_dict = upgrade_from_routing_table(config_dict)
|
|
@@ -200,4 +200,4 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi
|
|
|
200
200
|
config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION
|
|
201
201
|
|
|
202
202
|
processed_config_dict = replace_env_vars(config_dict)
|
|
203
|
-
return
|
|
203
|
+
return StackConfig(**cast_image_name_to_string(processed_config_dict))
|
|
@@ -6,25 +6,25 @@
|
|
|
6
6
|
|
|
7
7
|
import secrets
|
|
8
8
|
import time
|
|
9
|
-
from typing import Any
|
|
9
|
+
from typing import Any, Literal
|
|
10
10
|
|
|
11
|
-
from openai import NOT_GIVEN
|
|
12
11
|
from pydantic import BaseModel, TypeAdapter
|
|
13
12
|
|
|
14
|
-
from llama_stack.
|
|
13
|
+
from llama_stack.core.datatypes import AccessRule, StackConfig
|
|
14
|
+
from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
|
15
|
+
from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
|
|
16
|
+
from llama_stack.log import get_logger
|
|
17
|
+
from llama_stack_api import (
|
|
15
18
|
Conversation,
|
|
16
19
|
ConversationDeletedResource,
|
|
17
20
|
ConversationItem,
|
|
18
21
|
ConversationItemDeletedResource,
|
|
22
|
+
ConversationItemInclude,
|
|
19
23
|
ConversationItemList,
|
|
20
24
|
Conversations,
|
|
21
25
|
Metadata,
|
|
22
26
|
)
|
|
23
|
-
from
|
|
24
|
-
from llama_stack.log import get_logger
|
|
25
|
-
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
|
|
26
|
-
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
|
27
|
-
from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
|
|
27
|
+
from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
|
|
28
28
|
|
|
29
29
|
logger = get_logger(name=__name__, category="openai_conversations")
|
|
30
30
|
|
|
@@ -36,7 +36,7 @@ class ConversationServiceConfig(BaseModel):
|
|
|
36
36
|
:param policy: Access control rules
|
|
37
37
|
"""
|
|
38
38
|
|
|
39
|
-
|
|
39
|
+
config: StackConfig
|
|
40
40
|
policy: list[AccessRule] = []
|
|
41
41
|
|
|
42
42
|
|
|
@@ -56,7 +56,7 @@ class ConversationServiceImpl(Conversations):
|
|
|
56
56
|
self.policy = config.policy
|
|
57
57
|
|
|
58
58
|
# Use conversations store reference from run config
|
|
59
|
-
conversations_ref = config.
|
|
59
|
+
conversations_ref = config.config.storage.stores.conversations
|
|
60
60
|
if not conversations_ref:
|
|
61
61
|
raise ValueError("storage.stores.conversations must be configured in run config")
|
|
62
62
|
|
|
@@ -203,16 +203,11 @@ class ConversationServiceImpl(Conversations):
|
|
|
203
203
|
"item_data": item_dict,
|
|
204
204
|
}
|
|
205
205
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
await self.sql_store.update(
|
|
212
|
-
table="conversation_items",
|
|
213
|
-
data={"created_at": created_at, "item_data": item_dict},
|
|
214
|
-
where={"id": item_id},
|
|
215
|
-
)
|
|
206
|
+
await self.sql_store.upsert(
|
|
207
|
+
table="conversation_items",
|
|
208
|
+
data=item_record,
|
|
209
|
+
conflict_columns=["id"],
|
|
210
|
+
)
|
|
216
211
|
|
|
217
212
|
created_items.append(item_dict)
|
|
218
213
|
|
|
@@ -247,7 +242,14 @@ class ConversationServiceImpl(Conversations):
|
|
|
247
242
|
adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
|
|
248
243
|
return adapter.validate_python(record["item_data"])
|
|
249
244
|
|
|
250
|
-
async def
|
|
245
|
+
async def list_items(
|
|
246
|
+
self,
|
|
247
|
+
conversation_id: str,
|
|
248
|
+
after: str | None = None,
|
|
249
|
+
include: list[ConversationItemInclude] | None = None,
|
|
250
|
+
limit: int | None = None,
|
|
251
|
+
order: Literal["asc", "desc"] | None = None,
|
|
252
|
+
) -> ConversationItemList:
|
|
251
253
|
"""List items in the conversation."""
|
|
252
254
|
if not conversation_id:
|
|
253
255
|
raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
|
|
@@ -258,14 +260,12 @@ class ConversationServiceImpl(Conversations):
|
|
|
258
260
|
result = await self.sql_store.fetch_all(table="conversation_items", where={"conversation_id": conversation_id})
|
|
259
261
|
records = result.data
|
|
260
262
|
|
|
261
|
-
if order
|
|
263
|
+
if order is not None and order == "asc":
|
|
262
264
|
records.sort(key=lambda x: x["created_at"])
|
|
263
265
|
else:
|
|
264
266
|
records.sort(key=lambda x: x["created_at"], reverse=True)
|
|
265
267
|
|
|
266
|
-
actual_limit = 20
|
|
267
|
-
if limit != NOT_GIVEN and isinstance(limit, int):
|
|
268
|
-
actual_limit = limit
|
|
268
|
+
actual_limit = limit or 20
|
|
269
269
|
|
|
270
270
|
records = records[:actual_limit]
|
|
271
271
|
items = [record["item_data"] for record in records]
|
|
@@ -307,3 +307,6 @@ class ConversationServiceImpl(Conversations):
|
|
|
307
307
|
|
|
308
308
|
logger.debug(f"Deleted item {item_id} from conversation {conversation_id}")
|
|
309
309
|
return ConversationItemDeletedResource(id=item_id)
|
|
310
|
+
|
|
311
|
+
async def shutdown(self) -> None:
|
|
312
|
+
pass
|