llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +12 -21
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.4.dist-info/RECORD +0 -625
- llama_stack-0.3.4.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -4,53 +4,35 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
import uuid
|
|
8
|
-
from collections.abc import AsyncGenerator
|
|
9
|
-
from datetime import UTC, datetime
|
|
10
7
|
|
|
11
|
-
from llama_stack.
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
8
|
+
from llama_stack.core.datatypes import AccessRule
|
|
9
|
+
from llama_stack.core.storage.kvstore import InmemoryKVStoreImpl, kvstore_impl
|
|
10
|
+
from llama_stack.log import get_logger
|
|
11
|
+
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
|
|
12
|
+
from llama_stack_api import (
|
|
15
13
|
Agents,
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
AgentTurnCreateRequest,
|
|
20
|
-
AgentTurnResumeRequest,
|
|
21
|
-
Document,
|
|
14
|
+
Conversations,
|
|
15
|
+
Files,
|
|
16
|
+
Inference,
|
|
22
17
|
ListOpenAIResponseInputItem,
|
|
23
18
|
ListOpenAIResponseObject,
|
|
19
|
+
OpenAIDeleteResponseObject,
|
|
24
20
|
OpenAIResponseInput,
|
|
25
21
|
OpenAIResponseInputTool,
|
|
22
|
+
OpenAIResponseInputToolChoice,
|
|
26
23
|
OpenAIResponseObject,
|
|
24
|
+
OpenAIResponsePrompt,
|
|
25
|
+
OpenAIResponseText,
|
|
27
26
|
Order,
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
Prompts,
|
|
28
|
+
ResponseGuardrail,
|
|
29
|
+
Safety,
|
|
30
|
+
ToolGroups,
|
|
31
|
+
ToolRuntime,
|
|
32
|
+
VectorIO,
|
|
30
33
|
)
|
|
31
|
-
from llama_stack.apis.agents.agents import ResponseGuardrail
|
|
32
|
-
from llama_stack.apis.agents.openai_responses import OpenAIResponseText
|
|
33
|
-
from llama_stack.apis.common.responses import PaginatedResponse
|
|
34
|
-
from llama_stack.apis.conversations import Conversations
|
|
35
|
-
from llama_stack.apis.inference import (
|
|
36
|
-
Inference,
|
|
37
|
-
ToolConfig,
|
|
38
|
-
ToolResponse,
|
|
39
|
-
ToolResponseMessage,
|
|
40
|
-
UserMessage,
|
|
41
|
-
)
|
|
42
|
-
from llama_stack.apis.safety import Safety
|
|
43
|
-
from llama_stack.apis.tools import ToolGroups, ToolRuntime
|
|
44
|
-
from llama_stack.apis.vector_io import VectorIO
|
|
45
|
-
from llama_stack.core.datatypes import AccessRule
|
|
46
|
-
from llama_stack.log import get_logger
|
|
47
|
-
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
|
|
48
|
-
from llama_stack.providers.utils.pagination import paginate_records
|
|
49
|
-
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
|
|
50
34
|
|
|
51
|
-
from .agent_instance import ChatAgent
|
|
52
35
|
from .config import MetaReferenceAgentsImplConfig
|
|
53
|
-
from .persistence import AgentInfo
|
|
54
36
|
from .responses.openai_responses import OpenAIResponsesImpl
|
|
55
37
|
|
|
56
38
|
logger = get_logger(name=__name__, category="agents::meta_reference")
|
|
@@ -62,12 +44,13 @@ class MetaReferenceAgentsImpl(Agents):
|
|
|
62
44
|
config: MetaReferenceAgentsImplConfig,
|
|
63
45
|
inference_api: Inference,
|
|
64
46
|
vector_io_api: VectorIO,
|
|
65
|
-
safety_api: Safety,
|
|
47
|
+
safety_api: Safety | None,
|
|
66
48
|
tool_runtime_api: ToolRuntime,
|
|
67
49
|
tool_groups_api: ToolGroups,
|
|
68
50
|
conversations_api: Conversations,
|
|
51
|
+
prompts_api: Prompts,
|
|
52
|
+
files_api: Files,
|
|
69
53
|
policy: list[AccessRule],
|
|
70
|
-
telemetry_enabled: bool = False,
|
|
71
54
|
):
|
|
72
55
|
self.config = config
|
|
73
56
|
self.inference_api = inference_api
|
|
@@ -76,8 +59,8 @@ class MetaReferenceAgentsImpl(Agents):
|
|
|
76
59
|
self.tool_runtime_api = tool_runtime_api
|
|
77
60
|
self.tool_groups_api = tool_groups_api
|
|
78
61
|
self.conversations_api = conversations_api
|
|
79
|
-
self.
|
|
80
|
-
|
|
62
|
+
self.prompts_api = prompts_api
|
|
63
|
+
self.files_api = files_api
|
|
81
64
|
self.in_memory_store = InmemoryKVStoreImpl()
|
|
82
65
|
self.openai_responses_impl: OpenAIResponsesImpl | None = None
|
|
83
66
|
self.policy = policy
|
|
@@ -94,227 +77,11 @@ class MetaReferenceAgentsImpl(Agents):
|
|
|
94
77
|
vector_io_api=self.vector_io_api,
|
|
95
78
|
safety_api=self.safety_api,
|
|
96
79
|
conversations_api=self.conversations_api,
|
|
80
|
+
prompts_api=self.prompts_api,
|
|
81
|
+
files_api=self.files_api,
|
|
82
|
+
vector_stores_config=self.config.vector_stores_config,
|
|
97
83
|
)
|
|
98
84
|
|
|
99
|
-
async def create_agent(
|
|
100
|
-
self,
|
|
101
|
-
agent_config: AgentConfig,
|
|
102
|
-
) -> AgentCreateResponse:
|
|
103
|
-
agent_id = str(uuid.uuid4())
|
|
104
|
-
created_at = datetime.now(UTC)
|
|
105
|
-
|
|
106
|
-
agent_info = AgentInfo(
|
|
107
|
-
**agent_config.model_dump(),
|
|
108
|
-
created_at=created_at,
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
# Store the agent info
|
|
112
|
-
await self.persistence_store.set(
|
|
113
|
-
key=f"agent:{agent_id}",
|
|
114
|
-
value=agent_info.model_dump_json(),
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
return AgentCreateResponse(
|
|
118
|
-
agent_id=agent_id,
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
async def _get_agent_impl(self, agent_id: str) -> ChatAgent:
|
|
122
|
-
agent_info_json = await self.persistence_store.get(
|
|
123
|
-
key=f"agent:{agent_id}",
|
|
124
|
-
)
|
|
125
|
-
if not agent_info_json:
|
|
126
|
-
raise ValueError(f"Could not find agent info for {agent_id}")
|
|
127
|
-
|
|
128
|
-
try:
|
|
129
|
-
agent_info = AgentInfo.model_validate_json(agent_info_json)
|
|
130
|
-
except Exception as e:
|
|
131
|
-
raise ValueError(f"Could not validate agent info for {agent_id}") from e
|
|
132
|
-
|
|
133
|
-
return ChatAgent(
|
|
134
|
-
agent_id=agent_id,
|
|
135
|
-
agent_config=agent_info,
|
|
136
|
-
inference_api=self.inference_api,
|
|
137
|
-
safety_api=self.safety_api,
|
|
138
|
-
vector_io_api=self.vector_io_api,
|
|
139
|
-
tool_runtime_api=self.tool_runtime_api,
|
|
140
|
-
tool_groups_api=self.tool_groups_api,
|
|
141
|
-
persistence_store=(
|
|
142
|
-
self.persistence_store if agent_info.enable_session_persistence else self.in_memory_store
|
|
143
|
-
),
|
|
144
|
-
created_at=agent_info.created_at,
|
|
145
|
-
policy=self.policy,
|
|
146
|
-
telemetry_enabled=self.telemetry_enabled,
|
|
147
|
-
)
|
|
148
|
-
|
|
149
|
-
async def create_agent_session(
|
|
150
|
-
self,
|
|
151
|
-
agent_id: str,
|
|
152
|
-
session_name: str,
|
|
153
|
-
) -> AgentSessionCreateResponse:
|
|
154
|
-
agent = await self._get_agent_impl(agent_id)
|
|
155
|
-
|
|
156
|
-
session_id = await agent.create_session(session_name)
|
|
157
|
-
return AgentSessionCreateResponse(
|
|
158
|
-
session_id=session_id,
|
|
159
|
-
)
|
|
160
|
-
|
|
161
|
-
async def create_agent_turn(
|
|
162
|
-
self,
|
|
163
|
-
agent_id: str,
|
|
164
|
-
session_id: str,
|
|
165
|
-
messages: list[UserMessage | ToolResponseMessage],
|
|
166
|
-
toolgroups: list[AgentToolGroup] | None = None,
|
|
167
|
-
documents: list[Document] | None = None,
|
|
168
|
-
stream: bool | None = False,
|
|
169
|
-
tool_config: ToolConfig | None = None,
|
|
170
|
-
) -> AsyncGenerator:
|
|
171
|
-
request = AgentTurnCreateRequest(
|
|
172
|
-
agent_id=agent_id,
|
|
173
|
-
session_id=session_id,
|
|
174
|
-
messages=messages,
|
|
175
|
-
stream=True,
|
|
176
|
-
toolgroups=toolgroups,
|
|
177
|
-
documents=documents,
|
|
178
|
-
tool_config=tool_config,
|
|
179
|
-
)
|
|
180
|
-
if stream:
|
|
181
|
-
return self._create_agent_turn_streaming(request)
|
|
182
|
-
else:
|
|
183
|
-
raise NotImplementedError("Non-streaming agent turns not yet implemented")
|
|
184
|
-
|
|
185
|
-
async def _create_agent_turn_streaming(
|
|
186
|
-
self,
|
|
187
|
-
request: AgentTurnCreateRequest,
|
|
188
|
-
) -> AsyncGenerator:
|
|
189
|
-
agent = await self._get_agent_impl(request.agent_id)
|
|
190
|
-
async for event in agent.create_and_execute_turn(request):
|
|
191
|
-
yield event
|
|
192
|
-
|
|
193
|
-
async def resume_agent_turn(
|
|
194
|
-
self,
|
|
195
|
-
agent_id: str,
|
|
196
|
-
session_id: str,
|
|
197
|
-
turn_id: str,
|
|
198
|
-
tool_responses: list[ToolResponse],
|
|
199
|
-
stream: bool | None = False,
|
|
200
|
-
) -> AsyncGenerator:
|
|
201
|
-
request = AgentTurnResumeRequest(
|
|
202
|
-
agent_id=agent_id,
|
|
203
|
-
session_id=session_id,
|
|
204
|
-
turn_id=turn_id,
|
|
205
|
-
tool_responses=tool_responses,
|
|
206
|
-
stream=stream,
|
|
207
|
-
)
|
|
208
|
-
if stream:
|
|
209
|
-
return self._continue_agent_turn_streaming(request)
|
|
210
|
-
else:
|
|
211
|
-
raise NotImplementedError("Non-streaming agent turns not yet implemented")
|
|
212
|
-
|
|
213
|
-
async def _continue_agent_turn_streaming(
|
|
214
|
-
self,
|
|
215
|
-
request: AgentTurnResumeRequest,
|
|
216
|
-
) -> AsyncGenerator:
|
|
217
|
-
agent = await self._get_agent_impl(request.agent_id)
|
|
218
|
-
async for event in agent.resume_turn(request):
|
|
219
|
-
yield event
|
|
220
|
-
|
|
221
|
-
async def get_agents_turn(self, agent_id: str, session_id: str, turn_id: str) -> Turn:
|
|
222
|
-
agent = await self._get_agent_impl(agent_id)
|
|
223
|
-
turn = await agent.storage.get_session_turn(session_id, turn_id)
|
|
224
|
-
return turn
|
|
225
|
-
|
|
226
|
-
async def get_agents_step(self, agent_id: str, session_id: str, turn_id: str, step_id: str) -> AgentStepResponse:
|
|
227
|
-
turn = await self.get_agents_turn(agent_id, session_id, turn_id)
|
|
228
|
-
for step in turn.steps:
|
|
229
|
-
if step.step_id == step_id:
|
|
230
|
-
return AgentStepResponse(step=step)
|
|
231
|
-
raise ValueError(f"Provided step_id {step_id} could not be found")
|
|
232
|
-
|
|
233
|
-
async def get_agents_session(
|
|
234
|
-
self,
|
|
235
|
-
agent_id: str,
|
|
236
|
-
session_id: str,
|
|
237
|
-
turn_ids: list[str] | None = None,
|
|
238
|
-
) -> Session:
|
|
239
|
-
agent = await self._get_agent_impl(agent_id)
|
|
240
|
-
|
|
241
|
-
session_info = await agent.storage.get_session_info(session_id)
|
|
242
|
-
turns = await agent.storage.get_session_turns(session_id)
|
|
243
|
-
if turn_ids:
|
|
244
|
-
turns = [turn for turn in turns if turn.turn_id in turn_ids]
|
|
245
|
-
return Session(
|
|
246
|
-
session_name=session_info.session_name,
|
|
247
|
-
session_id=session_id,
|
|
248
|
-
turns=turns,
|
|
249
|
-
started_at=session_info.started_at,
|
|
250
|
-
)
|
|
251
|
-
|
|
252
|
-
async def delete_agents_session(self, agent_id: str, session_id: str) -> None:
|
|
253
|
-
agent = await self._get_agent_impl(agent_id)
|
|
254
|
-
|
|
255
|
-
# Delete turns first, then the session
|
|
256
|
-
await agent.storage.delete_session_turns(session_id)
|
|
257
|
-
await agent.storage.delete_session(session_id)
|
|
258
|
-
|
|
259
|
-
async def delete_agent(self, agent_id: str) -> None:
|
|
260
|
-
# First get all sessions for this agent
|
|
261
|
-
agent = await self._get_agent_impl(agent_id)
|
|
262
|
-
sessions = await agent.storage.list_sessions()
|
|
263
|
-
|
|
264
|
-
# Delete all sessions
|
|
265
|
-
for session in sessions:
|
|
266
|
-
await self.delete_agents_session(agent_id, session.session_id)
|
|
267
|
-
|
|
268
|
-
# Finally delete the agent itself
|
|
269
|
-
await self.persistence_store.delete(f"agent:{agent_id}")
|
|
270
|
-
|
|
271
|
-
async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
|
|
272
|
-
agent_keys = await self.persistence_store.keys_in_range("agent:", "agent:\xff")
|
|
273
|
-
agent_list: list[Agent] = []
|
|
274
|
-
for agent_key in agent_keys:
|
|
275
|
-
agent_id = agent_key.split(":")[1]
|
|
276
|
-
|
|
277
|
-
# Get the agent info using the key
|
|
278
|
-
agent_info_json = await self.persistence_store.get(agent_key)
|
|
279
|
-
if not agent_info_json:
|
|
280
|
-
logger.error(f"Could not find agent info for key {agent_key}")
|
|
281
|
-
continue
|
|
282
|
-
|
|
283
|
-
try:
|
|
284
|
-
agent_info = AgentInfo.model_validate_json(agent_info_json)
|
|
285
|
-
agent_list.append(
|
|
286
|
-
Agent(
|
|
287
|
-
agent_id=agent_id,
|
|
288
|
-
agent_config=agent_info,
|
|
289
|
-
created_at=agent_info.created_at,
|
|
290
|
-
)
|
|
291
|
-
)
|
|
292
|
-
except Exception as e:
|
|
293
|
-
logger.error(f"Error parsing agent info for {agent_id}: {e}")
|
|
294
|
-
continue
|
|
295
|
-
|
|
296
|
-
# Convert Agent objects to dictionaries
|
|
297
|
-
agent_dicts = [agent.model_dump() for agent in agent_list]
|
|
298
|
-
return paginate_records(agent_dicts, start_index, limit)
|
|
299
|
-
|
|
300
|
-
async def get_agent(self, agent_id: str) -> Agent:
|
|
301
|
-
chat_agent = await self._get_agent_impl(agent_id)
|
|
302
|
-
agent = Agent(
|
|
303
|
-
agent_id=agent_id,
|
|
304
|
-
agent_config=chat_agent.agent_config,
|
|
305
|
-
created_at=chat_agent.created_at,
|
|
306
|
-
)
|
|
307
|
-
return agent
|
|
308
|
-
|
|
309
|
-
async def list_agent_sessions(
|
|
310
|
-
self, agent_id: str, start_index: int | None = None, limit: int | None = None
|
|
311
|
-
) -> PaginatedResponse:
|
|
312
|
-
agent = await self._get_agent_impl(agent_id)
|
|
313
|
-
sessions = await agent.storage.list_sessions()
|
|
314
|
-
# Convert Session objects to dictionaries
|
|
315
|
-
session_dicts = [session.model_dump() for session in sessions]
|
|
316
|
-
return paginate_records(session_dicts, start_index, limit)
|
|
317
|
-
|
|
318
85
|
async def shutdown(self) -> None:
|
|
319
86
|
pass
|
|
320
87
|
|
|
@@ -323,27 +90,35 @@ class MetaReferenceAgentsImpl(Agents):
|
|
|
323
90
|
self,
|
|
324
91
|
response_id: str,
|
|
325
92
|
) -> OpenAIResponseObject:
|
|
93
|
+
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
|
|
326
94
|
return await self.openai_responses_impl.get_openai_response(response_id)
|
|
327
95
|
|
|
328
96
|
async def create_openai_response(
|
|
329
97
|
self,
|
|
330
98
|
input: str | list[OpenAIResponseInput],
|
|
331
99
|
model: str,
|
|
100
|
+
prompt: OpenAIResponsePrompt | None = None,
|
|
332
101
|
instructions: str | None = None,
|
|
102
|
+
parallel_tool_calls: bool | None = True,
|
|
333
103
|
previous_response_id: str | None = None,
|
|
334
104
|
conversation: str | None = None,
|
|
335
105
|
store: bool | None = True,
|
|
336
106
|
stream: bool | None = False,
|
|
337
107
|
temperature: float | None = None,
|
|
338
108
|
text: OpenAIResponseText | None = None,
|
|
109
|
+
tool_choice: OpenAIResponseInputToolChoice | None = None,
|
|
339
110
|
tools: list[OpenAIResponseInputTool] | None = None,
|
|
340
111
|
include: list[str] | None = None,
|
|
341
112
|
max_infer_iters: int | None = 10,
|
|
342
113
|
guardrails: list[ResponseGuardrail] | None = None,
|
|
114
|
+
max_tool_calls: int | None = None,
|
|
115
|
+
metadata: dict[str, str] | None = None,
|
|
343
116
|
) -> OpenAIResponseObject:
|
|
344
|
-
|
|
117
|
+
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
|
|
118
|
+
result = await self.openai_responses_impl.create_openai_response(
|
|
345
119
|
input,
|
|
346
120
|
model,
|
|
121
|
+
prompt,
|
|
347
122
|
instructions,
|
|
348
123
|
previous_response_id,
|
|
349
124
|
conversation,
|
|
@@ -351,11 +126,16 @@ class MetaReferenceAgentsImpl(Agents):
|
|
|
351
126
|
stream,
|
|
352
127
|
temperature,
|
|
353
128
|
text,
|
|
129
|
+
tool_choice,
|
|
354
130
|
tools,
|
|
355
131
|
include,
|
|
356
132
|
max_infer_iters,
|
|
357
133
|
guardrails,
|
|
134
|
+
parallel_tool_calls,
|
|
135
|
+
max_tool_calls,
|
|
136
|
+
metadata,
|
|
358
137
|
)
|
|
138
|
+
return result # type: ignore[no-any-return]
|
|
359
139
|
|
|
360
140
|
async def list_openai_responses(
|
|
361
141
|
self,
|
|
@@ -364,6 +144,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|
|
364
144
|
model: str | None = None,
|
|
365
145
|
order: Order | None = Order.desc,
|
|
366
146
|
) -> ListOpenAIResponseObject:
|
|
147
|
+
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
|
|
367
148
|
return await self.openai_responses_impl.list_openai_responses(after, limit, model, order)
|
|
368
149
|
|
|
369
150
|
async def list_openai_response_input_items(
|
|
@@ -375,9 +156,11 @@ class MetaReferenceAgentsImpl(Agents):
|
|
|
375
156
|
limit: int | None = 20,
|
|
376
157
|
order: Order | None = Order.desc,
|
|
377
158
|
) -> ListOpenAIResponseInputItem:
|
|
159
|
+
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
|
|
378
160
|
return await self.openai_responses_impl.list_openai_response_input_items(
|
|
379
161
|
response_id, after, before, include, limit, order
|
|
380
162
|
)
|
|
381
163
|
|
|
382
|
-
async def delete_openai_response(self, response_id: str) ->
|
|
164
|
+
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
|
|
165
|
+
assert self.openai_responses_impl is not None, "OpenAI responses not initialized"
|
|
383
166
|
return await self.openai_responses_impl.delete_openai_response(response_id)
|
|
@@ -6,8 +6,9 @@
|
|
|
6
6
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from pydantic import BaseModel
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
|
+
from llama_stack.core.datatypes import VectorStoresConfig
|
|
11
12
|
from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
|
|
12
13
|
|
|
13
14
|
|
|
@@ -20,6 +21,10 @@ class AgentPersistenceConfig(BaseModel):
|
|
|
20
21
|
|
|
21
22
|
class MetaReferenceAgentsImplConfig(BaseModel):
|
|
22
23
|
persistence: AgentPersistenceConfig
|
|
24
|
+
vector_stores_config: VectorStoresConfig | None = Field(
|
|
25
|
+
default=None,
|
|
26
|
+
description="Configuration for vector store prompt templates and behavior",
|
|
27
|
+
)
|
|
23
28
|
|
|
24
29
|
@classmethod
|
|
25
30
|
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
|