llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +12 -21
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.4.dist-info/RECORD +0 -625
- llama_stack-0.3.4.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -6,35 +6,45 @@
|
|
|
6
6
|
|
|
7
7
|
import uuid
|
|
8
8
|
from datetime import UTC, datetime
|
|
9
|
-
from typing import
|
|
9
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
10
10
|
|
|
11
11
|
import boto3
|
|
12
12
|
from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError
|
|
13
|
-
from fastapi import
|
|
13
|
+
from fastapi import Response, UploadFile
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
from
|
|
17
|
-
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from mypy_boto3_s3.client import S3Client
|
|
17
|
+
|
|
18
|
+
from llama_stack.core.access_control.datatypes import Action
|
|
19
|
+
from llama_stack.core.datatypes import AccessRule
|
|
20
|
+
from llama_stack.core.id_generation import generate_object_id
|
|
21
|
+
from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
|
22
|
+
from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl
|
|
23
|
+
from llama_stack_api import (
|
|
18
24
|
ExpiresAfter,
|
|
19
25
|
Files,
|
|
20
26
|
ListOpenAIFileResponse,
|
|
21
27
|
OpenAIFileDeleteResponse,
|
|
22
28
|
OpenAIFileObject,
|
|
23
29
|
OpenAIFilePurpose,
|
|
30
|
+
Order,
|
|
31
|
+
ResourceNotFoundError,
|
|
24
32
|
)
|
|
25
|
-
from
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
33
|
+
from llama_stack_api.files.models import (
|
|
34
|
+
DeleteFileRequest,
|
|
35
|
+
ListFilesRequest,
|
|
36
|
+
RetrieveFileContentRequest,
|
|
37
|
+
RetrieveFileRequest,
|
|
38
|
+
UploadFileRequest,
|
|
39
|
+
)
|
|
40
|
+
from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
|
|
31
41
|
|
|
32
42
|
from .config import S3FilesImplConfig
|
|
33
43
|
|
|
34
44
|
# TODO: provider data for S3 credentials
|
|
35
45
|
|
|
36
46
|
|
|
37
|
-
def _create_s3_client(config: S3FilesImplConfig) ->
|
|
47
|
+
def _create_s3_client(config: S3FilesImplConfig) -> "S3Client":
|
|
38
48
|
try:
|
|
39
49
|
s3_config = {
|
|
40
50
|
"region_name": config.region,
|
|
@@ -52,13 +62,16 @@ def _create_s3_client(config: S3FilesImplConfig) -> boto3.client:
|
|
|
52
62
|
}
|
|
53
63
|
)
|
|
54
64
|
|
|
55
|
-
|
|
65
|
+
# Both cast and type:ignore are needed here:
|
|
66
|
+
# - cast tells mypy the return type for downstream usage (S3Client vs generic client)
|
|
67
|
+
# - type:ignore suppresses the call-overload error from boto3's complex overloaded signatures
|
|
68
|
+
return cast("S3Client", boto3.client("s3", **s3_config)) # type: ignore[call-overload]
|
|
56
69
|
|
|
57
70
|
except (BotoCoreError, NoCredentialsError) as e:
|
|
58
71
|
raise RuntimeError(f"Failed to initialize S3 client: {e}") from e
|
|
59
72
|
|
|
60
73
|
|
|
61
|
-
async def _create_bucket_if_not_exists(client:
|
|
74
|
+
async def _create_bucket_if_not_exists(client: "S3Client", config: S3FilesImplConfig) -> None:
|
|
62
75
|
try:
|
|
63
76
|
client.head_bucket(Bucket=config.bucket_name)
|
|
64
77
|
except ClientError as e:
|
|
@@ -76,7 +89,7 @@ async def _create_bucket_if_not_exists(client: boto3.client, config: S3FilesImpl
|
|
|
76
89
|
else:
|
|
77
90
|
client.create_bucket(
|
|
78
91
|
Bucket=config.bucket_name,
|
|
79
|
-
CreateBucketConfiguration={"LocationConstraint": config.region},
|
|
92
|
+
CreateBucketConfiguration=cast(Any, {"LocationConstraint": config.region}),
|
|
80
93
|
)
|
|
81
94
|
except ClientError as create_error:
|
|
82
95
|
raise RuntimeError(
|
|
@@ -128,18 +141,20 @@ class S3FilesImpl(Files):
|
|
|
128
141
|
def __init__(self, config: S3FilesImplConfig, policy: list[AccessRule]) -> None:
|
|
129
142
|
self._config = config
|
|
130
143
|
self.policy = policy
|
|
131
|
-
self._client:
|
|
144
|
+
self._client: S3Client | None = None
|
|
132
145
|
self._sql_store: AuthorizedSqlStore | None = None
|
|
133
146
|
|
|
134
147
|
def _now(self) -> int:
|
|
135
148
|
"""Return current UTC timestamp as int seconds."""
|
|
136
149
|
return int(datetime.now(UTC).timestamp())
|
|
137
150
|
|
|
138
|
-
async def _get_file(
|
|
151
|
+
async def _get_file(
|
|
152
|
+
self, file_id: str, return_expired: bool = False, action: Action = Action.READ
|
|
153
|
+
) -> dict[str, Any]:
|
|
139
154
|
where: dict[str, str | dict] = {"id": file_id}
|
|
140
155
|
if not return_expired:
|
|
141
156
|
where["expires_at"] = {">": self._now()}
|
|
142
|
-
if not (row := await self.sql_store.fetch_one("openai_files", where=where)):
|
|
157
|
+
if not (row := await self.sql_store.fetch_one("openai_files", where=where, action=action)):
|
|
143
158
|
raise ResourceNotFoundError(file_id, "File", "files.list()")
|
|
144
159
|
return row
|
|
145
160
|
|
|
@@ -184,7 +199,7 @@ class S3FilesImpl(Files):
|
|
|
184
199
|
pass
|
|
185
200
|
|
|
186
201
|
@property
|
|
187
|
-
def client(self) ->
|
|
202
|
+
def client(self) -> "S3Client":
|
|
188
203
|
assert self._client is not None, "Provider not initialized"
|
|
189
204
|
return self._client
|
|
190
205
|
|
|
@@ -195,10 +210,12 @@ class S3FilesImpl(Files):
|
|
|
195
210
|
|
|
196
211
|
async def openai_upload_file(
|
|
197
212
|
self,
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None,
|
|
213
|
+
request: UploadFileRequest,
|
|
214
|
+
file: UploadFile,
|
|
201
215
|
) -> OpenAIFileObject:
|
|
216
|
+
purpose = request.purpose
|
|
217
|
+
expires_after = request.expires_after
|
|
218
|
+
|
|
202
219
|
file_id = generate_object_id("file", lambda: f"file-{uuid.uuid4().hex}")
|
|
203
220
|
|
|
204
221
|
filename = getattr(file, "filename", None) or "uploaded_file"
|
|
@@ -246,11 +263,13 @@ class S3FilesImpl(Files):
|
|
|
246
263
|
|
|
247
264
|
async def openai_list_files(
|
|
248
265
|
self,
|
|
249
|
-
|
|
250
|
-
limit: int | None = 10000,
|
|
251
|
-
order: Order | None = Order.desc,
|
|
252
|
-
purpose: OpenAIFilePurpose | None = None,
|
|
266
|
+
request: ListFilesRequest,
|
|
253
267
|
) -> ListOpenAIFileResponse:
|
|
268
|
+
after = request.after
|
|
269
|
+
limit = request.limit
|
|
270
|
+
order = request.order
|
|
271
|
+
purpose = request.purpose
|
|
272
|
+
|
|
254
273
|
# this purely defensive. it should not happen because the router also default to Order.desc.
|
|
255
274
|
if not order:
|
|
256
275
|
order = Order.desc
|
|
@@ -277,18 +296,21 @@ class S3FilesImpl(Files):
|
|
|
277
296
|
last_id=files[-1].id if files else "",
|
|
278
297
|
)
|
|
279
298
|
|
|
280
|
-
async def openai_retrieve_file(self,
|
|
299
|
+
async def openai_retrieve_file(self, request: RetrieveFileRequest) -> OpenAIFileObject:
|
|
300
|
+
file_id = request.file_id
|
|
281
301
|
await self._delete_if_expired(file_id)
|
|
282
302
|
row = await self._get_file(file_id)
|
|
283
303
|
return _make_file_object(**row)
|
|
284
304
|
|
|
285
|
-
async def openai_delete_file(self,
|
|
305
|
+
async def openai_delete_file(self, request: DeleteFileRequest) -> OpenAIFileDeleteResponse:
|
|
306
|
+
file_id = request.file_id
|
|
286
307
|
await self._delete_if_expired(file_id)
|
|
287
|
-
_ = await self._get_file(file_id) # raises if not found
|
|
308
|
+
_ = await self._get_file(file_id, action=Action.DELETE) # raises if not found
|
|
288
309
|
await self._delete_file(file_id)
|
|
289
310
|
return OpenAIFileDeleteResponse(id=file_id, deleted=True)
|
|
290
311
|
|
|
291
|
-
async def openai_retrieve_file_content(self,
|
|
312
|
+
async def openai_retrieve_file_content(self, request: RetrieveFileContentRequest) -> Response:
|
|
313
|
+
file_id = request.file_id
|
|
292
314
|
await self._delete_if_expired(file_id)
|
|
293
315
|
|
|
294
316
|
row = await self._get_file(file_id)
|
|
@@ -33,4 +33,5 @@ class AnthropicInferenceAdapter(OpenAIMixin):
|
|
|
33
33
|
return "https://api.anthropic.com/v1"
|
|
34
34
|
|
|
35
35
|
async def list_provider_model_ids(self) -> Iterable[str]:
|
|
36
|
-
|
|
36
|
+
api_key = self._get_api_key_from_config_or_provider_data()
|
|
37
|
+
return [m.id async for m in AsyncAnthropic(api_key=api_key).models.list()]
|
|
@@ -9,7 +9,7 @@ from typing import Any
|
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
11
|
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
|
12
|
-
from
|
|
12
|
+
from llama_stack_api import json_schema_type
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class AnthropicProviderDataValidator(BaseModel):
|
|
@@ -4,8 +4,6 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from urllib.parse import urljoin
|
|
8
|
-
|
|
9
7
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|
10
8
|
|
|
11
9
|
from .config import AzureConfig
|
|
@@ -22,4 +20,4 @@ class AzureInferenceAdapter(OpenAIMixin):
|
|
|
22
20
|
|
|
23
21
|
Returns the Azure API base URL from the configuration.
|
|
24
22
|
"""
|
|
25
|
-
return
|
|
23
|
+
return str(self.config.base_url)
|
|
@@ -7,14 +7,14 @@
|
|
|
7
7
|
import os
|
|
8
8
|
from typing import Any
|
|
9
9
|
|
|
10
|
-
from pydantic import BaseModel, Field, HttpUrl
|
|
10
|
+
from pydantic import BaseModel, Field, HttpUrl
|
|
11
11
|
|
|
12
12
|
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
|
13
|
-
from
|
|
13
|
+
from llama_stack_api import json_schema_type
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class AzureProviderDataValidator(BaseModel):
|
|
17
|
-
azure_api_key:
|
|
17
|
+
azure_api_key: str = Field(
|
|
18
18
|
description="Azure API key for Azure",
|
|
19
19
|
)
|
|
20
20
|
azure_api_base: HttpUrl = Field(
|
|
@@ -32,8 +32,9 @@ class AzureProviderDataValidator(BaseModel):
|
|
|
32
32
|
|
|
33
33
|
@json_schema_type
|
|
34
34
|
class AzureConfig(RemoteInferenceProviderConfig):
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
base_url: HttpUrl | None = Field(
|
|
36
|
+
default=None,
|
|
37
|
+
description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1)",
|
|
37
38
|
)
|
|
38
39
|
api_version: str | None = Field(
|
|
39
40
|
default_factory=lambda: os.getenv("AZURE_API_VERSION"),
|
|
@@ -48,14 +49,14 @@ class AzureConfig(RemoteInferenceProviderConfig):
|
|
|
48
49
|
def sample_run_config(
|
|
49
50
|
cls,
|
|
50
51
|
api_key: str = "${env.AZURE_API_KEY:=}",
|
|
51
|
-
|
|
52
|
+
base_url: str = "${env.AZURE_API_BASE:=}",
|
|
52
53
|
api_version: str = "${env.AZURE_API_VERSION:=}",
|
|
53
54
|
api_type: str = "${env.AZURE_API_TYPE:=}",
|
|
54
55
|
**kwargs,
|
|
55
56
|
) -> dict[str, Any]:
|
|
56
57
|
return {
|
|
57
58
|
"api_key": api_key,
|
|
58
|
-
"
|
|
59
|
+
"base_url": base_url,
|
|
59
60
|
"api_version": api_version,
|
|
60
61
|
"api_type": api_type,
|
|
61
62
|
}
|
|
@@ -11,7 +11,7 @@ async def get_adapter_impl(config: BedrockConfig, _deps):
|
|
|
11
11
|
|
|
12
12
|
assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}"
|
|
13
13
|
|
|
14
|
-
impl = BedrockInferenceAdapter(config)
|
|
14
|
+
impl = BedrockInferenceAdapter(config=config)
|
|
15
15
|
|
|
16
16
|
await impl.initialize()
|
|
17
17
|
|
|
@@ -4,139 +4,116 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
import
|
|
8
|
-
from collections.abc import AsyncIterator
|
|
7
|
+
from collections.abc import AsyncIterator, Iterable
|
|
9
8
|
|
|
10
|
-
from
|
|
9
|
+
from openai import AuthenticationError
|
|
11
10
|
|
|
12
|
-
from llama_stack.
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
from llama_stack.log import get_logger
|
|
12
|
+
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|
13
|
+
from llama_stack_api import (
|
|
14
|
+
OpenAIChatCompletion,
|
|
15
|
+
OpenAIChatCompletionChunk,
|
|
15
16
|
OpenAIChatCompletionRequestWithExtraBody,
|
|
17
|
+
OpenAICompletion,
|
|
16
18
|
OpenAICompletionRequestWithExtraBody,
|
|
17
19
|
OpenAIEmbeddingsRequestWithExtraBody,
|
|
18
20
|
OpenAIEmbeddingsResponse,
|
|
19
21
|
)
|
|
20
|
-
from llama_stack.apis.inference.inference import (
|
|
21
|
-
OpenAIChatCompletion,
|
|
22
|
-
OpenAIChatCompletionChunk,
|
|
23
|
-
OpenAICompletion,
|
|
24
|
-
)
|
|
25
|
-
from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
|
|
26
|
-
from llama_stack.providers.utils.bedrock.client import create_bedrock_client
|
|
27
|
-
from llama_stack.providers.utils.inference.model_registry import (
|
|
28
|
-
ModelRegistryHelper,
|
|
29
|
-
)
|
|
30
|
-
from llama_stack.providers.utils.inference.openai_compat import (
|
|
31
|
-
get_sampling_strategy_options,
|
|
32
|
-
)
|
|
33
|
-
from llama_stack.providers.utils.inference.prompt_adapter import (
|
|
34
|
-
chat_completion_request_to_prompt,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
from .models import MODEL_ENTRIES
|
|
38
|
-
|
|
39
|
-
REGION_PREFIX_MAP = {
|
|
40
|
-
"us": "us.",
|
|
41
|
-
"eu": "eu.",
|
|
42
|
-
"ap": "ap.",
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def _get_region_prefix(region: str | None) -> str:
|
|
47
|
-
# AWS requires region prefixes for inference profiles
|
|
48
|
-
if region is None:
|
|
49
|
-
return "us." # default to US when we don't know
|
|
50
|
-
|
|
51
|
-
# Handle case insensitive region matching
|
|
52
|
-
region_lower = region.lower()
|
|
53
|
-
for prefix in REGION_PREFIX_MAP:
|
|
54
|
-
if region_lower.startswith(f"{prefix}-"):
|
|
55
|
-
return REGION_PREFIX_MAP[prefix]
|
|
56
|
-
|
|
57
|
-
# Fallback to US for anything we don't recognize
|
|
58
|
-
return "us."
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def _to_inference_profile_id(model_id: str, region: str = None) -> str:
|
|
62
|
-
# Return ARNs unchanged
|
|
63
|
-
if model_id.startswith("arn:"):
|
|
64
|
-
return model_id
|
|
65
|
-
|
|
66
|
-
# Return inference profile IDs that already have regional prefixes
|
|
67
|
-
if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()):
|
|
68
|
-
return model_id
|
|
69
|
-
|
|
70
|
-
# Default to US East when no region is provided
|
|
71
|
-
if region is None:
|
|
72
|
-
region = "us-east-1"
|
|
73
|
-
|
|
74
|
-
return _get_region_prefix(region) + model_id
|
|
75
|
-
|
|
76
22
|
|
|
77
|
-
|
|
78
|
-
ModelRegistryHelper,
|
|
79
|
-
Inference,
|
|
80
|
-
):
|
|
81
|
-
def __init__(self, config: BedrockConfig) -> None:
|
|
82
|
-
ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
|
|
83
|
-
self._config = config
|
|
84
|
-
self._client = None
|
|
23
|
+
from .config import BedrockConfig
|
|
85
24
|
|
|
86
|
-
|
|
87
|
-
def client(self) -> BaseClient:
|
|
88
|
-
if self._client is None:
|
|
89
|
-
self._client = create_bedrock_client(self._config)
|
|
90
|
-
return self._client
|
|
25
|
+
logger = get_logger(name=__name__, category="inference::bedrock")
|
|
91
26
|
|
|
92
|
-
async def initialize(self) -> None:
|
|
93
|
-
pass
|
|
94
27
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
28
|
+
class BedrockInferenceAdapter(OpenAIMixin):
|
|
29
|
+
"""
|
|
30
|
+
Adapter for AWS Bedrock's OpenAI-compatible API endpoints.
|
|
98
31
|
|
|
99
|
-
|
|
100
|
-
bedrock_model = request.model
|
|
32
|
+
Supports Llama models across regions and GPT-OSS models (us-west-2 only).
|
|
101
33
|
|
|
102
|
-
|
|
103
|
-
|
|
34
|
+
Note: Bedrock's OpenAI-compatible endpoint does not support /v1/models
|
|
35
|
+
for dynamic model discovery. Models must be pre-registered in the config.
|
|
36
|
+
"""
|
|
104
37
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
if sampling_params.repetition_penalty > 0:
|
|
108
|
-
options["repetition_penalty"] = sampling_params.repetition_penalty
|
|
38
|
+
config: BedrockConfig
|
|
39
|
+
provider_data_api_key_field: str = "aws_bearer_token_bedrock"
|
|
109
40
|
|
|
110
|
-
|
|
41
|
+
def get_base_url(self) -> str:
|
|
42
|
+
"""Get base URL for OpenAI client."""
|
|
43
|
+
return f"https://bedrock-runtime.{self.config.region_name}.amazonaws.com/openai/v1"
|
|
111
44
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
45
|
+
async def list_provider_model_ids(self) -> Iterable[str]:
|
|
46
|
+
"""
|
|
47
|
+
Bedrock's OpenAI-compatible endpoint does not support the /v1/models endpoint.
|
|
48
|
+
Returns empty list since models must be pre-registered in the config.
|
|
49
|
+
"""
|
|
50
|
+
return []
|
|
115
51
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
}
|
|
123
|
-
),
|
|
124
|
-
}
|
|
52
|
+
async def check_model_availability(self, model: str) -> bool:
|
|
53
|
+
"""
|
|
54
|
+
Bedrock doesn't support dynamic model listing via /v1/models.
|
|
55
|
+
Always return True to accept all models registered in the config.
|
|
56
|
+
"""
|
|
57
|
+
return True
|
|
125
58
|
|
|
126
59
|
async def openai_embeddings(
|
|
127
60
|
self,
|
|
128
61
|
params: OpenAIEmbeddingsRequestWithExtraBody,
|
|
129
62
|
) -> OpenAIEmbeddingsResponse:
|
|
130
|
-
|
|
63
|
+
"""Bedrock's OpenAI-compatible API does not support the /v1/embeddings endpoint."""
|
|
64
|
+
raise NotImplementedError(
|
|
65
|
+
"Bedrock's OpenAI-compatible API does not support /v1/embeddings endpoint. "
|
|
66
|
+
"See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html"
|
|
67
|
+
)
|
|
131
68
|
|
|
132
69
|
async def openai_completion(
|
|
133
70
|
self,
|
|
134
71
|
params: OpenAICompletionRequestWithExtraBody,
|
|
135
|
-
) -> OpenAICompletion:
|
|
136
|
-
|
|
72
|
+
) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
|
|
73
|
+
"""Bedrock's OpenAI-compatible API does not support the /v1/completions endpoint."""
|
|
74
|
+
raise NotImplementedError(
|
|
75
|
+
"Bedrock's OpenAI-compatible API does not support /v1/completions endpoint. "
|
|
76
|
+
"Only /v1/chat/completions is supported. "
|
|
77
|
+
"See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html"
|
|
78
|
+
)
|
|
137
79
|
|
|
138
80
|
async def openai_chat_completion(
|
|
139
81
|
self,
|
|
140
82
|
params: OpenAIChatCompletionRequestWithExtraBody,
|
|
141
83
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
|
142
|
-
|
|
84
|
+
"""Override to handle authentication errors and null responses."""
|
|
85
|
+
try:
|
|
86
|
+
logger.debug(f"Calling Bedrock OpenAI API with model={params.model}, stream={params.stream}")
|
|
87
|
+
result = await super().openai_chat_completion(params=params)
|
|
88
|
+
logger.debug(f"Bedrock API returned: {type(result).__name__ if result is not None else 'None'}")
|
|
89
|
+
|
|
90
|
+
if result is None:
|
|
91
|
+
logger.error(f"Bedrock OpenAI client returned None for model={params.model}, stream={params.stream}")
|
|
92
|
+
raise RuntimeError(
|
|
93
|
+
f"Bedrock API returned no response for model '{params.model}'. "
|
|
94
|
+
"This may indicate the model is not supported or a network/API issue occurred."
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
return result
|
|
98
|
+
except AuthenticationError as e:
|
|
99
|
+
error_msg = str(e)
|
|
100
|
+
|
|
101
|
+
# Check if this is a token expiration error
|
|
102
|
+
if "expired" in error_msg.lower() or "Bearer Token has expired" in error_msg:
|
|
103
|
+
logger.error(f"AWS Bedrock authentication token expired: {error_msg}")
|
|
104
|
+
raise ValueError(
|
|
105
|
+
"AWS Bedrock authentication failed: Bearer token has expired. "
|
|
106
|
+
"The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. "
|
|
107
|
+
"Please refresh your token by generating a new pre-signed URL with AWS credentials. "
|
|
108
|
+
"Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints."
|
|
109
|
+
) from e
|
|
110
|
+
else:
|
|
111
|
+
logger.error(f"AWS Bedrock authentication failed: {error_msg}")
|
|
112
|
+
raise ValueError(
|
|
113
|
+
f"AWS Bedrock authentication failed: {error_msg}. "
|
|
114
|
+
"Please verify your API key is correct in the provider config or x-llamastack-provider-data header. "
|
|
115
|
+
"The API key should be a valid AWS pre-signed URL for Bedrock's OpenAI-compatible endpoint."
|
|
116
|
+
) from e
|
|
117
|
+
except Exception as e:
|
|
118
|
+
logger.error(f"Unexpected error calling Bedrock API: {type(e).__name__}: {e}", exc_info=True)
|
|
119
|
+
raise
|
|
@@ -4,8 +4,29 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
import os
|
|
8
8
|
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
9
10
|
|
|
10
|
-
|
|
11
|
-
|
|
11
|
+
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BedrockProviderDataValidator(BaseModel):
|
|
15
|
+
aws_bearer_token_bedrock: str | None = Field(
|
|
16
|
+
default=None,
|
|
17
|
+
description="API Key (Bearer token) for Amazon Bedrock",
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class BedrockConfig(RemoteInferenceProviderConfig):
|
|
22
|
+
region_name: str = Field(
|
|
23
|
+
default_factory=lambda: os.getenv("AWS_DEFAULT_REGION", "us-east-2"),
|
|
24
|
+
description="AWS Region for the Bedrock Runtime endpoint",
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
@classmethod
|
|
28
|
+
def sample_run_config(cls, **kwargs):
|
|
29
|
+
return {
|
|
30
|
+
"api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}",
|
|
31
|
+
"region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}",
|
|
32
|
+
}
|
|
@@ -4,13 +4,11 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from
|
|
8
|
-
|
|
9
|
-
from llama_stack.apis.inference import (
|
|
7
|
+
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|
8
|
+
from llama_stack_api import (
|
|
10
9
|
OpenAIEmbeddingsRequestWithExtraBody,
|
|
11
10
|
OpenAIEmbeddingsResponse,
|
|
12
11
|
)
|
|
13
|
-
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
|
14
12
|
|
|
15
13
|
from .config import CerebrasImplConfig
|
|
16
14
|
|
|
@@ -18,8 +16,10 @@ from .config import CerebrasImplConfig
|
|
|
18
16
|
class CerebrasInferenceAdapter(OpenAIMixin):
|
|
19
17
|
config: CerebrasImplConfig
|
|
20
18
|
|
|
19
|
+
provider_data_api_key_field: str = "cerebras_api_key"
|
|
20
|
+
|
|
21
21
|
def get_base_url(self) -> str:
|
|
22
|
-
return
|
|
22
|
+
return str(self.config.base_url)
|
|
23
23
|
|
|
24
24
|
async def openai_embeddings(
|
|
25
25
|
self,
|
|
@@ -7,18 +7,25 @@
|
|
|
7
7
|
import os
|
|
8
8
|
from typing import Any
|
|
9
9
|
|
|
10
|
-
from pydantic import Field
|
|
10
|
+
from pydantic import BaseModel, Field, HttpUrl
|
|
11
11
|
|
|
12
12
|
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
|
13
|
-
from
|
|
13
|
+
from llama_stack_api import json_schema_type
|
|
14
14
|
|
|
15
|
-
DEFAULT_BASE_URL = "https://api.cerebras.ai"
|
|
15
|
+
DEFAULT_BASE_URL = "https://api.cerebras.ai/v1"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class CerebrasProviderDataValidator(BaseModel):
|
|
19
|
+
cerebras_api_key: str | None = Field(
|
|
20
|
+
default=None,
|
|
21
|
+
description="API key for Cerebras models",
|
|
22
|
+
)
|
|
16
23
|
|
|
17
24
|
|
|
18
25
|
@json_schema_type
|
|
19
26
|
class CerebrasImplConfig(RemoteInferenceProviderConfig):
|
|
20
|
-
base_url:
|
|
21
|
-
default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
|
|
27
|
+
base_url: HttpUrl | None = Field(
|
|
28
|
+
default=HttpUrl(os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL)),
|
|
22
29
|
description="Base URL for the Cerebras API",
|
|
23
30
|
)
|
|
24
31
|
|
|
@@ -6,17 +6,24 @@
|
|
|
6
6
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from pydantic import Field, SecretStr
|
|
9
|
+
from pydantic import BaseModel, Field, HttpUrl, SecretStr
|
|
10
10
|
|
|
11
11
|
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
|
12
|
-
from
|
|
12
|
+
from llama_stack_api import json_schema_type
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DatabricksProviderDataValidator(BaseModel):
|
|
16
|
+
databricks_api_token: str | None = Field(
|
|
17
|
+
default=None,
|
|
18
|
+
description="API token for Databricks models",
|
|
19
|
+
)
|
|
13
20
|
|
|
14
21
|
|
|
15
22
|
@json_schema_type
|
|
16
23
|
class DatabricksImplConfig(RemoteInferenceProviderConfig):
|
|
17
|
-
|
|
24
|
+
base_url: HttpUrl | None = Field(
|
|
18
25
|
default=None,
|
|
19
|
-
description="The URL for the Databricks model serving endpoint",
|
|
26
|
+
description="The URL for the Databricks model serving endpoint (should include /serving-endpoints path)",
|
|
20
27
|
)
|
|
21
28
|
auth_credential: SecretStr | None = Field(
|
|
22
29
|
default=None,
|
|
@@ -27,11 +34,11 @@ class DatabricksImplConfig(RemoteInferenceProviderConfig):
|
|
|
27
34
|
@classmethod
|
|
28
35
|
def sample_run_config(
|
|
29
36
|
cls,
|
|
30
|
-
|
|
37
|
+
base_url: str = "${env.DATABRICKS_HOST:=}",
|
|
31
38
|
api_token: str = "${env.DATABRICKS_TOKEN:=}",
|
|
32
39
|
**kwargs: Any,
|
|
33
40
|
) -> dict[str, Any]:
|
|
34
41
|
return {
|
|
35
|
-
"
|
|
42
|
+
"base_url": base_url,
|
|
36
43
|
"api_token": api_token,
|
|
37
44
|
}
|