llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/remote/inference/databricks/databricks.py

```diff
@@ -4,13 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from collections.abc import Iterable
+from collections.abc import AsyncIterator, Iterable
 
 from databricks.sdk import WorkspaceClient
 
-from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 
 from .config import DatabricksImplConfig
 
@@ -20,6 +20,8 @@ logger = get_logger(name=__name__, category="inference::databricks")
 class DatabricksInferenceAdapter(OpenAIMixin):
     config: DatabricksImplConfig
 
+    provider_data_api_key_field: str = "databricks_api_token"
+
     # source: https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/supported-models
     embedding_model_metadata: dict[str, dict[str, int]] = {
         "databricks-gte-large-en": {"embedding_dimension": 1024, "context_length": 8192},
@@ -27,18 +29,26 @@ class DatabricksInferenceAdapter(OpenAIMixin):
     }
 
     def get_base_url(self) -> str:
-        return
+        return str(self.config.base_url)
 
     async def list_provider_model_ids(self) -> Iterable[str]:
+        # Filter out None values from endpoint names
+        api_token = self._get_api_key_from_config_or_provider_data()
+        # WorkspaceClient expects base host without /serving-endpoints suffix
+        base_url_str = str(self.config.base_url)
+        if base_url_str.endswith("/serving-endpoints"):
+            host = base_url_str[:-18]  # Remove '/serving-endpoints'
+        else:
+            host = base_url_str
         return [
-            endpoint.name
+            endpoint.name # type: ignore[misc]
             for endpoint in WorkspaceClient(
-                host=
+                host=host, token=api_token
             ).serving_endpoints.list() # TODO: this is not async
         ]
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
         raise NotImplementedError()
```
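The notable addition here is `provider_data_api_key_field`, which lets a caller supply the Databricks token per request through provider data instead of static stack config. A minimal sketch of what that might look like from an HTTP client, assuming the stack's `X-LlamaStack-Provider-Data` header convention; the endpoint, token, and model id are placeholders, not values from this diff:

```python
import json

import httpx

# Hypothetical call: the provider-data header carries the field named by
# provider_data_api_key_field above ("databricks_api_token").
headers = {
    "X-LlamaStack-Provider-Data": json.dumps({"databricks_api_token": "dapi-PLACEHOLDER"}),
}
resp = httpx.post(
    "http://localhost:8321/v1/chat/completions",  # illustrative local endpoint
    headers=headers,
    json={
        "model": "databricks-meta-llama-3-3-70b-instruct",  # placeholder model id
        "messages": [{"role": "user", "content": "hello"}],
    },
)
print(resp.status_code)
```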
llama_stack/providers/remote/inference/fireworks/config.py

```diff
@@ -6,22 +6,22 @@
 
 from typing import Any
 
-from pydantic import Field
+from pydantic import Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
 class FireworksImplConfig(RemoteInferenceProviderConfig):
-
-        default="https://api.fireworks.ai/inference/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.fireworks.ai/inference/v1"),
         description="The URL for the Fireworks server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "
+            "base_url": "https://api.fireworks.ai/inference/v1",
             "api_key": api_key,
         }
```
llama_stack/providers/remote/inference/gemini/config.py

```diff
@@ -9,7 +9,7 @@ from typing import Any
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
 
 
 class GeminiProviderDataValidator(BaseModel):
```
llama_stack/providers/remote/inference/gemini/gemini.py

```diff
@@ -4,15 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from
+from typing import Any
 
-from llama_stack.
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import GeminiConfig
 
@@ -37,21 +37,20 @@ class GeminiInferenceAdapter(OpenAIMixin):
         Override embeddings method to handle Gemini's missing usage statistics.
         Gemini's embedding API doesn't return usage information, so we provide default values.
         """
-        #
-        request_params = {
+        # Build request params conditionally to avoid NotGiven/Omit type mismatch
+        request_params: dict[str, Any] = {
             "model": await self._get_provider_model_id(params.model),
             "input": params.input,
-            "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
-            "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN,
-            "user": params.user if params.user is not None else NOT_GIVEN,
         }
+        if params.encoding_format is not None:
+            request_params["encoding_format"] = params.encoding_format
+        if params.dimensions is not None:
+            request_params["dimensions"] = params.dimensions
+        if params.user is not None:
+            request_params["user"] = params.user
+        if params.model_extra:
+            request_params["extra_body"] = params.model_extra
 
-        # Add extra_body if present
-        extra_body = params.model_extra
-        if extra_body:
-            request_params["extra_body"] = extra_body
-
-        # Call OpenAI embeddings API with properly typed parameters
         response = await self.client.embeddings.create(**request_params)
 
         data = []
```
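The change replaces `NOT_GIVEN` sentinels with conditional insertion, so optional fields are simply absent from the outgoing request. A standalone sketch of the same pattern, with illustrative names that are not part of the adapter:

```python
from typing import Any


def build_embedding_params(
    model: str,
    texts: list[str],
    *,
    dimensions: int | None = None,
    user: str | None = None,
) -> dict[str, Any]:
    # Start from the required fields and add optional ones only when set,
    # so the request never carries explicit nulls or client sentinels.
    params: dict[str, Any] = {"model": model, "input": texts}
    if dimensions is not None:
        params["dimensions"] = dimensions
    if user is not None:
        params["user"] = user
    return params


print(build_embedding_params("text-embedding-004", ["hello"], dimensions=256))
```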
llama_stack/providers/remote/inference/groq/config.py

```diff
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
 
 
 class GroqProviderDataValidator(BaseModel):
@@ -21,14 +21,14 @@ class GroqProviderDataValidator(BaseModel):
 
 @json_schema_type
 class GroqConfig(RemoteInferenceProviderConfig):
-
-        default="https://api.groq.com",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.groq.com/openai/v1"),
         description="The URL for the Groq AI server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "
+            "base_url": "https://api.groq.com/openai/v1",
             "api_key": api_key,
         }
```
llama_stack/providers/remote/inference/llama_openai_compat/config.py

```diff
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
 
 
 class LlamaProviderDataValidator(BaseModel):
@@ -21,14 +21,14 @@ class LlamaProviderDataValidator(BaseModel):
 
 @json_schema_type
 class LlamaCompatConfig(RemoteInferenceProviderConfig):
-
-        default="https://api.llama.com/compat/v1/",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.llama.com/compat/v1/"),
         description="The URL for the Llama API server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
         return {
-            "
+            "base_url": "https://api.llama.com/compat/v1/",
             "api_key": api_key,
         }
```
llama_stack/providers/remote/inference/llama_openai_compat/llama.py

```diff
@@ -4,15 +4,17 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from
+from collections.abc import AsyncIterator
+
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.log import get_logger
-from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 logger = get_logger(name=__name__, category="inference::llama_openai_compat")
 
@@ -31,12 +33,12 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
 
         :return: The Llama API base URL
         """
-        return self.config.
+        return str(self.config.base_url)
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
         raise NotImplementedError()
 
     async def openai_embeddings(
```
llama_stack/providers/remote/inference/nvidia/config.py

```diff
@@ -7,10 +7,17 @@
 import os
 from typing import Any
 
-from pydantic import Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
+
+
+class NVIDIAProviderDataValidator(BaseModel):
+    nvidia_api_key: str | None = Field(
+        default=None,
+        description="API key for NVIDIA NIM models",
+    )
 
 
 @json_schema_type
@@ -21,6 +28,7 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
     Attributes:
         url (str): A base url for accessing the NVIDIA NIM, e.g. http://localhost:8000
         api_key (str): The access key for the hosted NIM endpoints
+        rerank_model_to_url (dict[str, str]): Mapping of rerank model identifiers to their API endpoints
 
     There are two ways to access NVIDIA NIMs -
      0. Hosted: Preview APIs hosted at https://integrate.api.nvidia.com
@@ -36,29 +44,31 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
     URL of your running NVIDIA NIM and do not need to set the api_key.
     """
 
-
-        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"),
+    base_url: HttpUrl | None = Field(
+        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"),
         description="A base url for accessing the NVIDIA NIM",
     )
     timeout: int = Field(
         default=60,
         description="Timeout for the HTTP requests",
     )
-
-        default_factory=lambda:
-
+    rerank_model_to_url: dict[str, str] = Field(
+        default_factory=lambda: {
+            "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
+            "nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking",
+            "nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
+        },
+        description="Mapping of rerank model identifiers to their API endpoints. ",
     )
 
     @classmethod
     def sample_run_config(
         cls,
-
+        base_url: HttpUrl | None = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}",
         api_key: str = "${env.NVIDIA_API_KEY:=}",
-        append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}",
         **kwargs,
     ) -> dict[str, Any]:
         return {
-            "
+            "base_url": base_url,
             "api_key": api_key,
-            "append_api_version": append_api_version,
         }
```
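With the old `url`/`append_api_version` pair collapsed into a single `base_url`, the config resolves roughly as below. A minimal sketch, assuming the defaults shown in the diff and that `RemoteInferenceProviderConfig` imposes no other required fields (an assumption, since that base class is not shown here):

```python
from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig

cfg = NVIDIAConfig()  # default_factory reads NVIDIA_BASE_URL, else the hosted /v1 endpoint
print(cfg.base_url)                     # https://integrate.api.nvidia.com/v1 by default
print(sorted(cfg.rerank_model_to_url))  # the three statically mapped rerank models
```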
llama_stack/providers/remote/inference/nvidia/nvidia.py

```diff
@@ -5,8 +5,20 @@
 # the root directory of this source tree.
 
 
+from collections.abc import Iterable
+
+import aiohttp
+
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    RerankData,
+    RerankResponse,
+)
 
 from . import NVIDIAConfig
 from .utils import _is_nvidia_hosted
@@ -17,6 +29,8 @@ logger = get_logger(name=__name__, category="inference::nvidia")
 class NVIDIAInferenceAdapter(OpenAIMixin):
     config: NVIDIAConfig
 
+    provider_data_api_key_field: str = "nvidia_api_key"
+
     """
     NVIDIA Inference Adapter for Llama Stack.
     """
@@ -30,11 +44,11 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
     }
 
     async def initialize(self) -> None:
-        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.
+        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.base_url})...")
 
         if _is_nvidia_hosted(self.config):
             if not self.config.auth_credential:
-
+                logger.error(
                     "API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
                 )
 
@@ -58,4 +72,102 @@
 
         :return: The NVIDIA API base URL
         """
-        return
+        return str(self.config.base_url)
+
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        """
+        Return both dynamic model IDs and statically configured rerank model IDs.
+        """
+        dynamic_ids: Iterable[str] = []
+        try:
+            dynamic_ids = await super().list_provider_model_ids()
+        except Exception:
+            # If the dynamic listing fails, proceed with just configured rerank IDs
+            dynamic_ids = []
+
+        configured_rerank_ids = list(self.config.rerank_model_to_url.keys())
+        return list(dict.fromkeys(list(dynamic_ids) + configured_rerank_ids)) # remove duplicates
+
+    def construct_model_from_identifier(self, identifier: str) -> Model:
+        """
+        Classify rerank models from config; otherwise use the base behavior.
+        """
+        if identifier in self.config.rerank_model_to_url:
+            return Model(
+                provider_id=self.__provider_id__, # type: ignore[attr-defined]
+                provider_resource_id=identifier,
+                identifier=identifier,
+                model_type=ModelType.rerank,
+            )
+        return super().construct_model_from_identifier(identifier)
+
+    async def rerank(
+        self,
+        model: str,
+        query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
+        items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam],
+        max_num_results: int | None = None,
+    ) -> RerankResponse:
+        provider_model_id = await self._get_provider_model_id(model)
+
+        ranking_url = self.get_base_url()
+
+        if _is_nvidia_hosted(self.config) and provider_model_id in self.config.rerank_model_to_url:
+            ranking_url = self.config.rerank_model_to_url[provider_model_id]
+
+        logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}")
+
+        # Convert query to text format
+        if isinstance(query, str):
+            query_text = query
+        elif isinstance(query, OpenAIChatCompletionContentPartTextParam):
+            query_text = query.text
+        else:
+            raise ValueError("Query must be a string or text content part")
+
+        # Convert items to text format
+        passages = []
+        for item in items:
+            if isinstance(item, str):
+                passages.append({"text": item})
+            elif isinstance(item, OpenAIChatCompletionContentPartTextParam):
+                passages.append({"text": item.text})
+            else:
+                raise ValueError("Items must be strings or text content parts")
+
+        payload = {
+            "model": provider_model_id,
+            "query": {"text": query_text},
+            "passages": passages,
+        }
+
+        headers = {
+            "Authorization": f"Bearer {self.get_api_key()}",
+            "Content-Type": "application/json",
+        }
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(ranking_url, headers=headers, json=payload) as response:
+                    if response.status != 200:
+                        response_text = await response.text()
+                        raise ConnectionError(
+                            f"NVIDIA rerank API request failed with status {response.status}: {response_text}"
+                        )
+
+                    result = await response.json()
+                    rankings = result.get("rankings", [])
+
+                    # Convert to RerankData format
+                    rerank_data = []
+                    for ranking in rankings:
+                        rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"]))
+
+                    # Apply max_num_results limit
+                    if max_num_results is not None:
+                        rerank_data = rerank_data[:max_num_results]
+
+                    return RerankResponse(data=rerank_data)
+
+        except aiohttp.ClientError as e:
+            raise ConnectionError(f"Failed to connect to NVIDIA rerank API at {ranking_url}: {e}") from e
```
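A hedged usage sketch for the new `rerank()` entry point. The constructor call mirrors the OCI factory below and is an assumption, as is the presence of `NVIDIA_API_KEY` in the environment; query and passages are illustrative:

```python
import asyncio

from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter


async def main() -> None:
    adapter = NVIDIAInferenceAdapter(config=NVIDIAConfig())  # assumed constructor shape
    await adapter.initialize()
    resp = await adapter.rerank(
        model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
        query="What is the capital of France?",
        items=["Paris is the capital of France.", "Berlin is the capital of Germany."],
        max_num_results=1,
    )
    # Highest-scoring passage; relevance_score is the NIM "logit" field above.
    print(resp.data[0].index, resp.data[0].relevance_score)


asyncio.run(main())
```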
llama_stack/providers/remote/inference/oci/__init__.py (new file)

```diff
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack_api import InferenceProvider
+
+from .config import OCIConfig
+
+
+async def get_adapter_impl(config: OCIConfig, _deps) -> InferenceProvider:
+    from .oci import OCIInferenceAdapter
+
+    adapter = OCIInferenceAdapter(config=config)
+    await adapter.initialize()
+    return adapter
```
llama_stack/providers/remote/inference/oci/auth.py (new file)

```diff
@@ -0,0 +1,79 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import Generator, Mapping
+from typing import Any, override
+
+import httpx
+import oci
+import requests
+from oci.config import DEFAULT_LOCATION, DEFAULT_PROFILE
+
+OciAuthSigner = type[oci.signer.AbstractBaseSigner]
+
+
+class HttpxOciAuth(httpx.Auth):
+    """
+    Custom HTTPX authentication class that implements OCI request signing.
+
+    This class handles the authentication flow for HTTPX requests by signing them
+    using the OCI Signer, which adds the necessary authentication headers for
+    OCI API calls.
+
+    Attributes:
+        signer (oci.signer.Signer): The OCI signer instance used for request signing
+    """
+
+    def __init__(self, signer: OciAuthSigner):
+        self.signer = signer
+
+    @override
+    def auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, httpx.Response, None]:
+        # Read the request content to handle streaming requests properly
+        try:
+            content = request.content
+        except httpx.RequestNotRead:
+            # For streaming requests, we need to read the content first
+            content = request.read()
+
+        req = requests.Request(
+            method=request.method,
+            url=str(request.url),
+            headers=dict(request.headers),
+            data=content,
+        )
+        prepared_request = req.prepare()
+
+        # Sign the request using the OCI Signer
+        self.signer.do_request_sign(prepared_request)  # type: ignore
+
+        # Update the original HTTPX request with the signed headers
+        request.headers.update(prepared_request.headers)
+
+        yield request
+
+
+class OciInstancePrincipalAuth(HttpxOciAuth):
+    def __init__(self, **kwargs: Mapping[str, Any]):
+        self.signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner(**kwargs)
+
+
+class OciUserPrincipalAuth(HttpxOciAuth):
+    def __init__(self, config_file: str = DEFAULT_LOCATION, profile_name: str = DEFAULT_PROFILE):
+        config = oci.config.from_file(config_file, profile_name)
+        oci.config.validate_config(config)  # type: ignore
+        key_content = ""
+        with open(config["key_file"]) as f:
+            key_content = f.read()
+
+        self.signer = oci.signer.Signer(
+            tenancy=config["tenancy"],
+            user=config["user"],
+            fingerprint=config["fingerprint"],
+            private_key_file_location=config.get("key_file"),
+            pass_phrase="none",  # type: ignore
+            private_key_content=key_content,
+        )
```
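Because `HttpxOciAuth` subclasses `httpx.Auth`, an instance can be passed directly as the `auth=` argument of an httpx client; the signer then injects the OCI signature headers into every outgoing request via `auth_flow()`. A minimal sketch, with an illustrative endpoint URL that is not taken from this diff:

```python
import httpx

from llama_stack.providers.remote.inference.oci.auth import OciUserPrincipalAuth

# Sign requests with credentials from the local OCI config file.
auth = OciUserPrincipalAuth(config_file="~/.oci/config", profile_name="DEFAULT")
with httpx.Client(auth=auth) as client:
    # Any request made through this client is signed before it is sent.
    resp = client.get(
        "https://inference.generativeai.us-ashburn-1.oci.oraclecloud.com/models"  # illustrative URL
    )
    print(resp.status_code)
```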
llama_stack/providers/remote/inference/oci/config.py (new file)

```diff
@@ -0,0 +1,75 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
+
+
+class OCIProviderDataValidator(BaseModel):
+    oci_auth_type: str = Field(
+        description="OCI authentication type (must be one of: instance_principal, config_file)",
+    )
+    oci_region: str = Field(
+        description="OCI region (e.g., us-ashburn-1)",
+    )
+    oci_compartment_id: str = Field(
+        description="OCI compartment ID for the Generative AI service",
+    )
+    oci_config_file_path: str | None = Field(
+        default="~/.oci/config",
+        description="OCI config file path (required if oci_auth_type is config_file)",
+    )
+    oci_config_profile: str | None = Field(
+        default="DEFAULT",
+        description="OCI config profile (required if oci_auth_type is config_file)",
+    )
+
+
+@json_schema_type
+class OCIConfig(RemoteInferenceProviderConfig):
+    oci_auth_type: str = Field(
+        description="OCI authentication type (must be one of: instance_principal, config_file)",
+        default_factory=lambda: os.getenv("OCI_AUTH_TYPE", "instance_principal"),
+    )
+    oci_region: str = Field(
+        default_factory=lambda: os.getenv("OCI_REGION", "us-ashburn-1"),
+        description="OCI region (e.g., us-ashburn-1)",
+    )
+    oci_compartment_id: str = Field(
+        default_factory=lambda: os.getenv("OCI_COMPARTMENT_OCID", ""),
+        description="OCI compartment ID for the Generative AI service",
+    )
+    oci_config_file_path: str = Field(
+        default_factory=lambda: os.getenv("OCI_CONFIG_FILE_PATH", "~/.oci/config"),
+        description="OCI config file path (required if oci_auth_type is config_file)",
+    )
+    oci_config_profile: str = Field(
+        default_factory=lambda: os.getenv("OCI_CLI_PROFILE", "DEFAULT"),
+        description="OCI config profile (required if oci_auth_type is config_file)",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        oci_auth_type: str = "${env.OCI_AUTH_TYPE:=instance_principal}",
+        oci_config_file_path: str = "${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}",
+        oci_config_profile: str = "${env.OCI_CLI_PROFILE:=DEFAULT}",
+        oci_region: str = "${env.OCI_REGION:=us-ashburn-1}",
+        oci_compartment_id: str = "${env.OCI_COMPARTMENT_OCID:=}",
+        **kwargs,
+    ) -> dict[str, Any]:
+        return {
+            "oci_auth_type": oci_auth_type,
+            "oci_config_file_path": oci_config_file_path,
+            "oci_config_profile": oci_config_profile,
+            "oci_region": oci_region,
+            "oci_compartment_id": oci_compartment_id,
+        }
```
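Tying the three OCI files together, a sketch of constructing the adapter through the async factory in `__init__.py` above. The compartment OCID is a placeholder, and the sketch assumes `RemoteInferenceProviderConfig` adds no further required fields:

```python
import asyncio

from llama_stack.providers.remote.inference.oci import OCIConfig, get_adapter_impl

config = OCIConfig(
    oci_auth_type="config_file",          # sign with ~/.oci/config credentials
    oci_region="us-ashburn-1",
    oci_compartment_id="ocid1.compartment.oc1..PLACEHOLDER",  # placeholder OCID
)
adapter = asyncio.run(get_adapter_impl(config, None))
```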