llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/providers/remote/inference/oci/oci.py (new file):

```diff
@@ -0,0 +1,162 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from collections.abc import Iterable
+from typing import Any
+
+import httpx
+import oci
+from oci.generative_ai.generative_ai_client import GenerativeAiClient
+from oci.generative_ai.models import ModelCollection
+from openai._base_client import DefaultAsyncHttpxClient
+
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
+from llama_stack.providers.remote.inference.oci.config import OCIConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import Model, ModelType
+
+logger = get_logger(name=__name__, category="inference::oci")
+
+OCI_AUTH_TYPE_INSTANCE_PRINCIPAL = "instance_principal"
+OCI_AUTH_TYPE_CONFIG_FILE = "config_file"
+VALID_OCI_AUTH_TYPES = [OCI_AUTH_TYPE_INSTANCE_PRINCIPAL, OCI_AUTH_TYPE_CONFIG_FILE]
+DEFAULT_OCI_REGION = "us-ashburn-1"
+
+MODEL_CAPABILITIES = ["TEXT_GENERATION", "TEXT_SUMMARIZATION", "TEXT_EMBEDDINGS", "CHAT"]
+
+
+class OCIInferenceAdapter(OpenAIMixin):
+    config: OCIConfig
+
+    embedding_models: list[str] = []
+
+    async def initialize(self) -> None:
+        """Initialize and validate OCI configuration."""
+        if self.config.oci_auth_type not in VALID_OCI_AUTH_TYPES:
+            raise ValueError(
+                f"Invalid OCI authentication type: {self.config.oci_auth_type}."
+                f"Valid types are one of: {VALID_OCI_AUTH_TYPES}"
+            )
+
+        if not self.config.oci_compartment_id:
+            raise ValueError("OCI_COMPARTMENT_OCID is a required parameter. Either set in env variable or config.")
+
+    def get_base_url(self) -> str:
+        region = self.config.oci_region or DEFAULT_OCI_REGION
+        return f"https://inference.generativeai.{region}.oci.oraclecloud.com/20231130/actions/v1"
+
+    def get_api_key(self) -> str | None:
+        # OCI doesn't use API keys, it uses request signing
+        return "<NOTUSED>"
+
+    def get_extra_client_params(self) -> dict[str, Any]:
+        """
+        Get extra parameters for the AsyncOpenAI client, including OCI-specific auth and headers.
+        """
+        auth = self._get_auth()
+        compartment_id = self.config.oci_compartment_id or ""
+
+        return {
+            "http_client": DefaultAsyncHttpxClient(
+                auth=auth,
+                headers={
+                    "CompartmentId": compartment_id,
+                },
+            ),
+        }
+
+    def _get_oci_signer(self) -> oci.signer.AbstractBaseSigner | None:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            return oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
+        return None
+
+    def _get_oci_config(self) -> dict:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            config = {"region": self.config.oci_region}
+        elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
+            config = oci.config.from_file(self.config.oci_config_file_path, self.config.oci_config_profile)
+            if not config.get("region"):
+                raise ValueError(
+                    "Region not specified in config. Please specify in config or with OCI_REGION env variable."
+                )
+
+        return config
+
+    def _get_auth(self) -> httpx.Auth:
+        if self.config.oci_auth_type == OCI_AUTH_TYPE_INSTANCE_PRINCIPAL:
+            return OciInstancePrincipalAuth()
+        elif self.config.oci_auth_type == OCI_AUTH_TYPE_CONFIG_FILE:
+            return OciUserPrincipalAuth(
+                config_file=self.config.oci_config_file_path, profile_name=self.config.oci_config_profile
+            )
+        else:
+            raise ValueError(f"Invalid OCI authentication type: {self.config.oci_auth_type}")
+
+    async def list_provider_model_ids(self) -> Iterable[str]:
+        """
+        List available models from OCI Generative AI service.
+        """
+        oci_config = self._get_oci_config()
+        oci_signer = self._get_oci_signer()
+        compartment_id = self.config.oci_compartment_id or ""
+
+        if oci_signer is None:
+            client = GenerativeAiClient(config=oci_config)
+        else:
+            client = GenerativeAiClient(config=oci_config, signer=oci_signer)
+
+        models: ModelCollection = client.list_models(
+            compartment_id=compartment_id,
+            # capability=MODEL_CAPABILITIES,
+            lifecycle_state="ACTIVE",
+        ).data
+
+        seen_models = set()
+        model_ids = []
+        for model in models.items:
+            if model.time_deprecated or model.time_on_demand_retired:
+                continue
+
+            if "UNKNOWN_ENUM_VALUE" in model.capabilities or "FINE_TUNE" in model.capabilities:
+                continue
+
+            # Use display_name + model_type as the key to avoid conflicts
+            model_key = (model.display_name, ModelType.llm)
+            if model_key in seen_models:
+                continue
+
+            seen_models.add(model_key)
+            model_ids.append(model.display_name)
+
+            if "TEXT_EMBEDDINGS" in model.capabilities:
+                self.embedding_models.append(model.display_name)
+
+        return model_ids
+
+    def construct_model_from_identifier(self, identifier: str) -> Model:
+        """
+        Construct a Model instance corresponding to the given identifier
+
+        Child classes can override this to customize model typing/metadata.
+
+        :param identifier: The provider's model identifier
+        :return: A Model instance
+        """
+        if identifier in self.embedding_models:
+            return Model(
+                provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+                provider_resource_id=identifier,
+                identifier=identifier,
+                model_type=ModelType.embedding,
+            )
+        return Model(
+            provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+            provider_resource_id=identifier,
+            identifier=identifier,
+            model_type=ModelType.llm,
+        )
```
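The new OCI adapter authenticates with request signing rather than API keys, so `get_api_key()` returns a placeholder and the real credentials ride on the httpx auth hooks. A minimal usage sketch follows; `OCIConfig` lives in the accompanying new `config.py` (+75 lines, not shown here), so its constructor is assumed to mirror the fields the adapter reads above.

```python
# Hypothetical usage sketch, not adapter code from the diff. OCIConfig's exact
# constructor is defined in the new config.py and is assumed to accept the
# fields referenced by the adapter above.
import asyncio

from llama_stack.providers.remote.inference.oci.config import OCIConfig
from llama_stack.providers.remote.inference.oci.oci import OCIInferenceAdapter


async def main() -> None:
    config = OCIConfig(
        oci_auth_type="config_file",            # or "instance_principal"
        oci_config_file_path="~/.oci/config",   # standard OCI CLI config path
        oci_config_profile="DEFAULT",
        oci_region="us-ashburn-1",              # falls back to DEFAULT_OCI_REGION
        oci_compartment_id="ocid1.compartment.oc1..example",  # placeholder OCID
    )
    adapter = OCIInferenceAdapter(config=config)
    await adapter.initialize()                  # validates auth type + compartment
    print(list(await adapter.list_provider_model_ids()))


asyncio.run(main())
```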
llama_stack/providers/remote/inference/ollama/config.py:

```diff
@@ -6,20 +6,22 @@
 
 from typing import Any
 
-from pydantic import Field, SecretStr
+from pydantic import Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 
-DEFAULT_OLLAMA_URL = "http://localhost:11434"
+DEFAULT_OLLAMA_URL = "http://localhost:11434/v1"
 
 
 class OllamaImplConfig(RemoteInferenceProviderConfig):
     auth_credential: SecretStr | None = Field(default=None, exclude=True)
 
-
+    base_url: HttpUrl | None = Field(default=HttpUrl(DEFAULT_OLLAMA_URL))
 
     @classmethod
-    def sample_run_config(
+    def sample_run_config(
+        cls, base_url: str = "${env.OLLAMA_URL:=http://localhost:11434/v1}", **kwargs
+    ) -> dict[str, Any]:
         return {
-            "
+            "base_url": base_url,
         }
```
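The Ollama default URL now carries the `/v1` suffix and the field becomes a validated `HttpUrl` named `base_url`. A quick sketch of what the new defaults yield, assuming `RemoteInferenceProviderConfig` has no other required fields:

```python
# Sketch based on the diff above: the default base_url now includes /v1,
# and sample_run_config emits the renamed "base_url" key.
from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig

cfg = OllamaImplConfig()
print(str(cfg.base_url))                   # -> http://localhost:11434/v1
print(OllamaImplConfig.sample_run_config())
# -> {'base_url': '${env.OLLAMA_URL:=http://localhost:11434/v1}'}
```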
llama_stack/providers/remote/inference/ollama/ollama.py:

```diff
@@ -9,15 +9,15 @@ import asyncio
 
 from ollama import AsyncClient as AsyncOllamaClient
 
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import Model
 from llama_stack.log import get_logger
-from llama_stack.providers.
+from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
     HealthResponse,
     HealthStatus,
+    Model,
+    UnsupportedModelError,
 )
-from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 logger = get_logger(name=__name__, category="inference::ollama")
 
@@ -28,6 +28,9 @@ class OllamaInferenceAdapter(OpenAIMixin):
     # automatically set by the resolver when instantiating the provider
     __provider_id__: str
 
+    # Ollama does not support the stream_options parameter
+    supports_stream_options: bool = False
+
     embedding_model_metadata: dict[str, dict[str, int]] = {
         "all-minilm:l6-v2": {
             "embedding_dimension": 384,
@@ -55,17 +58,23 @@ class OllamaInferenceAdapter(OpenAIMixin):
         # ollama client attaches itself to the current event loop (sadly?)
         loop = asyncio.get_running_loop()
         if loop not in self._clients:
-
+            # Ollama client expects base URL without /v1 suffix
+            base_url_str = str(self.config.base_url)
+            if base_url_str.endswith("/v1"):
+                host = base_url_str[:-3]
+            else:
+                host = base_url_str
+            self._clients[loop] = AsyncOllamaClient(host=host)
         return self._clients[loop]
 
     def get_api_key(self):
         return "NO KEY REQUIRED"
 
     def get_base_url(self):
-        return self.config.
+        return str(self.config.base_url)
 
     async def initialize(self) -> None:
-        logger.info(f"checking connectivity to Ollama at `{self.config.
+        logger.info(f"checking connectivity to Ollama at `{self.config.base_url}`...")
        r = await self.health()
         if r["status"] == HealthStatus.ERROR:
             logger.warning(
```
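The native `ollama` client connects to the bare host, while the OpenAI-compatible API lives under `/v1`; that is why the adapter strips the suffix before constructing `AsyncOllamaClient`. The same logic, isolated as a standalone restatement:

```python
# Mirrors the suffix-stripping added above (a restatement for illustration,
# not adapter code): the native Ollama client wants the host without /v1.
def ollama_host_from_base_url(base_url: str) -> str:
    return base_url[:-3] if base_url.endswith("/v1") else base_url


assert ollama_host_from_base_url("http://localhost:11434/v1") == "http://localhost:11434"
assert ollama_host_from_base_url("http://localhost:11434") == "http://localhost:11434"
```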
llama_stack/providers/remote/inference/openai/config.py:

```diff
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
 
 
 class OpenAIProviderDataValidator(BaseModel):
@@ -21,8 +21,8 @@ class OpenAIProviderDataValidator(BaseModel):
 
 @json_schema_type
 class OpenAIConfig(RemoteInferenceProviderConfig):
-    base_url:
-        default="https://api.openai.com/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.openai.com/v1"),
         description="Base URL for OpenAI API",
     )
 
```
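Typing `base_url` as `HttpUrl` moves URL validation to config-load time. A sketch, assuming `RemoteInferenceProviderConfig` imposes no other required fields:

```python
# Sketch: malformed URLs are now rejected when the config is constructed,
# instead of surfacing later as a client connection error.
from pydantic import ValidationError

from llama_stack.providers.remote.inference.openai.config import OpenAIConfig

print(str(OpenAIConfig().base_url))        # -> https://api.openai.com/v1
try:
    OpenAIConfig(base_url="not a url")
except ValidationError as err:
    print(f"rejected: {err.error_count()} validation error(s)")
```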
llama_stack/providers/remote/inference/passthrough/__init__.py:

```diff
@@ -10,8 +10,8 @@ from .config import PassthroughImplConfig
 
 
 class PassthroughProviderDataValidator(BaseModel):
-
-
+    passthrough_url: str
+    passthrough_api_key: str
 
 
 async def get_adapter_impl(config: PassthroughImplConfig, _deps):
```
llama_stack/providers/remote/inference/passthrough/config.py:

```diff
@@ -6,29 +6,24 @@
 
 from typing import Any
 
-from pydantic import Field,
+from pydantic import Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
 class PassthroughImplConfig(RemoteInferenceProviderConfig):
-
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the passthrough endpoint",
     )
 
-    api_key: SecretStr | None = Field(
-        default=None,
-        description="API Key for the passthrouth endpoint",
-    )
-
     @classmethod
     def sample_run_config(
-        cls,
+        cls, base_url: HttpUrl | None = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
     ) -> dict[str, Any]:
         return {
-            "
+            "base_url": base_url,
             "api_key": api_key,
         }
```
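With the explicit `api_key` field removed, passthrough credentials come either from config (`auth_credential` / `base_url`) or per request. A hedged sketch of the per-request path, using the `X-LlamaStack-Provider-Data` header named in the adapter's error messages; the route and model id here are placeholders, not taken from the diff:

```python
# Illustrative request; the header keys match PassthroughProviderDataValidator
# above, but the endpoint path and model id are assumptions for the example.
import json

import httpx

headers = {
    "X-LlamaStack-Provider-Data": json.dumps(
        {
            "passthrough_url": "https://downstream.example.com",
            "passthrough_api_key": "sk-example",
        }
    )
}
resp = httpx.post(
    "http://localhost:8321/v1/chat/completions",  # illustrative Llama Stack route
    headers=headers,
    json={"model": "passthrough/example-model", "messages": [{"role": "user", "content": "hi"}]},
)
print(resp.status_code)
```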
llama_stack/providers/remote/inference/passthrough/passthrough.py:

```diff
@@ -5,12 +5,14 @@
 # the root directory of this source tree.
 
 from collections.abc import AsyncIterator
-from typing import Any
 
-from
+from openai import AsyncOpenAI
 
-from llama_stack.
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.providers.utils.inference.stream_utils import wrap_async_stream
+from llama_stack_api import (
     Inference,
+    Model,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -19,104 +21,124 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
-from llama_stack.core.library_client import convert_pydantic_to_json_value
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 
 from .config import PassthroughImplConfig
 
 
-class PassthroughInferenceAdapter(Inference):
+class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):
     def __init__(self, config: PassthroughImplConfig) -> None:
-        ModelRegistryHelper.__init__(self)
         self.config = config
 
+    async def initialize(self) -> None:
+        pass
+
+    async def shutdown(self) -> None:
+        pass
+
     async def unregister_model(self, model_id: str) -> None:
         pass
 
     async def register_model(self, model: Model) -> Model:
         return model
 
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    async def list_models(self) -> list[Model]:
+        """List models by calling the downstream /v1/models endpoint."""
+        client = self._get_openai_client()
+
+        response = await client.models.list()
+
+        # Convert from OpenAI format to Llama Stack Model format
+        models = []
+        for model_data in response.data:
+            downstream_model_id = model_data.id
+            custom_metadata = getattr(model_data, "custom_metadata", {}) or {}
+
+            # Prefix identifier with provider ID for local registry
+            local_identifier = f"{self.__provider_id__}/{downstream_model_id}"
+
+            model = Model(
+                identifier=local_identifier,
+                provider_id=self.__provider_id__,
+                provider_resource_id=downstream_model_id,
+                model_type=custom_metadata.get("model_type", "llm"),
+                metadata=custom_metadata,
+            )
+            models.append(model)
+
+        return models
+
+    async def should_refresh_models(self) -> bool:
+        """Passthrough should refresh models since they come from downstream dynamically."""
+        return self.config.refresh_models
+
+    def _get_openai_client(self) -> AsyncOpenAI:
+        """Get an AsyncOpenAI client configured for the downstream server."""
+        base_url = self._get_passthrough_url()
+        api_key = self._get_passthrough_api_key()
+
+        return AsyncOpenAI(
+            base_url=f"{base_url.rstrip('/')}/v1",
+            api_key=api_key,
         )
 
-
-
-
-
-
+    def _get_passthrough_url(self) -> str:
+        """Get the passthrough URL from config or provider data."""
+        if self.config.base_url is not None:
+            return str(self.config.base_url)
+
+        provider_data = self.get_request_provider_data()
+        if provider_data is None:
+            raise ValueError(
+                'Pass url of the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_url": <your passthrough url>}'
+            )
+        return provider_data.passthrough_url
+
+    def _get_passthrough_api_key(self) -> str:
+        """Get the passthrough API key from config or provider data."""
+        if self.config.auth_credential is not None:
+            return self.config.auth_credential.get_secret_value()
+
+        provider_data = self.get_request_provider_data()
+        if provider_data is None:
+            raise ValueError(
+                'Pass API Key for the passthrough endpoint in the header X-LlamaStack-Provider-Data as { "passthrough_api_key": <your api key>}'
+            )
+        return provider_data.passthrough_api_key
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
-
-
-
-        params = params.model_copy()
-        params.model = model_obj.provider_resource_id
-
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
+        """Forward completion request to downstream using OpenAI client."""
+        client = self._get_openai_client()
         request_params = params.model_dump(exclude_none=True)
+        response = await client.completions.create(**request_params)
+
+        if params.stream:
+            return wrap_async_stream(response)
 
-        return
+        return response  # type: ignore[return-value]
 
     async def openai_chat_completion(
         self,
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-
-
+        """Forward chat completion request to downstream using OpenAI client."""
+        client = self._get_openai_client()
+        request_params = params.model_dump(exclude_none=True)
+        response = await client.chat.completions.create(**request_params)
 
-
-
+        if params.stream:
+            return wrap_async_stream(response)
 
-
+        return response  # type: ignore[return-value]
 
-
-
-
-
-
-
-
-
-
-        json_input = [x for x in json_input if x is not None]
-        new_input = []
-        for x in json_input:
-            if isinstance(x, dict):
-                x = {k: v for k, v in x.items() if v is not None}
-            new_input.append(x)
-        json_input = new_input
-
-        json_params[key] = json_input
-
-        return json_params
+    async def openai_embeddings(
+        self,
+        params: OpenAIEmbeddingsRequestWithExtraBody,
+    ) -> OpenAIEmbeddingsResponse:
+        """Forward embeddings request to downstream using OpenAI client."""
+        client = self._get_openai_client()
+        request_params = params.model_dump(exclude_none=True)
+        response = await client.embeddings.create(**request_params)
+        return response  # type: ignore
```
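The rewritten adapter is now a thin `AsyncOpenAI` proxy: it normalizes the downstream base URL once and forwards requests verbatim. The URL normalization, restated standalone:

```python
# Restates the base-url handling from _get_openai_client above: strip any
# trailing slash, then append the OpenAI-compatible /v1 prefix.
def downstream_openai_base_url(passthrough_url: str) -> str:
    return f"{passthrough_url.rstrip('/')}/v1"


assert downstream_openai_base_url("https://host.example.com/") == "https://host.example.com/v1"
assert downstream_openai_base_url("https://host.example.com") == "https://host.example.com/v1"
```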
llama_stack/providers/remote/inference/runpod/config.py:

```diff
@@ -6,15 +6,22 @@
 
 from typing import Any
 
-from pydantic import Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
+
+
+class RunpodProviderDataValidator(BaseModel):
+    runpod_api_token: str | None = Field(
+        default=None,
+        description="API token for RunPod models",
+    )
 
 
 @json_schema_type
 class RunpodImplConfig(RemoteInferenceProviderConfig):
-
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the Runpod model serving endpoint",
     )
@@ -27,6 +34,6 @@ class RunpodImplConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "
-            "api_token": "${env.RUNPOD_API_TOKEN}",
+            "base_url": "${env.RUNPOD_URL:=}",
+            "api_token": "${env.RUNPOD_API_TOKEN:=}",
         }
```
llama_stack/providers/remote/inference/runpod/runpod.py:

```diff
@@ -4,13 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from collections.abc import AsyncIterator
-
-from llama_stack.apis.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChatCompletionRequestWithExtraBody,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import RunpodImplConfig
@@ -24,19 +17,8 @@ class RunpodInferenceAdapter(OpenAIMixin):
     """
 
     config: RunpodImplConfig
+    provider_data_api_key_field: str = "runpod_api_token"
 
     def get_base_url(self) -> str:
         """Get base URL for OpenAI client."""
-        return self.config.
-
-    async def openai_chat_completion(
-        self,
-        params: OpenAIChatCompletionRequestWithExtraBody,
-    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        """Override to add RunPod-specific stream_options requirement."""
-        params = params.model_copy()
-
-        if params.stream and not params.stream_options:
-            params.stream_options = {"include_usage": True}
-
-        return await super().openai_chat_completion(params)
+        return str(self.config.base_url)
```
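RunPod's bespoke `openai_chat_completion` override is deleted, so the mixin's generic handling takes over, and `provider_data_api_key_field` lets the token arrive per request. A hedged sketch of that per-request path; the header mechanism is assumed to match the provider-data pattern used elsewhere in the stack, not shown in this diff:

```python
# Illustrative: with provider_data_api_key_field = "runpod_api_token", the
# mixin is expected to read the token from per-request provider data when
# the run config carries no credential. Header shape assumed for the example.
import json

headers = {"X-LlamaStack-Provider-Data": json.dumps({"runpod_api_token": "rp-example-token"})}
```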
llama_stack/providers/remote/inference/sambanova/config.py:

```diff
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from
+from llama_stack_api import json_schema_type
 
 
 class SambaNovaProviderDataValidator(BaseModel):
@@ -21,14 +21,14 @@ class SambaNovaProviderDataValidator(BaseModel):
 
 @json_schema_type
 class SambaNovaImplConfig(RemoteInferenceProviderConfig):
-
-        default="https://api.sambanova.ai/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.sambanova.ai/v1"),
         description="The URL for the SambaNova AI server",
     )
 
     @classmethod
     def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "
+            "base_url": "https://api.sambanova.ai/v1",
             "api_key": api_key,
         }
```
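The SambaNova change completes the pattern repeated across these providers in 0.4.0: the old URL-style field becomes a typed `base_url`, and run configs rename the key accordingly. Roughly, with the old key name assumed since the removed lines are truncated in this view:

```python
# Before/after shape of the provider entry in a run config; the old key name
# is an assumption, as the 0.3.5 lines are truncated in this diff view.
old_style = {"url": "https://api.sambanova.ai/v1", "api_key": "${env.SAMBANOVA_API_KEY:=}"}
new_style = {"base_url": "https://api.sambanova.ai/v1", "api_key": "${env.SAMBANOVA_API_KEY:=}"}
```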