llama-stack 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +12 -21
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -131
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.4.dist-info/RECORD +0 -625
- llama_stack-0.3.4.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
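Two structural moves recur throughout this manifest and the hunks that follow: public API types now live in the new top-level `llama_stack_api` package, and the kvstore/sqlstore utilities moved from `llama_stack.providers.utils` into `llama_stack.core.storage`. A minimal before/after import sketch, using only paths that appear in this diff (downstream code pinned to the 0.3.4 paths would need the same rewrite):

```python
# Import-path migration implied by this diff. Only the paths themselves are
# taken from the manifest and hunks below; the pairing is illustrative.

# 0.3.4
# from llama_stack.apis.inference import OpenAIChatCompletion
# from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore

# 0.4.0
from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack_api import OpenAIChatCompletion
```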
llama_stack/providers/utils/inference/inference_store.py (+12 -21):

```diff
@@ -8,20 +8,19 @@ from typing import Any
 
 from sqlalchemy.exc import IntegrityError
 
-from llama_stack.
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
+from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.core.storage.sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
+from llama_stack.log import get_logger
+from llama_stack_api import (
     ListOpenAIChatCompletionResponse,
     OpenAIChatCompletion,
     OpenAICompletionWithInputMessages,
     OpenAIMessageParam,
     Order,
 )
-from
-from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
-from llama_stack.log import get_logger
-
-from ..sqlstore.api import ColumnDefinition, ColumnType
-from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
 
 logger = get_logger(name=__name__, category="inference")
 
@@ -56,7 +55,7 @@ class InferenceStore:
             logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
 
         await self.sql_store.create_table(
-
+            self.reference.table_name,
             {
                 "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                 "created": ColumnType.INTEGER,
@@ -66,14 +65,6 @@ class InferenceStore:
             },
         )
 
-        if self.enable_write_queue:
-            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-            for _ in range(self._num_writers):
-                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-            logger.debug(
-                f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
-            )
-
     async def shutdown(self) -> None:
         if not self._worker_tasks:
             return
@@ -161,7 +152,7 @@ class InferenceStore:
 
         try:
             await self.sql_store.insert(
-                table=
+                table=self.reference.table_name,
                 data=record_data,
             )
         except IntegrityError as e:
@@ -173,7 +164,7 @@ class InferenceStore:
            error_message = str(e.orig) if e.orig else str(e)
            if self._is_unique_constraint_error(error_message):
                # Update the existing record instead
-                await self.sql_store.update(table=
+                await self.sql_store.update(table=self.reference.table_name, data=record_data, where={"id": data["id"]})
            else:
                # Re-raise if it's not a unique constraint error
                raise
@@ -217,7 +208,7 @@ class InferenceStore:
             where_conditions["model"] = model
 
         paginated_result = await self.sql_store.fetch_all(
-            table=
+            table=self.reference.table_name,
             where=where_conditions if where_conditions else None,
             order_by=[("created", order.value)],
             cursor=("id", after) if after else None,
@@ -246,7 +237,7 @@ class InferenceStore:
             raise ValueError("Inference store is not initialized")
 
         row = await self.sql_store.fetch_one(
-            table=
+            table=self.reference.table_name,
             where={"id": completion_id},
         )
 
```
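For context on the `create_table` call in the hunk above: a small sketch of the schema-dict shape, using only the `ColumnDefinition` and `ColumnType` names this file now imports from `llama_stack_api.internal.sqlstore`; the store setup and the literal table name are assumptions, not part of the diff.

```python
# Sketch of the schema dict passed to sql_store.create_table above. The
# ColumnDefinition(type=..., primary_key=...) shape is taken from the hunk;
# the table name below is illustrative (the real code reads it from
# self.reference.table_name).
from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType

schema = {
    # A bare ColumnType declares a typed column with default options...
    "created": ColumnType.INTEGER,
    # ...while ColumnDefinition carries per-column flags such as primary_key.
    "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
}
# await sql_store.create_table("example_table", schema)  # hypothetical call site
```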
llama_stack/providers/utils/inference/litellm_openai_mixin.py (+79 -79):

```diff
@@ -7,13 +7,20 @@
 import base64
 import struct
 from collections.abc import AsyncIterator
+from typing import Any
 
 import litellm
 
-from llama_stack.
-
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
+from llama_stack.providers.utils.inference.openai_compat import (
+    get_stream_options_for_telemetry,
+    prepare_openai_completion_params,
+)
+from llama_stack.providers.utils.inference.stream_utils import wrap_async_stream
+from llama_stack_api import (
     InferenceProvider,
-    JsonSchemaResponseFormat,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -23,16 +30,6 @@ from llama_stack.apis.inference (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
-    ToolChoice,
-)
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
-from llama_stack.providers.utils.inference.openai_compat import (
-    convert_message_to_openai_dict_new,
-    convert_tooldef_to_openai_tool,
-    get_sampling_options,
-    prepare_openai_completion_params,
 )
 
 logger = get_logger(name=__name__, category="providers::utils")
@@ -55,6 +52,7 @@ class LiteLLMOpenAIMixin(
         openai_compat_api_base: str | None = None,
         download_images: bool = False,
         json_schema_strict: bool = True,
+        supports_stream_options: bool = True,
     ):
         """
         Initialize the LiteLLMOpenAIMixin.
@@ -66,6 +64,7 @@ class LiteLLMOpenAIMixin(
         :param openai_compat_api_base: The base URL for OpenAI compatibility, or None if not using OpenAI compatibility.
         :param download_images: Whether to download images and convert to base64 for message conversion.
         :param json_schema_strict: Whether to use strict mode for JSON schema validation.
+        :param supports_stream_options: Whether the provider supports stream_options parameter.
         """
         ModelRegistryHelper.__init__(self, model_entries=model_entries)
 
@@ -75,6 +74,7 @@ class LiteLLMOpenAIMixin(
         self.api_base = openai_compat_api_base
         self.download_images = download_images
         self.json_schema_strict = json_schema_strict
+        self.supports_stream_options = supports_stream_options
 
         if openai_compat_api_base:
             self.is_openai_compat = True
@@ -127,59 +127,13 @@ class LiteLLMOpenAIMixin(
 
         return schema
 
-    async def _get_params(self, request: ChatCompletionRequest) -> dict:
-        input_dict = {}
-
-        input_dict["messages"] = [
-            await convert_message_to_openai_dict_new(m, download_images=self.download_images) for m in request.messages
-        ]
-        if fmt := request.response_format:
-            if not isinstance(fmt, JsonSchemaResponseFormat):
-                raise ValueError(
-                    f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported."
-                )
-
-            fmt = fmt.json_schema
-            name = fmt["title"]
-            del fmt["title"]
-            fmt["additionalProperties"] = False
-
-            # Apply additionalProperties: False recursively to all objects
-            fmt = self._add_additional_properties_recursive(fmt)
-
-            input_dict["response_format"] = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": name,
-                    "schema": fmt,
-                    "strict": self.json_schema_strict,
-                },
-            }
-        if request.tools:
-            input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools]
-            if request.tool_config.tool_choice:
-                input_dict["tool_choice"] = (
-                    request.tool_config.tool_choice.value
-                    if isinstance(request.tool_config.tool_choice, ToolChoice)
-                    else request.tool_config.tool_choice
-                )
-
-        return {
-            "model": request.model,
-            "api_key": self.get_api_key(),
-            "api_base": self.api_base,
-            **input_dict,
-            "stream": request.stream,
-            **get_sampling_options(request.sampling_params),
-        }
-
     def get_api_key(self) -> str:
         provider_data = self.get_request_provider_data()
         key_field = self.provider_data_api_key_field
-        if provider_data and getattr(provider_data, key_field, None):
-            api_key
-
-
+        if provider_data and key_field and (api_key := getattr(provider_data, key_field, None)):
+            return str(api_key)  # type: ignore[no-any-return]  # getattr returns Any, can't narrow without runtime type inspection
+
+        api_key = self.api_key_from_config
         if not api_key:
             raise ValueError(
                 "API key is not set. Please provide a valid API key in the "
@@ -192,7 +146,13 @@ class LiteLLMOpenAIMixin(
         self,
         params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
+
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id
 
         # Convert input to list if it's a string
         input_list = [params.input] if isinstance(params.input, str) else params.input
@@ -200,7 +160,7 @@ class LiteLLMOpenAIMixin(
         # Call litellm embedding function
         # litellm.drop_params = True
         response = litellm.embedding(
-            model=self.get_litellm_model_name(
+            model=self.get_litellm_model_name(provider_resource_id),
             input=input_list,
             api_key=self.get_api_key(),
             api_base=self.api_base,
@@ -217,18 +177,29 @@ class LiteLLMOpenAIMixin(
 
         return OpenAIEmbeddingsResponse(
             data=data,
-            model=
+            model=provider_resource_id,
             usage=usage,
         )
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
+        # Inject stream_options when streaming and telemetry is active
+        stream_options = get_stream_options_for_telemetry(
+            params.stream_options, params.stream, self.supports_stream_options
+        )
+
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
+
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id
 
         request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(
+            model=self.get_litellm_model_name(provider_resource_id),
             prompt=params.prompt,
             best_of=params.best_of,
             echo=params.echo,
@@ -241,34 +212,42 @@ class LiteLLMOpenAIMixin(
             seed=params.seed,
             stop=params.stop,
             stream=params.stream,
-            stream_options=
+            stream_options=stream_options,
             temperature=params.temperature,
             top_p=params.top_p,
             user=params.user,
             suffix=params.suffix,
             api_key=self.get_api_key(),
             api_base=self.api_base,
+            **self._litellm_extra_request_params(params),
         )
-
+        # LiteLLM returns compatible type but mypy can't verify external library
+        result = await litellm.atext_completion(**request_params)
+
+        if params.stream:
+            return wrap_async_stream(result)  # type: ignore[arg-type]  # LiteLLM streaming types
+
+        return result  # type: ignore[return-value]  # external lib lacks type stubs
 
     async def openai_chat_completion(
         self,
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        #
-
+        # Inject stream_options when streaming and telemetry is active
+        stream_options = get_stream_options_for_telemetry(
+            params.stream_options, params.stream, self.supports_stream_options
+        )
 
-
-
-        if stream_options is None:
-            stream_options = {"include_usage": True}
-        elif "include_usage" not in stream_options:
-            stream_options = {**stream_options, "include_usage": True}
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
 
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id
 
         request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(
+            model=self.get_litellm_model_name(provider_resource_id),
             messages=params.messages,
             frequency_penalty=params.frequency_penalty,
             function_call=params.function_call,
@@ -293,8 +272,15 @@ class LiteLLMOpenAIMixin(
             user=params.user,
             api_key=self.get_api_key(),
             api_base=self.api_base,
+            **self._litellm_extra_request_params(params),
         )
-
+        # LiteLLM returns compatible type but mypy can't verify external library
+        result = await litellm.acompletion(**request_params)
+
+        if params.stream:
+            return wrap_async_stream(result)  # type: ignore[arg-type]  # LiteLLM streaming types
+
+        return result  # type: ignore[return-value]  # external lib lacks type stubs
 
     async def check_model_availability(self, model: str) -> bool:
         """
@@ -310,6 +296,20 @@ class LiteLLMOpenAIMixin(
 
         return model in litellm.models_by_provider[self.litellm_provider_name]
 
+    def _litellm_extra_request_params(
+        self,
+        params: OpenAIChatCompletionRequestWithExtraBody | OpenAICompletionRequestWithExtraBody,
+    ) -> dict[str, Any]:
+        """
+        Provider hook for extra LiteLLM/OpenAI-compat request params.
+
+        This is intentionally a narrow hook so provider adapters (e.g. WatsonX)
+        can add provider-specific kwargs (timeouts, project IDs, etc.) while the
+        mixin remains the single source of truth for telemetry-driven
+        stream_options injection.
+        """
+        return {}
+
 
 def b64_encode_openai_embeddings_response(
     response_data: list[dict], encoding_format: str | None = "float"
```
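The `_litellm_extra_request_params` hook added above is a deliberate no-op in the mixin; per its docstring, adapters such as WatsonX override it to pass provider-specific kwargs through to LiteLLM. A hypothetical override sketch (the subclass name and kwarg values are illustrative, not from the diff):

```python
from typing import Any

from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
from llama_stack_api import (
    OpenAIChatCompletionRequestWithExtraBody,
    OpenAICompletionRequestWithExtraBody,
)


class ExampleProviderAdapter(LiteLLMOpenAIMixin):  # hypothetical adapter
    def _litellm_extra_request_params(
        self,
        params: OpenAIChatCompletionRequestWithExtraBody | OpenAICompletionRequestWithExtraBody,
    ) -> dict[str, Any]:
        # Whatever is returned here is spread into the litellm.acompletion /
        # litellm.atext_completion call as extra **kwargs by the mixin.
        return {"timeout": 60, "project_id": "example-project"}  # illustrative kwargs
```

The `get_stream_options_for_telemetry` calls, in turn, replace the inline logic deleted from `openai_chat_completion`, which force-merged `include_usage: True` into `stream_options` so streaming responses report token usage. A sketch of equivalent behavior, inferring the argument order from the call sites and the branching from the deleted code; the real helper may differ in details such as telemetry gating:

```python
def get_stream_options_for_telemetry_sketch(
    stream_options: dict | None,
    stream: bool | None,
    supports_stream_options: bool,
) -> dict | None:
    # Non-streaming requests, and providers that reject stream_options,
    # pass through unchanged.
    if not stream or not supports_stream_options:
        return stream_options
    # Mirror of the deleted 0.3.4 inline logic: ask the provider to emit a
    # final usage chunk so telemetry can record token counts.
    if stream_options is None:
        return {"include_usage": True}
    if "include_usage" not in stream_options:
        return {**stream_options, "include_usage": True}
    return stream_options
```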
llama_stack/providers/utils/inference/model_registry.py (+1 -3):

```diff
@@ -8,13 +8,11 @@ from typing import Any
 
 from pydantic import BaseModel, Field, SecretStr
 
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 from llama_stack.providers.utils.inference import (
     ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
 )
+from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError
 
 logger = get_logger(name=__name__, category="providers::utils")
 
```