llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
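Note on the layout change visible throughout the list above: the public API surface moves out of llama_stack/apis/* (and llama_stack/providers/datatypes.py) into a new top-level llama_stack_api package, and the kvstore/sqlstore utilities move from llama_stack/providers/utils/ to llama_stack/core/storage/. A hedged sketch of the resulting import migration for downstream code, using only names that appear in the hunks below; verify the exact re-exports against llama_stack_api/__init__.py:

# Before (0.3.5), imports were scattered across llama_stack.apis.* submodules:
#   from llama_stack.apis.inference import Inference
#   from llama_stack.apis.common.errors import VectorStoreNotFoundError
#   from llama_stack.providers.utils.kvstore import kvstore_impl
# After (0.4.1), one flat public package plus relocated storage helpers:
from llama_stack_api import Inference, VectorStoreNotFoundError
from llama_stack.core.storage.kvstore import kvstore_impl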
llama_stack/providers/remote/vector_io/weaviate/weaviate.py

@@ -12,17 +12,9 @@ from numpy.typing import NDArray
 from weaviate.classes.init import Auth
 from weaviate.classes.query import Filter, HybridFusion
 
-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
 from llama_stack.providers.utils.memory.vector_store import (
     RERANKER_TYPE_RRF,
@@ -30,7 +22,20 @@ from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorStoreWithIndex,
 )
+from llama_stack.providers.utils.vector_io import load_embedded_chunk_with_backward_compat
 from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
+from llama_stack_api import (
+    EmbeddedChunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
+from llama_stack_api.internal.kvstore import KVStore
 
 from .config import WeaviateVectorIOConfig
 
@@ -53,20 +58,19 @@ class WeaviateIndex(EmbeddingIndex):
     async def initialize(self):
         pass
 
-    async def add_chunks(self, chunks: list[
-
-
-    )
+    async def add_chunks(self, chunks: list[EmbeddedChunk]):
+        if not chunks:
+            return
 
         data_objects = []
-        for chunk
+        for chunk in chunks:
             data_objects.append(
                 wvc.data.DataObject(
                     properties={
                         "chunk_id": chunk.chunk_id,
                         "chunk_content": chunk.model_dump_json(),
                     },
-                    vector=embedding
+                    vector=chunk.embedding,  # Already a list[float]
                 )
             )
 
@@ -112,7 +116,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk =
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
@@ -172,7 +176,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk =
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
@@ -241,7 +245,7 @@ class WeaviateIndex(EmbeddingIndex):
             chunk_json = doc.properties["chunk_content"]
             try:
                 chunk_dict = json.loads(chunk_json)
-                chunk =
+                chunk = load_embedded_chunk_with_backward_compat(chunk_dict)
             except Exception:
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
@@ -259,9 +263,8 @@ class WeaviateIndex(EmbeddingIndex):
 
 class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate):
     def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
-        super().__init__(files_api=files_api, kvstore=None)
+        super().__init__(inference_api=inference_api, files_api=files_api, kvstore=None)
         self.config = config
-        self.inference_api = inference_api
         self.client_cache = {}
         self.cache = {}
         self.vector_store_table = None
@@ -369,19 +372,21 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
         self.cache[vector_store_id] = index
         return index
 
-    async def insert_chunks(
-
+    async def insert_chunks(
+        self, vector_store_id: str, chunks: list[EmbeddedChunk], ttl_seconds: int | None = None
+    ) -> None:
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(
+            raise VectorStoreNotFoundError(vector_store_id)
 
         await index.insert_chunks(chunks)
 
     async def query_chunks(
-        self,
+        self, vector_store_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_store_index(
+        index = await self._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise VectorStoreNotFoundError(
+            raise VectorStoreNotFoundError(vector_store_id)
 
         return await index.query_chunks(query, params)
 
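The Weaviate hunks above track the 0.4.x chunk model: the embedding now travels on the chunk itself (EmbeddedChunk) instead of in a parallel embeddings argument, and JSON read back from Weaviate is re-hydrated through a backward-compatibility loader. A minimal sketch of the storage round-trip, assuming EmbeddedChunk is a Pydantic model exposing chunk_id and embedding (both visible in the hunks); load_chunk here is an illustrative stand-in, not the shipped load_embedded_chunk_with_backward_compat:

import json

from llama_stack_api import EmbeddedChunk


def to_weaviate_properties(chunk: EmbeddedChunk) -> dict:
    # Mirrors the DataObject payload built in add_chunks above.
    return {"chunk_id": chunk.chunk_id, "chunk_content": chunk.model_dump_json()}


def load_chunk(chunk_json: str) -> EmbeddedChunk:
    # Assumption: current records validate directly; the real helper
    # presumably also accepts pre-0.4 records written before embeddings
    # lived on the chunk.
    return EmbeddedChunk.model_validate(json.loads(chunk_json))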
llama_stack/providers/utils/common/data_schema_validator.py

@@ -7,12 +7,8 @@
 from enum import Enum
 from typing import Any
 
-from llama_stack.apis.common.type_system import (
-    ChatCompletionInputType,
-    CompletionInputType,
-    StringType,
-)
 from llama_stack.core.datatypes import Api
+from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType
 
 
 class ColumnName(Enum):
llama_stack/providers/utils/files/form_data.py

@@ -9,7 +9,7 @@ import json
 from fastapi import Request
 from pydantic import BaseModel, ValidationError
 
-from
+from llama_stack_api import ExpiresAfter
 
 
 async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None:
llama_stack/providers/utils/inference/embedding_mixin.py

@@ -17,7 +17,7 @@ from llama_stack.log import get_logger
 if TYPE_CHECKING:
     from sentence_transformers import SentenceTransformer
 
-from
+from llama_stack_api import (
     ModelStore,
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
llama_stack/providers/utils/inference/inference_store.py

@@ -8,20 +8,19 @@ from typing import Any
 
 from sqlalchemy.exc import IntegrityError
 
-from llama_stack.
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
+from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.core.storage.sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
+from llama_stack.log import get_logger
+from llama_stack_api import (
     ListOpenAIChatCompletionResponse,
     OpenAIChatCompletion,
     OpenAICompletionWithInputMessages,
     OpenAIMessageParam,
     Order,
 )
-from
-from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
-from llama_stack.log import get_logger
-
-from ..sqlstore.api import ColumnDefinition, ColumnType
-from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
+from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType
 
 logger = get_logger(name=__name__, category="inference")
 
llama_stack/providers/utils/inference/litellm_openai_mixin.py

@@ -7,13 +7,20 @@
 import base64
 import struct
 from collections.abc import AsyncIterator
+from typing import Any
 
 import litellm
 
-from llama_stack.
-
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
+from llama_stack.providers.utils.inference.openai_compat import (
+    get_stream_options_for_telemetry,
+    prepare_openai_completion_params,
+)
+from llama_stack.providers.utils.inference.stream_utils import wrap_async_stream
+from llama_stack_api import (
     InferenceProvider,
-    JsonSchemaResponseFormat,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -23,16 +30,6 @@ from llama_stack.apis.inference (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
-    ToolChoice,
-)
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
-from llama_stack.providers.utils.inference.openai_compat import (
-    convert_message_to_openai_dict_new,
-    convert_tooldef_to_openai_tool,
-    get_sampling_options,
-    prepare_openai_completion_params,
 )
 
 logger = get_logger(name=__name__, category="providers::utils")
@@ -55,6 +52,7 @@ class LiteLLMOpenAIMixin(
         openai_compat_api_base: str | None = None,
         download_images: bool = False,
         json_schema_strict: bool = True,
+        supports_stream_options: bool = True,
     ):
         """
         Initialize the LiteLLMOpenAIMixin.
@@ -66,6 +64,7 @@ class LiteLLMOpenAIMixin(
         :param openai_compat_api_base: The base URL for OpenAI compatibility, or None if not using OpenAI compatibility.
         :param download_images: Whether to download images and convert to base64 for message conversion.
         :param json_schema_strict: Whether to use strict mode for JSON schema validation.
+        :param supports_stream_options: Whether the provider supports stream_options parameter.
         """
         ModelRegistryHelper.__init__(self, model_entries=model_entries)
 
@@ -75,6 +74,7 @@ class LiteLLMOpenAIMixin(
         self.api_base = openai_compat_api_base
         self.download_images = download_images
         self.json_schema_strict = json_schema_strict
+        self.supports_stream_options = supports_stream_options
 
         if openai_compat_api_base:
             self.is_openai_compat = True
@@ -127,59 +127,13 @@ class LiteLLMOpenAIMixin(
 
         return schema
 
-    async def _get_params(self, request: ChatCompletionRequest) -> dict:
-        input_dict = {}
-
-        input_dict["messages"] = [
-            await convert_message_to_openai_dict_new(m, download_images=self.download_images) for m in request.messages
-        ]
-        if fmt := request.response_format:
-            if not isinstance(fmt, JsonSchemaResponseFormat):
-                raise ValueError(
-                    f"Unsupported response format: {type(fmt)}. Only JsonSchemaResponseFormat is supported."
-                )
-
-            fmt = fmt.json_schema
-            name = fmt["title"]
-            del fmt["title"]
-            fmt["additionalProperties"] = False
-
-            # Apply additionalProperties: False recursively to all objects
-            fmt = self._add_additional_properties_recursive(fmt)
-
-            input_dict["response_format"] = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": name,
-                    "schema": fmt,
-                    "strict": self.json_schema_strict,
-                },
-            }
-        if request.tools:
-            input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools]
-        if request.tool_config.tool_choice:
-            input_dict["tool_choice"] = (
-                request.tool_config.tool_choice.value
-                if isinstance(request.tool_config.tool_choice, ToolChoice)
-                else request.tool_config.tool_choice
-            )
-
-        return {
-            "model": request.model,
-            "api_key": self.get_api_key(),
-            "api_base": self.api_base,
-            **input_dict,
-            "stream": request.stream,
-            **get_sampling_options(request.sampling_params),
-        }
-
     def get_api_key(self) -> str:
         provider_data = self.get_request_provider_data()
         key_field = self.provider_data_api_key_field
-        if provider_data and getattr(provider_data, key_field, None):
-            api_key
-
-
+        if provider_data and key_field and (api_key := getattr(provider_data, key_field, None)):
+            return str(api_key)  # type: ignore[no-any-return] # getattr returns Any, can't narrow without runtime type inspection
+
+        api_key = self.api_key_from_config
         if not api_key:
             raise ValueError(
                 "API key is not set. Please provide a valid API key in the "
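The get_api_key rewrite above collapses the old branchy lookup into one guarded walrus expression: a per-request key from provider data wins when the provider declares a key field, otherwise the config key is used, and a missing key raises. Equivalent standalone logic, for illustration only (resolve_api_key is local to this sketch):

def resolve_api_key(provider_data: object | None, key_field: str | None, config_key: str | None) -> str:
    # Per-request provider data takes precedence when it carries a non-empty key.
    if provider_data and key_field and (key := getattr(provider_data, key_field, None)):
        return str(key)
    if not config_key:
        raise ValueError("API key is not set.")
    return config_key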
llama_stack/providers/utils/inference/litellm_openai_mixin.py (continued)

@@ -192,7 +146,13 @@ class LiteLLMOpenAIMixin(
         self,
         params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
+
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id
 
         # Convert input to list if it's a string
         input_list = [params.input] if isinstance(params.input, str) else params.input
@@ -200,7 +160,7 @@ class LiteLLMOpenAIMixin(
         # Call litellm embedding function
         # litellm.drop_params = True
         response = litellm.embedding(
-            model=self.get_litellm_model_name(
+            model=self.get_litellm_model_name(provider_resource_id),
             input=input_list,
             api_key=self.get_api_key(),
             api_base=self.api_base,
@@ -217,18 +177,29 @@ class LiteLLMOpenAIMixin(
 
         return OpenAIEmbeddingsResponse(
             data=data,
-            model=
+            model=provider_resource_id,
             usage=usage,
         )
 
     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
+        # Inject stream_options when streaming and telemetry is active
+        stream_options = get_stream_options_for_telemetry(
+            params.stream_options, params.stream, self.supports_stream_options
+        )
+
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
+
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id
 
         request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(
+            model=self.get_litellm_model_name(provider_resource_id),
             prompt=params.prompt,
             best_of=params.best_of,
             echo=params.echo,
@@ -241,34 +212,42 @@ class LiteLLMOpenAIMixin(
             seed=params.seed,
             stop=params.stop,
             stream=params.stream,
-            stream_options=
+            stream_options=stream_options,
             temperature=params.temperature,
             top_p=params.top_p,
             user=params.user,
             suffix=params.suffix,
             api_key=self.get_api_key(),
             api_base=self.api_base,
+            **self._litellm_extra_request_params(params),
         )
-
+        # LiteLLM returns compatible type but mypy can't verify external library
+        result = await litellm.atext_completion(**request_params)
+
+        if params.stream:
+            return wrap_async_stream(result)  # type: ignore[arg-type] # LiteLLM streaming types
+
+        return result  # type: ignore[return-value] # external lib lacks type stubs
 
     async def openai_chat_completion(
         self,
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
-        #
-
+        # Inject stream_options when streaming and telemetry is active
+        stream_options = get_stream_options_for_telemetry(
+            params.stream_options, params.stream, self.supports_stream_options
+        )
 
-
-
-        if stream_options is None:
-            stream_options = {"include_usage": True}
-        elif "include_usage" not in stream_options:
-            stream_options = {**stream_options, "include_usage": True}
+        if not self.model_store:
+            raise ValueError("Model store is not initialized")
 
         model_obj = await self.model_store.get_model(params.model)
+        if model_obj.provider_resource_id is None:
+            raise ValueError(f"Model {params.model} has no provider_resource_id")
+        provider_resource_id = model_obj.provider_resource_id
 
         request_params = await prepare_openai_completion_params(
-            model=self.get_litellm_model_name(
+            model=self.get_litellm_model_name(provider_resource_id),
             messages=params.messages,
             frequency_penalty=params.frequency_penalty,
             function_call=params.function_call,
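Both completion paths now delegate usage-chunk injection to get_stream_options_for_telemetry, new in openai_compat.py. Its body is not shown in this diff, but the deleted inline code above pins down the behavior it centralizes; a hedged reconstruction of those semantics:

from typing import Any


def get_stream_options_for_telemetry(
    stream_options: dict[str, Any] | None,
    stream: bool | None,
    supports_stream_options: bool,
) -> dict[str, Any] | None:
    # Sketch only: when streaming against a provider that honors
    # stream_options, ensure usage chunks are emitted so telemetry can
    # record token counts; otherwise pass the options through unchanged.
    if not stream or not supports_stream_options:
        return stream_options
    if stream_options is None:
        return {"include_usage": True}
    if "include_usage" not in stream_options:
        return {**stream_options, "include_usage": True}
    return stream_options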
llama_stack/providers/utils/inference/litellm_openai_mixin.py (continued)

@@ -293,8 +272,15 @@ class LiteLLMOpenAIMixin(
             user=params.user,
             api_key=self.get_api_key(),
             api_base=self.api_base,
+            **self._litellm_extra_request_params(params),
         )
-
+        # LiteLLM returns compatible type but mypy can't verify external library
+        result = await litellm.acompletion(**request_params)
+
+        if params.stream:
+            return wrap_async_stream(result)  # type: ignore[arg-type] # LiteLLM streaming types
+
+        return result  # type: ignore[return-value] # external lib lacks type stubs
 
     async def check_model_availability(self, model: str) -> bool:
         """
@@ -310,6 +296,20 @@ class LiteLLMOpenAIMixin(
 
         return model in litellm.models_by_provider[self.litellm_provider_name]
 
+    def _litellm_extra_request_params(
+        self,
+        params: OpenAIChatCompletionRequestWithExtraBody | OpenAICompletionRequestWithExtraBody,
+    ) -> dict[str, Any]:
+        """
+        Provider hook for extra LiteLLM/OpenAI-compat request params.
+
+        This is intentionally a narrow hook so provider adapters (e.g. WatsonX)
+        can add provider-specific kwargs (timeouts, project IDs, etc.) while the
+        mixin remains the single source of truth for telemetry-driven
+        stream_options injection.
+        """
+        return {}
+
 
 def b64_encode_openai_embeddings_response(
     response_data: list[dict], encoding_format: str | None = "float"
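The new _litellm_extra_request_params hook gives adapters a single seam for provider-specific kwargs without overriding the completion methods. A sketch of an override in the spirit of the docstring's WatsonX example; ExampleAdapter and its project_id/timeout fields are illustrative, not the actual watsonx adapter:

from typing import Any

from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin


class ExampleAdapter(LiteLLMOpenAIMixin):
    project_id = "my-project"  # hypothetical provider config
    timeout_secs = 60

    def _litellm_extra_request_params(self, params) -> dict[str, Any]:
        # Merged into the litellm.acompletion / litellm.atext_completion
        # kwargs built by the mixin above.
        return {"project_id": self.project_id, "timeout": self.timeout_secs}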
llama_stack/providers/utils/inference/model_registry.py

@@ -8,13 +8,11 @@ from typing import Any
 
 from pydantic import BaseModel, Field, SecretStr
 
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 from llama_stack.providers.utils.inference import (
     ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
 )
+from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError
 
 logger = get_logger(name=__name__, category="providers::utils")
 