llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0

llama_stack/providers/utils/inference/openai_mixin.py (+68 -42):

@@ -10,11 +10,20 @@ from abc import ABC, abstractmethod
 from collections.abc import AsyncIterator, Iterable
 from typing import Any

-from openai import
+from openai import AsyncOpenAI
 from pydantic import BaseModel, ConfigDict

-from llama_stack.
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.providers.utils.inference.openai_compat import (
+    get_stream_options_for_telemetry,
+    prepare_openai_completion_params,
+)
+from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content
+from llama_stack_api import (
     Model,
+    ModelType,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -26,12 +35,6 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingUsage,
     OpenAIMessageParam,
 )
-from llama_stack.apis.models import ModelType
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
-from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content

 logger = get_logger(name=__name__, category="providers::utils")

@@ -47,7 +50,9 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
     The behavior of this class can be customized by child classes in the following ways:
     - overwrite_completion_id: If True, overwrites the 'id' field in OpenAI responses
     - download_images: If True, downloads images and converts to base64 for providers that require it
+    - supports_stream_options: If False, disables stream_options injection for providers that don't support it
     - embedding_model_metadata: A dictionary mapping model IDs to their embedding metadata
+    - construct_model_from_identifier: Method to construct a Model instance corresponding to the given identifier
     - provider_data_api_key_field: Optional field name in provider data to look for API key
     - list_provider_model_ids: Method to list available models from the provider
     - get_extra_client_params: Method to provide extra parameters to the AsyncOpenAI client
@@ -73,6 +78,10 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
     # for providers that require base64 encoded images instead of URLs.
     download_images: bool = False

+    # Allow subclasses to control whether the provider supports stream_options parameter
+    # Set to False for providers that don't support stream_options (e.g., Ollama, vLLM)
+    supports_stream_options: bool = True
+
     # Embedding model metadata for this provider
     # Can be set by subclasses or instances to provide embedding models
     # Format: {"model_id": {"embedding_dimension": 1536, "context_length": 8192}}
@@ -118,6 +127,30 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         """
         return {}

+    def construct_model_from_identifier(self, identifier: str) -> Model:
+        """
+        Construct a Model instance corresponding to the given identifier
+
+        Child classes can override this to customize model typing/metadata.
+
+        :param identifier: The provider's model identifier
+        :return: A Model instance
+        """
+        if metadata := self.embedding_model_metadata.get(identifier):
+            return Model(
+                provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+                provider_resource_id=identifier,
+                identifier=identifier,
+                model_type=ModelType.embedding,
+                metadata=metadata,
+            )
+        return Model(
+            provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+            provider_resource_id=identifier,
+            identifier=identifier,
+            model_type=ModelType.llm,
+        )
+
     async def list_provider_model_ids(self) -> Iterable[str]:
         """
         List available models from the provider.
@@ -223,30 +256,33 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         return model_obj.provider_resource_id

     async def _maybe_overwrite_id(self, resp: Any, stream: bool | None) -> Any:
-        if not self.overwrite_completion_id:
-            return resp
-
-        new_id = f"cltsd-{uuid.uuid4()}"
         if stream:
+            new_id = f"cltsd-{uuid.uuid4()}" if self.overwrite_completion_id else None

             async def _gen():
                 async for chunk in resp:
-
+                    if new_id:
+                        chunk.id = new_id
                     yield chunk

             return _gen()
         else:
-
+            if self.overwrite_completion_id:
+                resp.id = f"cltsd-{uuid.uuid4()}"
             return resp

     async def openai_completion(
         self,
         params: OpenAICompletionRequestWithExtraBody,
-    ) -> OpenAICompletion:
+    ) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
         """
         Direct OpenAI completion API call.
         """
-        #
+        # Inject stream_options when streaming and telemetry is active
+        stream_options = get_stream_options_for_telemetry(
+            params.stream_options, params.stream or False, self.supports_stream_options
+        )
+
         provider_model_id = await self._get_provider_model_id(params.model)
         self._validate_model_allowed(provider_model_id)

@@ -264,7 +300,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
             seed=params.seed,
             stop=params.stop,
             stream=params.stream,
-            stream_options=
+            stream_options=stream_options,
             temperature=params.temperature,
             top_p=params.top_p,
             user=params.user,
@@ -283,6 +319,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         """
         Direct OpenAI chat completion API call.
         """
+        # Inject stream_options when streaming and telemetry is active
+        stream_options = get_stream_options_for_telemetry(
+            params.stream_options, params.stream or False, self.supports_stream_options
+        )
+
         provider_model_id = await self._get_provider_model_id(params.model)
         self._validate_model_allowed(provider_model_id)

@@ -323,7 +364,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
             seed=params.seed,
             stop=params.stop,
             stream=params.stream,
-            stream_options=
+            stream_options=stream_options,
             temperature=params.temperature,
             tool_choice=params.tool_choice,
             tools=params.tools,
@@ -353,17 +394,16 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         request_params: dict[str, Any] = {
             "model": provider_model_id,
             "input": params.input,
-            "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
-            "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN,
-            "user": params.user if params.user is not None else NOT_GIVEN,
         }
+        if params.encoding_format is not None:
+            request_params["encoding_format"] = params.encoding_format
+        if params.dimensions is not None:
+            request_params["dimensions"] = params.dimensions
+        if params.user is not None:
+            request_params["user"] = params.user
+        if params.model_extra:
+            request_params["extra_body"] = params.model_extra

-        # Add extra_body if present
-        extra_body = params.model_extra
-        if extra_body:
-            request_params["extra_body"] = extra_body
-
-        # Call OpenAI embeddings API with properly typed parameters
         response = await self.client.embeddings.create(**request_params)

         data = []
@@ -439,21 +479,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
             if self.config.allowed_models is not None and provider_model_id not in self.config.allowed_models:
                 logger.info(f"Skipping model {provider_model_id} as it is not in the allowed models list")
                 continue
-
-                model = Model(
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    provider_resource_id=provider_model_id,
-                    identifier=provider_model_id,
-                    model_type=ModelType.embedding,
-                    metadata=metadata,
-                )
-            else:
-                model = Model(
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    provider_resource_id=provider_model_id,
-                    identifier=provider_model_id,
-                    model_type=ModelType.llm,
-                )
+            model = self.construct_model_from_identifier(provider_model_id)
             self._model_cache[provider_model_id] = model

         return list(self._model_cache.values())
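
The new `supports_stream_options` flag and `construct_model_from_identifier` hook give provider adapters explicit control over `stream_options` injection and over how discovered model IDs are typed. A minimal sketch of how a subclass might use them; the `ExampleAdapter` class, its `-embed` suffix rule, and the 768-dimension metadata are illustrative assumptions, and the abstract members of `OpenAIMixin` (API key, base URL, config) are omitted:

```python
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack_api import Model, ModelType


class ExampleAdapter(OpenAIMixin):
    """Hypothetical remote inference adapter (required abstract members omitted)."""

    # Skip stream_options injection for a backend that rejects the parameter,
    # as the diff notes for providers such as Ollama and vLLM.
    supports_stream_options: bool = False

    def construct_model_from_identifier(self, identifier: str) -> Model:
        # Illustrative rule: ids ending in "-embed" are registered as embedding models.
        if identifier.endswith("-embed"):
            return Model(
                provider_id=self.__provider_id__,
                provider_resource_id=identifier,
                identifier=identifier,
                model_type=ModelType.embedding,
                metadata={"embedding_dimension": 768},
            )
        # Everything else falls back to the default LLM / embedding-metadata logic.
        return super().construct_model_from_identifier(identifier)
```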
llama_stack/providers/utils/inference/prompt_adapter.py (+50 -265):

@@ -14,27 +14,6 @@ from typing import Any
 import httpx
 from PIL import Image as PIL_Image

-from llama_stack.apis.common.content_types import (
-    ImageContentItem,
-    InterleavedContent,
-    InterleavedContentItem,
-    TextContentItem,
-)
-from llama_stack.apis.inference import (
-    ChatCompletionRequest,
-    CompletionRequest,
-    Message,
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-    OpenAIFile,
-    ResponseFormat,
-    ResponseFormatType,
-    SystemMessage,
-    SystemMessageBehavior,
-    ToolChoice,
-    ToolDefinition,
-    UserMessage,
-)
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import (
     RawContent,
@@ -42,33 +21,37 @@ from llama_stack.models.llama.datatypes import (
     RawMediaItem,
     RawMessage,
     RawTextItem,
-    Role,
     StopReason,
+    ToolCall,
+    ToolDefinition,
     ToolPromptFormat,
 )
 from llama_stack.models.llama.llama3.chat_format import ChatFormat
-from llama_stack.models.llama.llama3.prompt_templates import (
-    BuiltinToolGenerator,
-    FunctionTagCustomToolGenerator,
-    JsonCustomToolGenerator,
-    PythonListCustomToolGenerator,
-    SystemDefaultGenerator,
-)
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.models.llama.llama4.prompt_templates.system_prompts import (
-    PythonListCustomToolGenerator as PythonListCustomToolGeneratorLlama4,
-)
 from llama_stack.models.llama.sku_list import resolve_model
 from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal
-from
+from llama_stack_api import (
+    CompletionRequest,
+    ImageContentItem,
+    InterleavedContent,
+    InterleavedContentItem,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    OpenAIFile,
+    OpenAIMessageParam,
+    OpenAISystemMessageParam,
+    OpenAIToolMessageParam,
+    OpenAIUserMessageParam,
+    ResponseFormat,
+    ResponseFormatType,
+    TextContentItem,
+    ToolChoice,
+)

 log = get_logger(name=__name__, category="providers::utils")


-class ChatCompletionRequestWithRawContent(ChatCompletionRequest):
-    messages: list[RawMessage]
-
-
 class CompletionRequestWithRawContent(CompletionRequest):
     content: RawContent

@@ -103,28 +86,6 @@ def interleaved_content_as_str(
     return _process(content)


-async def convert_request_to_raw(
-    request: ChatCompletionRequest | CompletionRequest,
-) -> ChatCompletionRequestWithRawContent | CompletionRequestWithRawContent:
-    if isinstance(request, ChatCompletionRequest):
-        messages = []
-        for m in request.messages:
-            content = await interleaved_content_convert_to_raw(m.content)
-            d = m.model_dump()
-            d["content"] = content
-            messages.append(RawMessage(**d))
-
-        d = request.model_dump()
-        d["messages"] = messages
-        request = ChatCompletionRequestWithRawContent(**d)
-    else:
-        d = request.model_dump()
-        d["content"] = await interleaved_content_convert_to_raw(request.content)
-        request = CompletionRequestWithRawContent(**d)
-
-    return request
-
-
 async def interleaved_content_convert_to_raw(
     content: InterleavedContent,
 ) -> RawContent:
@@ -171,6 +132,36 @@ async def interleaved_content_convert_to_raw(
     return await _localize_single(content)


+async def convert_openai_message_to_raw_message(message: OpenAIMessageParam) -> RawMessage:
+    """Convert OpenAI message format to RawMessage format used by Llama formatters."""
+    if isinstance(message, OpenAIUserMessageParam):
+        content = await interleaved_content_convert_to_raw(message.content)  # type: ignore[arg-type]
+        return RawMessage(role="user", content=content)
+    elif isinstance(message, OpenAISystemMessageParam):
+        content = await interleaved_content_convert_to_raw(message.content)  # type: ignore[arg-type]
+        return RawMessage(role="system", content=content)
+    elif isinstance(message, OpenAIAssistantMessageParam):
+        content = await interleaved_content_convert_to_raw(message.content or "")  # type: ignore[arg-type]
+        tool_calls = []
+        if message.tool_calls:
+            for tc in message.tool_calls:
+                if tc.function:
+                    tool_calls.append(
+                        ToolCall(
+                            call_id=tc.id or "",
+                            tool_name=tc.function.name or "",
+                            arguments=tc.function.arguments or "{}",
+                        )
+                    )
+        return RawMessage(role="assistant", content=content, tool_calls=tool_calls)
+    elif isinstance(message, OpenAIToolMessageParam):
+        content = await interleaved_content_convert_to_raw(message.content)  # type: ignore[arg-type]
+        return RawMessage(role="tool", content=content)
+    else:
+        # Handle OpenAIDeveloperMessageParam if needed
+        raise ValueError(f"Unsupported message type: {type(message)}")
+
+
 def content_has_media(content: InterleavedContent):
     def _has_media_content(c):
         return isinstance(c, ImageContentItem)
@@ -181,17 +172,6 @@ def content_has_media(content: InterleavedContent):
     return _has_media_content(content)


-def messages_have_media(messages: list[Message]):
-    return any(content_has_media(m.content) for m in messages)
-
-
-def request_has_media(request: ChatCompletionRequest | CompletionRequest):
-    if isinstance(request, ChatCompletionRequest):
-        return messages_have_media(request.messages)
-    else:
-        return content_has_media(request.content)
-
-
 async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
     if uri.startswith("http"):
         async with httpx.AsyncClient() as client:
@@ -253,79 +233,6 @@ def augment_content_with_response_format_prompt(response_format, content):
     return content


-async def chat_completion_request_to_prompt(request: ChatCompletionRequest, llama_model: str) -> str:
-    messages = chat_completion_request_to_messages(request, llama_model)
-    request.messages = messages
-    request = await convert_request_to_raw(request)
-
-    formatter = ChatFormat(tokenizer=Tokenizer.get_instance())
-    model_input = formatter.encode_dialog_prompt(
-        request.messages,
-        tool_prompt_format=request.tool_config.tool_prompt_format or get_default_tool_prompt_format(llama_model),
-    )
-    return formatter.tokenizer.decode(model_input.tokens)
-
-
-async def chat_completion_request_to_model_input_info(
-    request: ChatCompletionRequest, llama_model: str
-) -> tuple[str, int]:
-    messages = chat_completion_request_to_messages(request, llama_model)
-    request.messages = messages
-    request = await convert_request_to_raw(request)
-
-    formatter = ChatFormat(tokenizer=Tokenizer.get_instance())
-    model_input = formatter.encode_dialog_prompt(
-        request.messages,
-        tool_prompt_format=request.tool_config.tool_prompt_format or get_default_tool_prompt_format(llama_model),
-    )
-    return (
-        formatter.tokenizer.decode(model_input.tokens),
-        len(model_input.tokens),
-    )
-
-
-def chat_completion_request_to_messages(
-    request: ChatCompletionRequest,
-    llama_model: str,
-) -> list[Message]:
-    """Reads chat completion request and augments the messages to handle tools.
-    For eg. for llama_3_1, add system message with the appropriate tools or
-    add user messsage for custom tools, etc.
-    """
-    assert llama_model is not None, "llama_model is required"
-    model = resolve_model(llama_model)
-    if model is None:
-        log.error(f"Could not resolve model {llama_model}")
-        return request.messages
-
-    allowed_models = supported_inference_models()
-    descriptors = [m.descriptor() for m in allowed_models]
-    if model.descriptor() not in descriptors:
-        log.error(f"Unsupported inference model? {model.descriptor()}")
-        return request.messages
-
-    if model.model_family == ModelFamily.llama3_1 or (
-        model.model_family == ModelFamily.llama3_2 and is_multimodal(model.core_model_id)
-    ):
-        # llama3.1 and llama3.2 multimodal models follow the same tool prompt format
-        messages = augment_messages_for_tools_llama_3_1(request)
-    elif model.model_family in (
-        ModelFamily.llama3_2,
-        ModelFamily.llama3_3,
-    ):
-        # llama3.2, llama3.3 follow the same tool prompt format
-        messages = augment_messages_for_tools_llama(request, PythonListCustomToolGenerator)
-    elif model.model_family == ModelFamily.llama4:
-        messages = augment_messages_for_tools_llama(request, PythonListCustomToolGeneratorLlama4)
-    else:
-        messages = request.messages
-
-    if fmt_prompt := response_format_prompt(request.response_format):
-        messages.append(UserMessage(content=fmt_prompt))
-
-    return messages
-
-
 def response_format_prompt(fmt: ResponseFormat | None):
     if not fmt:
         return None
@@ -338,128 +245,6 @@ def response_format_prompt(fmt: ResponseFormat | None):
     raise ValueError(f"Unknown response format {fmt.type}")


-def augment_messages_for_tools_llama_3_1(
-    request: ChatCompletionRequest,
-) -> list[Message]:
-    existing_messages = request.messages
-    existing_system_message = None
-    if existing_messages[0].role == Role.system.value:
-        existing_system_message = existing_messages.pop(0)
-
-    assert existing_messages[0].role != Role.system.value, "Should only have 1 system message"
-
-    messages = []
-
-    default_gen = SystemDefaultGenerator()
-    default_template = default_gen.gen()
-
-    sys_content = ""
-
-    tool_template = None
-    if request.tools:
-        tool_gen = BuiltinToolGenerator()
-        tool_template = tool_gen.gen(request.tools)
-
-        sys_content += tool_template.render()
-        sys_content += "\n"
-
-    sys_content += default_template.render()
-
-    if existing_system_message:
-        # TODO: this fn is needed in many places
-        def _process(c):
-            if isinstance(c, str):
-                return c
-            else:
-                return "<media>"
-
-        sys_content += "\n"
-
-        if isinstance(existing_system_message.content, str):
-            sys_content += _process(existing_system_message.content)
-        elif isinstance(existing_system_message.content, list):
-            sys_content += "\n".join([_process(c) for c in existing_system_message.content])
-
-    tool_choice_prompt = _get_tool_choice_prompt(request.tool_config.tool_choice, request.tools)
-    if tool_choice_prompt:
-        sys_content += "\n" + tool_choice_prompt
-
-    messages.append(SystemMessage(content=sys_content))
-
-    has_custom_tools = request.tools is not None and any(isinstance(dfn.tool_name, str) for dfn in request.tools)
-    if has_custom_tools:
-        fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.json
-        if fmt == ToolPromptFormat.json:
-            tool_gen = JsonCustomToolGenerator()
-        elif fmt == ToolPromptFormat.function_tag:
-            tool_gen = FunctionTagCustomToolGenerator()
-        else:
-            raise ValueError(f"Non supported ToolPromptFormat {fmt}")
-
-        custom_tools = [t for t in request.tools if isinstance(t.tool_name, str)]
-        custom_template = tool_gen.gen(custom_tools)
-        messages.append(UserMessage(content=custom_template.render()))
-
-    # Add back existing messages from the request
-    messages += existing_messages
-
-    return messages
-
-
-def augment_messages_for_tools_llama(
-    request: ChatCompletionRequest,
-    custom_tool_prompt_generator,
-) -> list[Message]:
-    existing_messages = request.messages
-    existing_system_message = None
-    if existing_messages[0].role == Role.system.value:
-        existing_system_message = existing_messages.pop(0)
-
-    assert existing_messages[0].role != Role.system.value, "Should only have 1 system message"
-
-    sys_content = ""
-    custom_tools, builtin_tools = [], []
-    for t in request.tools:
-        if isinstance(t.tool_name, str):
-            custom_tools.append(t)
-        else:
-            builtin_tools.append(t)
-
-    if builtin_tools:
-        tool_gen = BuiltinToolGenerator()
-        tool_template = tool_gen.gen(builtin_tools)
-
-        sys_content += tool_template.render()
-        sys_content += "\n"
-
-    custom_tools = [dfn for dfn in request.tools if isinstance(dfn.tool_name, str)]
-    if custom_tools:
-        fmt = request.tool_config.tool_prompt_format or ToolPromptFormat.python_list
-        if fmt != ToolPromptFormat.python_list:
-            raise ValueError(f"Non supported ToolPromptFormat {request.tool_config.tool_prompt_format}")
-
-        system_prompt = None
-        if existing_system_message and request.tool_config.system_message_behavior == SystemMessageBehavior.replace:
-            system_prompt = existing_system_message.content
-
-        tool_template = custom_tool_prompt_generator().gen(custom_tools, system_prompt)
-
-        sys_content += tool_template.render()
-        sys_content += "\n"
-
-    if existing_system_message and (
-        request.tool_config.system_message_behavior == SystemMessageBehavior.append or not custom_tools
-    ):
-        sys_content += interleaved_content_as_str(existing_system_message.content, sep="\n")
-
-    tool_choice_prompt = _get_tool_choice_prompt(request.tool_config.tool_choice, request.tools)
-    if tool_choice_prompt:
-        sys_content += "\n" + tool_choice_prompt
-
-    messages = [SystemMessage(content=sys_content.strip("\n")), *existing_messages]
-    return messages
-
-
 def _get_tool_choice_prompt(tool_choice: ToolChoice | str, tools: list[ToolDefinition]) -> str:
     if tool_choice == ToolChoice.auto:
         return ""
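
With the `ChatCompletionRequest`-based prompt helpers removed, `convert_openai_message_to_raw_message` becomes the bridge from OpenAI-style messages to the `RawMessage` format consumed by the Llama chat formatters. A rough usage sketch; the message text is made up:

```python
import asyncio

from llama_stack.providers.utils.inference.prompt_adapter import (
    convert_openai_message_to_raw_message,
)
from llama_stack_api import OpenAIUserMessageParam


async def main() -> None:
    # Convert one OpenAI-style user message; image URLs in the content would be
    # resolved to raw media by interleaved_content_convert_to_raw along the way.
    raw = await convert_openai_message_to_raw_message(
        OpenAIUserMessageParam(content="What is the capital of France?")
    )
    print(raw.role)     # "user"
    print(raw.content)  # the converted raw content


asyncio.run(main())
```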
llama_stack/providers/utils/inference/stream_utils.py (+23 -0, new file):

@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from collections.abc import AsyncIterator
+
+from llama_stack.log import get_logger
+
+log = get_logger(name=__name__, category="providers::utils")
+
+
+async def wrap_async_stream[T](stream: AsyncIterator[T]) -> AsyncIterator[T]:
+    """
+    Wrap an async stream to ensure it returns a proper AsyncIterator.
+    """
+    try:
+        async for item in stream:
+            yield item
+    except Exception as e:
+        log.error(f"Error in wrapped async stream: {e}")
+        raise