llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -17,41 +17,43 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
+    config:
+      api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
+      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -73,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
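
Note: the `${env.VAR:=default}` and `${env.VAR:+value}` forms above follow shell-style parameter expansion: `:=` resolves to the variable's value or the given default, while `:+` resolves to the given value only when the variable is set (so `${env.CEREBRAS_API_KEY:+cerebras}` disables that provider entirely when the key is absent). A minimal sketch of that substitution logic; the helper name and regex are illustrative assumptions, not llama-stack's actual resolver:

import os
import re

# Hypothetical re-implementation of the ${env.VAR:=default} / ${env.VAR:+alt}
# syntax used in the config files above.
_ENV_PATTERN = re.compile(r"\$\{env\.(?P<name>\w+):(?P<op>[=+])(?P<arg>[^}]*)\}")

def substitute_env(text: str) -> str:
    def repl(m: re.Match) -> str:
        value = os.environ.get(m.group("name"))
        if m.group("op") == "=":
            # := use the variable if set, otherwise the default
            return value if value is not None else m.group("arg")
        # :+ emit the alternative only when the variable is set
        return m.group("arg") if value else ""
    return _ENV_PATTERN.sub(repl, text)

# With OLLAMA_URL unset:
# substitute_env("${env.OLLAMA_URL:+ollama}") -> ""
# substitute_env("${env.OLLAMA_URL:=http://localhost:11434/v1}") -> "http://localhost:11434/v1"
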
@@ -259,6 +261,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -279,10 +284,56 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  file_search_params:
+    header_template: 'knowledge_search tool found {num_chunks} chunks:
+
+      BEGIN of knowledge_search tool results.
+
+      '
+    footer_template: 'END of knowledge_search tool results.
+
+      '
+  context_prompt_params:
+    chunk_annotation_template: 'Result {index}
+
+      Content: {chunk.content}
+
+      Metadata: {metadata}
+
+      '
+    context_template: 'The above results were retrieved to help answer the user''s
+      query: "{query}". Use them as supporting information only in answering this
+      query. {annotation_instruction}
+
+      '
+  annotation_prompt_params:
+    enable_annotations: true
+    annotation_instruction_template: Cite sources immediately at the end of sentences
+      before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'.
+      Do not add extra punctuation. Use only the file IDs provided, do not invent
+      new ones.
+    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
+
+      {chunk_text}
+
+      '
+  file_ingestion_params:
+    default_chunk_size_tokens: 512
+    default_chunk_overlap_tokens: 128
+  chunk_retrieval_params:
+    chunk_multiplier: 5
+    max_tokens_in_context: 4000
+    default_reranker_strategy: rrf
+    rrf_impact_factor: 60.0
+    weighted_search_alpha: 0.5
+  file_batch_params:
+    max_concurrent_files_per_batch: 3
+    file_batch_chunk_size: 10
+    cleanup_interval_seconds: 86400
+safety:
+  default_shield_id: llama-guard
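
Note: the templates added above are plain placeholder strings; fields like {num_chunks}, {index}, and {chunk.content} suggest rendering with Python's str.format, which also resolves attribute access on objects. An illustrative sketch only; the Chunk stand-in and the rendering loop are assumptions, not the provider's actual code:

from dataclasses import dataclass

@dataclass
class Chunk:  # hypothetical stand-in for the retrieved-chunk object
    content: str

header_template = "knowledge_search tool found {num_chunks} chunks:\n\nBEGIN of knowledge_search tool results.\n\n"
chunk_annotation_template = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n\n"

chunks = [Chunk("Llama Stack moved its public APIs into llama_stack_api.")]
out = header_template.format(num_chunks=len(chunks))
for i, chunk in enumerate(chunks, start=1):
    # str.format resolves {chunk.content} via attribute lookup on the passed object
    out += chunk_annotation_template.format(index=i, chunk=chunk, metadata={"source": "notes.md"})
out += "END of knowledge_search tool results.\n"
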
@@ -12,20 +12,16 @@ import rich
 import yaml
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.datasets import DatasetPurpose
-from llama_stack.apis.models import ModelType
 from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
     Api,
     BenchmarkInput,
-    BuildConfig,
     BuildProvider,
     DatasetInput,
-    DistributionSpec,
     ModelInput,
     Provider,
+    SafetyConfig,
     ShieldInput,
-    TelemetryConfig,
     ToolGroupInput,
     VectorStoresConfig,
 )
@@ -36,13 +32,11 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageBackendType,
 )
+from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig
+from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig
 from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
-from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
-from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
+from llama_stack_api import DatasetPurpose, ModelType
 
 
 def filter_empty_values(obj: Any) -> Any:
@@ -188,7 +182,7 @@ class RunConfigSettings(BaseModel):
     default_datasets: list[DatasetInput] | None = None
     default_benchmarks: list[BenchmarkInput] | None = None
     vector_stores_config: VectorStoresConfig | None = None
-
+    safety_config: SafetyConfig | None = None
     storage_backends: dict[str, Any] | None = None
     storage_stores: dict[str, Any] | None = None
 
@@ -257,6 +251,10 @@ class RunConfigSettings(BaseModel):
                 backend="sql_default",
                 table_name="openai_conversations",
             ).model_dump(exclude_none=True),
+            "prompts": KVStoreReference(
+                backend="kv_default",
+                namespace="prompts",
+            ).model_dump(exclude_none=True),
         }
 
         storage_config = dict(
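
Note: the KVStoreReference(...).model_dump(exclude_none=True) calls above are what produce the YAML prompts: block shown earlier. A rough equivalence sketch; the field names come from the diff, while the import path is an assumption:

from llama_stack.core.storage.datatypes import KVStoreReference  # assumed location

ref = KVStoreReference(backend="kv_default", namespace="prompts")
print(ref.model_dump(exclude_none=True))  # e.g. {'backend': 'kv_default', 'namespace': 'prompts'}
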
@@ -284,12 +282,14 @@ class RunConfigSettings(BaseModel):
             "server": {
                 "port": 8321,
             },
-            "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
         }
 
         if self.vector_stores_config:
             config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)
 
+        if self.safety_config:
+            config["safety"] = self.safety_config.model_dump(exclude_none=True)
+
         return config
 
 
@@ -314,55 +314,6 @@ class DistributionTemplate(BaseModel):
 
     available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None
 
-    # we may want to specify additional pip packages without necessarily indicating a
-    # specific "default" inference store (which is what typically used to dictate additional
-    # pip packages)
-    additional_pip_packages: list[str] | None = None
-
-    def build_config(self) -> BuildConfig:
-        additional_pip_packages: list[str] = []
-        for run_config in self.run_configs.values():
-            run_config_ = run_config.run_config(self.name, self.providers, self.container_image)
-
-            # TODO: This is a hack to get the dependencies for internal APIs into build
-            # We should have a better way to do this by formalizing the concept of "internal" APIs
-            # and providers, with a way to specify dependencies for them.
-
-            storage_cfg = run_config_.get("storage", {})
-            for backend_cfg in storage_cfg.get("backends", {}).values():
-                store_type = backend_cfg.get("type")
-                if not store_type:
-                    continue
-                if str(store_type).startswith("kv_"):
-                    additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
-                elif str(store_type).startswith("sql_"):
-                    additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))
-
-        if self.additional_pip_packages:
-            additional_pip_packages.extend(self.additional_pip_packages)
-
-        # Create minimal providers for build config (without runtime configs)
-        build_providers = {}
-        for api, providers in self.providers.items():
-            build_providers[api] = []
-            for provider in providers:
-                # Create a minimal build provider object with only essential build information
-                build_provider = BuildProvider(
-                    provider_type=provider.provider_type,
-                    module=provider.module,
-                )
-                build_providers[api].append(build_provider)
-
-        return BuildConfig(
-            distribution_spec=DistributionSpec(
-                description=self.description,
-                container_image=self.container_image,
-                providers=build_providers,
-            ),
-            image_type=LlamaStackImageType.VENV.value, # default to venv
-            additional_pip_packages=sorted(set(additional_pip_packages)),
-        )
-
     def generate_markdown_docs(self) -> str:
         providers_table = "| API | Provider(s) |\n"
         providers_table += "|-----|-------------|\n"
@@ -415,6 +366,7 @@ class DistributionTemplate(BaseModel):
                 providers_table=providers_table,
                 run_config_env_vars=self.run_config_env_vars,
                 default_models=default_models,
+                run_configs=list(self.run_configs.keys()),
             )
         return ""
 
@@ -433,14 +385,6 @@ class DistributionTemplate(BaseModel):
         for output_dir in [yaml_output_dir, doc_output_dir]:
             output_dir.mkdir(parents=True, exist_ok=True)
 
-        build_config = self.build_config()
-        with open(yaml_output_dir / "build.yaml", "w") as f:
-            yaml.safe_dump(
-                filter_empty_values(build_config.model_dump(exclude_none=True)),
-                f,
-                sort_keys=False,
-            )
-
         for yaml_pth, settings in self.run_configs.items():
             run_config = settings.run_config(self.name, self.providers, self.container_image)
             with open(yaml_output_dir / yaml_pth, "w") as f:
@@ -15,7 +15,7 @@ providers:
   - provider_id: watsonx
     provider_type: remote::watsonx
     config:
-
+      base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:=}
       project_id: ${env.WATSONX_PROJECT_ID:=}
   vector_io:
@@ -115,6 +115,9 @@ storage:
     conversations:
       table_name: openai_conversations
      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
@@ -129,5 +132,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -69,7 +69,7 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
         template_path=None,
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
                     "files": [files_provider],
llama_stack/log.py CHANGED
@@ -9,15 +9,23 @@ import os
 import re
 from logging.config import dictConfig  # allow-direct-logging
 
+from pydantic import BaseModel, Field
 from rich.console import Console
 from rich.errors import MarkupError
 from rich.logging import RichHandler
 
-from llama_stack.core.datatypes import LoggingConfig
-
 # Default log level
 DEFAULT_LOG_LEVEL = logging.INFO
 
+
+class LoggingConfig(BaseModel):
+    category_levels: dict[str, str] = Field(
+        default_factory=dict,
+        description="""
+ Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
+    )
+
+
 # Predefined categories
 CATEGORIES = [
     "core",
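
Note: with LoggingConfig now defined in llama_stack.log itself rather than imported from core.datatypes, it can be constructed directly and handed to the parse_yaml_config helper shown further down. A small usage sketch, assuming both names remain importable from llama_stack.log:

from llama_stack.log import LoggingConfig, parse_yaml_config

cfg = LoggingConfig(category_levels={"core": "DEBUG", "server": "INFO"})
levels = parse_yaml_config(cfg)  # e.g. {"core": 10, "server": 20, ...} (stdlib logging ints)
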
@@ -29,7 +37,6 @@ CATEGORIES = [
     "eval",
     "tools",
     "client",
-    "telemetry",
     "openai",
     "openai_responses",
     "openai_conversations",
@@ -37,6 +44,7 @@ CATEGORIES = [
     "providers",
     "models",
     "files",
+    "file_processors",
     "vector_io",
     "tool_runtime",
     "cli",
@@ -84,10 +92,10 @@ def config_to_category_levels(category: str, level: str):
 
 def parse_yaml_config(yaml_config: LoggingConfig) -> dict[str, int]:
     """
-    Helper function to parse a yaml logging configuration found in the run.yaml
+    Helper function to parse a yaml logging configuration found in the config.yaml
 
     Parameters:
-        yaml_config (Logging): the logger config object found in the run.yaml
+        yaml_config (Logging): the logger config object found in the config.yaml
 
     Returns:
         Dict[str, int]: A dictionary mapping categories to their log levels.
@@ -137,7 +145,8 @@ class CustomRichHandler(RichHandler):
         # Set a reasonable default width for console output, especially when redirected to files
         console_width = int(os.environ.get("LLAMA_STACK_LOG_WIDTH", "120"))
         # Don't force terminal codes to avoid ANSI escape codes in log files
-
+        # Ensure logs go to stderr, not stdout
+        kwargs["console"] = Console(width=console_width, stderr=True)
         super().__init__(*args, **kwargs)
 
     def emit(self, record):
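
Note: routing the Rich console to stderr keeps log lines out of stdout, which matters whenever a CLI's stdout is piped or parsed. A quick standalone check of the rich API used above:

from rich.console import Console

Console(stderr=True).print("goes to stderr")  # e.g. `... 2>/dev/null` now silences logs
Console().print("goes to stdout")             # the default console writes to stdout
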
@@ -177,6 +186,7 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
         log_file (str | None): Path to a log file to additionally pipe the logs into.
             If None, reads from LLAMA_STACK_LOG_FILE environment variable.
     """
+    global _category_levels
     # Read from environment variables if not explicitly provided
     if category_levels is None:
         category_levels = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
@@ -184,6 +194,9 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
         if env_config:
             category_levels.update(parse_environment_config(env_config))
 
+    # Update the module-level _category_levels so that already-created loggers pick up the new levels
+    _category_levels.update(category_levels)
+
     if log_file is None:
         log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
     log_format = "%(asctime)s %(name)s:%(lineno)d %(category)s: %(message)s"
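
Note: the env_config parsed above comes from the LLAMA_STACK_LOGGING variable, which (per the llama-stack logging docs) takes semicolon-separated category=level pairs. A hedged sketch of that flow, assuming llama_stack is installed:

import os

# Assumed format: semicolon-separated category=level pairs
os.environ["LLAMA_STACK_LOGGING"] = "core=debug;server=warning"

from llama_stack.log import setup_logging

setup_logging()  # merges env overrides into the module-level _category_levels (see diff above)
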
@@ -268,14 +281,18 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
     }
     dictConfig(logging_config)
 
-    #
-    # already-configured loggers (e.g., uvicorn) and our own llama_stack loggers
+    # Update log levels for all loggers that were created before setup_logging was called
     for name, logger in logging.root.manager.loggerDict.items():
         if isinstance(logger, logging.Logger):
-            # Skip infrastructure loggers (uvicorn, fastapi)
-            if name.startswith(("uvicorn", "fastapi"
+            # Skip infrastructure loggers (uvicorn, fastapi) to preserve their configured levels
+            if name.startswith(("uvicorn", "fastapi")):
                 continue
-
+            # Update llama_stack loggers if root level was explicitly set (e.g., via all=CRITICAL)
+            if name.startswith("llama_stack") and "root" in category_levels:
+                logger.setLevel(root_level)
+            # Update third-party library loggers
+            elif not name.startswith("llama_stack"):
+                logger.setLevel(root_level)
 
 
 def get_logger(
@@ -38,18 +38,18 @@ def maybe_reshard_state_dict(
     mmap: bool = True,
 ) -> dict[str, torch.Tensor]:
     if str(map_location) == "cpu":
-        torch.
+        torch.set_default_dtype(torch.bfloat16)
     else:
-        torch.
+        torch.set_default_dtype(torch.bfloat16)
 
-    ckpt_paths = np.array(sorted(ckpt_paths))
+    ckpt_paths_array = np.array(sorted(ckpt_paths))
 
     new_mp_size, new_mp_rank = get_model_parallel_world_size(), get_model_parallel_rank()
-    old_mp_size = len(ckpt_paths)
+    old_mp_size = len(ckpt_paths_array)
     old_mp_ranks = map_mp_rank(old_mp_size, new_mp_size, new_mp_rank)
 
-    print(f"Loading checkpoint shards:\n{str(ckpt_paths[old_mp_ranks])}")
-    paths = ckpt_paths[old_mp_ranks]
+    print(f"Loading checkpoint shards:\n{str(ckpt_paths_array[old_mp_ranks])}")  # type: ignore
+    paths = ckpt_paths_array[old_mp_ranks]  # type: ignore
     state_dicts = [torch.load(str(p), map_location=map_location, mmap=mmap) for p in paths]
 
     if new_mp_size == old_mp_size:
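
Note: the checkpoint paths are kept in a NumPy array because of fancy indexing: ckpt_paths_array[old_mp_ranks] selects several shards at once, which a plain Python list cannot do with a list index. In miniature:

import numpy as np

paths = np.array(sorted(["shard-01.pth", "shard-00.pth", "shard-02.pth"]))
ranks = [0, 2]       # shards this model-parallel rank should load
print(paths[ranks])  # ['shard-00.pth' 'shard-02.pth']
# sorted([...])[ranks] on the plain list would raise TypeError
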
@@ -79,6 +79,8 @@ def add_hadamard_transform_for_spinquant(model: torch.nn.Module, prefix: str = ""):
     for module_name, module in model.named_children():
         child_full_name = prefix + "." + module_name
         if re.search(pattern_last_linear_ffn, child_full_name):
+            # Module matching this pattern should be nn.Linear with in_features
+            assert isinstance(module, nn.Linear), f"Expected nn.Linear, got {type(module)}"
             new_module = nn.Sequential(HadamardModule(group_size=module.in_features), module)
             del module
             setattr(model, module_name, new_module)
@@ -26,8 +26,10 @@ from fairscale.nn.model_parallel.initialize import (
 )
 from termcolor import cprint
 
+from llama_stack.models.llama.datatypes import ToolPromptFormat
+
 from ..checkpoint import maybe_reshard_state_dict
-from ..datatypes import GenerationResult, QuantizationMode, RawContent, RawMessage, ToolPromptFormat
+from ..datatypes import GenerationResult, QuantizationMode, RawContent, RawMessage
 from .args import ModelArgs
 from .chat_format import ChatFormat, LLMInput
 from .model import Transformer
@@ -15,13 +15,10 @@ from pathlib import Path
 
 from termcolor import colored
 
+from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall, ToolDefinition, ToolPromptFormat
+
 from ..datatypes import (
-    BuiltinTool,
     RawMessage,
-    StopReason,
-    ToolCall,
-    ToolDefinition,
-    ToolPromptFormat,
 )
 from . import template_data
 from .chat_format import ChatFormat
@@ -141,15 +141,15 @@ def build_encoder_attention_mask(
     """
     Build vision encoder attention mask that omits padding tokens.
     """
-    masks = []
+    masks_list: list[torch.Tensor] = []
     for arx in ar:
         mask_i = torch.ones((num_chunks, x.shape[2], 1), dtype=x.dtype)
         mask_i[: arx[0] * arx[1], :ntok] = 0
         mask_i = mask_i.view(num_chunks * x.shape[2], -1)
         mask_i = mask_i @ mask_i.T * get_negative_inf_value(x.dtype)
         mask_i = mask_i.unsqueeze(0)
-        masks.append(mask_i)
-    masks = torch.stack(masks).to(x.device).expand(-1, n_heads, -1, -1)
+        masks_list.append(mask_i)
+    masks = torch.stack(masks_list).to(x.device).expand(-1, n_heads, -1, -1)
     return masks
 
 
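
Note: the mask construction above works because mask_i is a 0/1 column vector (0 for real positions, 1 for padding): the outer product mask_i @ mask_i.T is 1 exactly where both query and key positions are padding, and multiplying by a large negative value turns those entries into additive -inf attention biases. A miniature version:

import torch

v = torch.tensor([[0.0], [1.0], [1.0]])  # 0 = real position, 1 = padding
bias = v @ v.T * torch.finfo(torch.float32).min
# bias[i, j] is hugely negative only where positions i and j are both padding; 0 elsewhere
print(bias)
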
@@ -95,7 +95,7 @@ class VariableSizeImageTransform:
                 factors_set.add(n // i)
         return factors_set
 
-    def find_supported_resolutions(self, max_num_chunks: int, patch_size: int) ->
+    def find_supported_resolutions(self, max_num_chunks: int, patch_size: int) -> list[tuple[int, int]]:
         """
         Computes all of the allowed resoltuions for a fixed number of chunks
         and patch_size. Useful for when dividing an image into chunks.
@@ -198,10 +198,10 @@ class VariableSizeImageTransform:
 
     def resize_without_distortion(
         self,
-        image:
+        image: Image.Image,
         target_size: tuple[int, int],
         max_upscaling_size: int | None,
-    ) ->
+    ) -> Image.Image:
         """
         Used to resize an image to target_resolution, without distortion.
 
@@ -380,12 +380,12 @@ class VariableSizeImageTransform:
         assert isinstance(image, Image.Image), type(image)
         w, h = image.size
 
-        possible_resolutions = self.find_supported_resolutions(max_num_chunks=max_num_chunks, patch_size=self.size)
-        possible_resolutions = torch.tensor(possible_resolutions)
+        possible_resolutions_list = self.find_supported_resolutions(max_num_chunks=max_num_chunks, patch_size=self.size)
+        possible_resolutions_tensor = torch.tensor(possible_resolutions_list)
 
         best_resolution = self.get_best_fit(
             image_size=(w, h),
-            possible_resolutions=possible_resolutions,
+            possible_resolutions=possible_resolutions_tensor,
             resize_to_max_canvas=resize_to_max_canvas,
         )
 
@@ -8,8 +8,9 @@ import json
 import re
 
 from llama_stack.log import get_logger
+from llama_stack.models.llama.datatypes import BuiltinTool, ToolCall, ToolPromptFormat
 
-from ..datatypes import BuiltinTool, RecursiveType, ToolCall, ToolPromptFormat
+from ..datatypes import RecursiveType
 
 logger = get_logger(name=__name__, category="models::llama")
 
@@ -13,7 +13,7 @@
 
 import textwrap
 
-from llama_stack.
+from llama_stack.models.llama.datatypes import ToolDefinition
 from llama_stack.models.llama.llama3.prompt_templates.base import (
     PromptTemplate,
     PromptTemplateGeneratorBase,
@@ -15,7 +15,6 @@ async def get_provider_impl(
     config: MetaReferenceAgentsImplConfig,
     deps: dict[Api, Any],
     policy: list[AccessRule],
-    telemetry_enabled: bool = False,
 ):
     from .agents import MetaReferenceAgentsImpl
 
@@ -23,12 +22,13 @@ async def get_provider_impl(
         config,
         deps[Api.inference],
         deps[Api.vector_io],
-        deps[Api.safety],
+        deps.get(Api.safety),
         deps[Api.tool_runtime],
         deps[Api.tool_groups],
         deps[Api.conversations],
+        deps[Api.prompts],
+        deps[Api.files],
         policy,
-        telemetry_enabled,
     )
     await impl.initialize()
     return impl