llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
llama_stack/core/utils/config.py
CHANGED
@@ -9,7 +9,10 @@ from typing import Any
 
 def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]:
     """Redact sensitive information from config before printing."""
-    sensitive_patterns = ["api_key", "api_token", "password", "secret"]
+    sensitive_patterns = ["api_key", "api_token", "password", "secret", "token"]
+
+    # Specific configuration field names that should NOT be redacted despite containing "token"
+    safe_token_fields = ["chunk_size_tokens", "max_tokens", "default_chunk_overlap_tokens"]
 
     def _redact_value(v: Any) -> Any:
         if isinstance(v, dict):
@@ -21,7 +24,10 @@ def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]:
     def _redact_dict(d: dict[str, Any]) -> dict[str, Any]:
         result = {}
         for k, v in d.items():
-            if any(pattern in k.lower() for pattern in sensitive_patterns):
+            # Don't redact if it's a safe field
+            if any(safe_field in k.lower() for safe_field in safe_token_fields):
+                result[k] = _redact_value(v)
+            elif any(pattern in k.lower() for pattern in sensitive_patterns):
                 result[k] = "********"
             else:
                 result[k] = _redact_value(v)
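The net effect: any key containing "token" is now masked, while an explicit allow-list keeps count-style fields readable. A minimal sketch of the expected behavior (illustrative values, assuming the module path shown above):

    from llama_stack.core.utils.config import redact_sensitive_fields

    cfg = {"api_token": "sk-123", "max_tokens": 512, "nested": {"hf_token": "hf-abc"}}
    print(redact_sensitive_fields(cfg))
    # expected: {'api_token': '********', 'max_tokens': 512, 'nested': {'hf_token': '********'}}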
llama_stack/core/utils/config_resolution.py
CHANGED
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import StrEnum
 from pathlib import Path
 
 from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
@@ -16,21 +15,14 @@ logger = get_logger(name=__name__, category="core")
 DISTRO_DIR = Path(__file__).parent.parent.parent.parent / "llama_stack" / "distributions"
 
 
-class Mode(StrEnum):
-    RUN = "run"
-    BUILD = "build"
-
-
 def resolve_config_or_distro(
     config_or_distro: str,
-    mode: Mode = Mode.RUN,
 ) -> Path:
     """
     Resolve a config/distro argument to a concrete config file path.
 
     Args:
         config_or_distro: User input (file path, distribution name, or built distribution)
-        mode: Mode resolving for ("run", "build", "server")
 
     Returns:
         Path to the resolved config file
@@ -47,38 +39,50 @@ def resolve_config_or_distro(
 
     # Strategy 2: Try as distribution name (if no .yaml extension)
     if not config_or_distro.endswith(".yaml"):
-        distro_config = _get_distro_config_path(config_or_distro, mode)
+        distro_config = _get_distro_config_path(config_or_distro)
         if distro_config.exists():
             logger.debug(f"Using distribution: {distro_config}")
             return distro_config
 
-    # Strategy 3: Try as built distribution name
-    distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
+    # Strategy 3: Try as distro config path (if no .yaml extension and contains a slash)
+    # eg: starter::run-with-postgres-store.yaml
+    # Use :: to avoid slash and confusion with a filesystem path
+    if "::" in config_or_distro:
+        distro_name, config_name = config_or_distro.split("::")
+        distro_config = _get_distro_config_path(distro_name, config_name)
+        if distro_config.exists():
+            logger.info(f"Using distribution: {distro_config}")
+            return distro_config
+
+    # Strategy 4: Try as built distribution name
+    distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-config.yaml"
     if distrib_config.exists():
         logger.debug(f"Using built distribution: {distrib_config}")
         return distrib_config
 
-    distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
+    distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / "config.yaml"
    if distrib_config.exists():
         logger.debug(f"Using built distribution: {distrib_config}")
         return distrib_config
 
-    # Strategy 4: Failed - provide helpful error
-    raise ValueError(_format_resolution_error(config_or_distro, mode))
+    # Strategy 5: Failed - provide helpful error
+    raise ValueError(_format_resolution_error(config_or_distro))
 
 
-def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
+def _get_distro_config_path(distro_name: str, path: str | None = None) -> Path:
     """Get the config file path for a distro."""
-    return DISTRO_DIR / distro_name / f"{mode}.yaml"
+    if not path or not path.endswith(".yaml"):
+        path = "config.yaml"
+    return DISTRO_DIR / distro_name / path
 
 
-def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:
+def _format_resolution_error(config_or_distro: str) -> str:
     """Format a helpful error message for resolution failures."""
     from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
 
-    distro_path = _get_distro_config_path(config_or_distro, mode)
-    distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
-    distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
+    distro_path = _get_distro_config_path(config_or_distro)
+    distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-config.yaml"
+    distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-config.yaml"
 
     available_distros = _get_available_distros()
     distros_str = ", ".join(available_distros) if available_distros else "none found"
@@ -99,15 +103,14 @@ Did you mean one of these distributions?
 
 
 def _get_available_distros() -> list[str]:
     """Get list of available distro names."""
-
-
-
-
-
-
-
-
-    )
+
+    distros = []
+    if DISTRO_DIR.exists():
+        distros.extend([d.name for d in DISTRO_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")])
+    if DISTRIBS_BASE_DIR.exists():
+        distros.extend([d.name for d in DISTRIBS_BASE_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")])
+
+    return list(set(distros))
 
 
 def _format_distro_suggestions(distros: list[str], user_input: str) -> str:
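The new "::" form lets callers pick a specific config file shipped with a distribution. A short sketch, using the example named in the diff's own comment:

    from llama_stack.core.utils.config_resolution import resolve_config_or_distro

    # Strategy 3 above: "<distro>::<config>.yaml" avoids confusion with a filesystem path
    path = resolve_config_or_distro("starter::run-with-postgres-store.yaml")
    # expected to resolve under llama_stack/distributions/starter/run-with-postgres-store.yaml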
llama_stack/core/utils/context.py
CHANGED
@@ -7,8 +7,6 @@
 from collections.abc import AsyncGenerator
 from contextvars import ContextVar
 
-from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT
-
 _MISSING = object()
 
 
@@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
             try:
                 yield item
                 # Update our tracked values with any changes made during this iteration
-                #
-                # to allow nested span tracking for telemetry
+                # This allows context changes to persist across generator iterations
                 for context_var in context_vars:
-
-                    initial_context_values[context_var.name] = context_var.get()
+                    initial_context_values[context_var.name] = context_var.get()
             finally:
-                # Restore
-                # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
+                # Restore context vars after each yield to prevent leaks between requests
                 for context_var in context_vars:
-
-                    _restore_context_var(context_var)
+                    _restore_context_var(context_var)
 
     return wrapper()
llama_stack/core/utils/exec.py
CHANGED
@@ -84,6 +84,15 @@ def run_command(command: list[str]) -> int:
             text=True,
             check=False,
         )
+
+        # Print stdout and stderr if command failed
+        if result.returncode != 0:
+            log.error(f"Command {' '.join(command)} failed with returncode {result.returncode}")
+            if result.stdout:
+                log.error(f"STDOUT: {result.stdout}")
+            if result.stderr:
+                log.error(f"STDERR: {result.stderr}")
+
         return result.returncode
     except subprocess.SubprocessError as e:
         log.error(f"Subprocess error: {e}")
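With this change a failing subprocess is no longer silent: the command line, return code, and captured output are logged before the code is handed back. A minimal sketch (the command itself is illustrative):

    from llama_stack.core.utils.exec import run_command

    rc = run_command(["python", "-c", "import sys; sys.exit(3)"])
    # logs the failed command plus its STDOUT/STDERR, then returns 3
    assert rc == 3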
llama_stack/core/utils/type_inspection.py
ADDED
@@ -0,0 +1,45 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Utility functions for type inspection and parameter handling.
+"""
+
+import inspect
+import typing
+from typing import Any, get_args, get_origin
+
+from pydantic import BaseModel
+from pydantic.fields import FieldInfo
+
+
+def is_unwrapped_body_param(param_type: Any) -> bool:
+    """
+    Check if a parameter type represents an unwrapped body parameter.
+    An unwrapped body parameter is an Annotated type with Body(embed=False)
+
+    This is used to determine whether request parameters should be flattened
+    in OpenAPI specs and client libraries (matching FastAPI's embed=False behavior).
+
+    Args:
+        param_type: The parameter type annotation to check
+
+    Returns:
+        True if the parameter should be treated as an unwrapped body parameter
+    """
+    # Check if it's Annotated with Body(embed=False)
+    if get_origin(param_type) is typing.Annotated:
+        args = get_args(param_type)
+        base_type = args[0]
+        metadata = args[1:]
+
+        # Look for Body annotation with embed=False
+        # Body() returns a FieldInfo object, so we check for that type and the embed attribute
+        for item in metadata:
+            if isinstance(item, FieldInfo) and hasattr(item, "embed") and not item.embed:
+                return inspect.isclass(base_type) and issubclass(base_type, BaseModel)
+
+    return False
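A short usage sketch of the new helper, assuming FastAPI's Body marker as the docstring references; the request model here is hypothetical:

    from typing import Annotated

    from fastapi import Body
    from pydantic import BaseModel

    from llama_stack.core.utils.type_inspection import is_unwrapped_body_param

    class CreateWidgetRequest(BaseModel):  # hypothetical model, for illustration only
        name: str

    # Annotated[Model, Body(embed=False)] -> the model's fields are flattened into the body
    assert is_unwrapped_body_param(Annotated[CreateWidgetRequest, Body(embed=False)])
    # a bare model (no Body(embed=False) marker) is not unwrapped
    assert not is_unwrapped_body_param(CreateWidgetRequest)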
llama_stack/distributions/dell/{run.yaml → config.yaml}
CHANGED
@@ -105,6 +105,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -128,5 +131,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
llama_stack/distributions/dell/dell.py
CHANGED
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.models import ModelType
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -17,6 +16,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
+from llama_stack_api import ModelType
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -111,7 +111,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
                     "vector_io": [chromadb_provider],
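Note the import move: ModelType (and the rest of the API surface) now comes from the new top-level llama_stack_api package added in this release, not from llama_stack.apis.*:

    # 0.3.5
    from llama_stack.apis.models import ModelType

    # 0.4.0
    from llama_stack_api import ModelType

The same one-line swap repeats across the distribution templates below.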
llama_stack/distributions/dell/run-with-safety.yaml
CHANGED
@@ -109,6 +109,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -137,5 +140,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
|
|
@@ -112,6 +112,9 @@ storage:
|
|
|
112
112
|
conversations:
|
|
113
113
|
table_name: openai_conversations
|
|
114
114
|
backend: sql_default
|
|
115
|
+
prompts:
|
|
116
|
+
namespace: prompts
|
|
117
|
+
backend: kv_default
|
|
115
118
|
registered_resources:
|
|
116
119
|
models:
|
|
117
120
|
- metadata: {}
|
|
@@ -135,5 +138,3 @@ registered_resources:
|
|
|
135
138
|
provider_id: rag-runtime
|
|
136
139
|
server:
|
|
137
140
|
port: 8321
|
|
138
|
-
telemetry:
|
|
139
|
-
enabled: true
|
|
@@ -6,7 +6,6 @@
|
|
|
6
6
|
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
|
|
9
|
-
from llama_stack.apis.models import ModelType
|
|
10
9
|
from llama_stack.core.datatypes import (
|
|
11
10
|
BuildProvider,
|
|
12
11
|
ModelInput,
|
|
@@ -22,6 +21,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
|
|
|
22
21
|
SentenceTransformersInferenceConfig,
|
|
23
22
|
)
|
|
24
23
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
|
|
24
|
+
from llama_stack_api import ModelType
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def get_distribution_template() -> DistributionTemplate:
|
|
@@ -105,7 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|
|
105
105
|
template_path=Path(__file__).parent / "doc_template.md",
|
|
106
106
|
providers=providers,
|
|
107
107
|
run_configs={
|
|
108
|
-
"
|
|
108
|
+
"config.yaml": RunConfigSettings(
|
|
109
109
|
provider_overrides={
|
|
110
110
|
"inference": [inference_provider, embedding_provider],
|
|
111
111
|
"vector_io": [vector_io_provider],
|
|
llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
CHANGED
@@ -122,6 +122,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -150,5 +153,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
llama_stack/distributions/nvidia/{run.yaml → config.yaml}
CHANGED
@@ -16,9 +16,8 @@ providers:
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -100,6 +99,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
@@ -112,5 +114,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
llama_stack/distributions/nvidia/nvidia.py
CHANGED
@@ -81,7 +81,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
                     "datasetio": [datasetio_provider],
llama_stack/distributions/nvidia/run-with-safety.yaml
CHANGED
@@ -16,9 +16,8 @@ providers:
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
@@ -111,6 +110,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -133,5 +135,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
llama_stack/distributions/oci/config.yaml
ADDED
@@ -0,0 +1,134 @@
+version: 2
+image_name: oci
+apis:
+- agents
+- datasetio
+- eval
+- files
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: oci
+    provider_type: remote::oci
+    config:
+      oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal}
+      oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}
+      oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT}
+      oci_region: ${env.OCI_REGION:=us-ashburn-1}
+      oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/oci/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+server:
+  port: 8321
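All ${env.VAR:=default} placeholders follow the stack's environment-substitution syntax: use the variable if set, else fall back to the default. An illustrative mimic of that resolution (not the package's actual config loader):

    import os
    import re

    def substitute_env(value: str) -> str:
        # resolve ${env.NAME:=default} -> os.environ["NAME"] or the default
        pattern = re.compile(r"\$\{env\.([A-Za-z0-9_]+):=([^}]*)\}")
        return pattern.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)

    print(substitute_env("${env.OCI_REGION:=us-ashburn-1}"))  # "us-ashburn-1" unless OCI_REGION is set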
llama_stack/distributions/oci/oci.py
ADDED
@@ -0,0 +1,108 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pathlib import Path
+
+from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
+from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
+from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack.providers.remote.inference.oci.config import OCIConfig
+
+
+def get_distribution_template(name: str = "oci") -> DistributionTemplate:
+    providers = {
+        "inference": [BuildProvider(provider_type="remote::oci")],
+        "vector_io": [
+            BuildProvider(provider_type="inline::faiss"),
+            BuildProvider(provider_type="remote::chromadb"),
+            BuildProvider(provider_type="remote::pgvector"),
+        ],
+        "safety": [BuildProvider(provider_type="inline::llama-guard")],
+        "agents": [BuildProvider(provider_type="inline::meta-reference")],
+        "eval": [BuildProvider(provider_type="inline::meta-reference")],
+        "datasetio": [
+            BuildProvider(provider_type="remote::huggingface"),
+            BuildProvider(provider_type="inline::localfs"),
+        ],
+        "scoring": [
+            BuildProvider(provider_type="inline::basic"),
+            BuildProvider(provider_type="inline::llm-as-judge"),
+            BuildProvider(provider_type="inline::braintrust"),
+        ],
+        "tool_runtime": [
+            BuildProvider(provider_type="remote::brave-search"),
+            BuildProvider(provider_type="remote::tavily-search"),
+            BuildProvider(provider_type="inline::rag-runtime"),
+            BuildProvider(provider_type="remote::model-context-protocol"),
+        ],
+        "files": [BuildProvider(provider_type="inline::localfs")],
+    }
+
+    inference_provider = Provider(
+        provider_id="oci",
+        provider_type="remote::oci",
+        config=OCIConfig.sample_run_config(),
+    )
+
+    vector_io_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )
+
+    files_provider = Provider(
+        provider_id="meta-reference-files",
+        provider_type="inline::localfs",
+        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+    ]
+
+    return DistributionTemplate(
+        name=name,
+        distro_type="remote_hosted",
+        description="Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM inference with scalable cloud services",
+        container_image=None,
+        template_path=Path(__file__).parent / "doc_template.md",
+        providers=providers,
+        run_configs={
+            "config.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [inference_provider],
+                    "vector_io": [vector_io_provider],
+                    "files": [files_provider],
+                },
+                default_tool_groups=default_tool_groups,
+            ),
+        },
+        run_config_env_vars={
+            "OCI_AUTH_TYPE": (
+                "instance_principal",
+                "OCI authentication type (instance_principal or config_file)",
+            ),
+            "OCI_REGION": (
+                "",
+                "OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1)",
+            ),
+            "OCI_COMPARTMENT_OCID": (
+                "",
+                "OCI compartment ID for the Generative AI service",
+            ),
+            "OCI_CONFIG_FILE_PATH": (
+                "~/.oci/config",
+                "OCI config file path (required if OCI_AUTH_TYPE is config_file)",
+            ),
+            "OCI_CLI_PROFILE": (
+                "DEFAULT",
+                "OCI CLI profile name to use from config file",
+            ),
+        },
+    )