llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
|
|
10
|
+
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
|
|
11
|
+
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
|
|
12
|
+
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
|
|
13
|
+
from llama_stack.providers.remote.inference.oci.config import OCIConfig
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_distribution_template(name: str = "oci") -> DistributionTemplate:
    """Assemble the distribution template for the OCI Generative AI distro.

    Args:
        name: Distribution name. It is used as the template name and as the
            last path component of ``~/.llama/distributions/<name>``, the
            directory string handed to the Faiss and local-files
            ``sample_run_config`` calls.

    Returns:
        A ``DistributionTemplate`` of type ``remote_hosted`` exposing a single
        ``config.yaml`` run configuration.
    """

    def _build(*provider_types: str) -> list[BuildProvider]:
        # One BuildProvider per provider type, preserving the given order.
        return [BuildProvider(provider_type=ptype) for ptype in provider_types]

    # Key order is kept exactly as declared; it is the order the template sees.
    build_providers = {
        "inference": _build("remote::oci"),
        "vector_io": _build("inline::faiss", "remote::chromadb", "remote::pgvector"),
        "safety": _build("inline::llama-guard"),
        "agents": _build("inline::meta-reference"),
        "eval": _build("inline::meta-reference"),
        "datasetio": _build("remote::huggingface", "inline::localfs"),
        "scoring": _build("inline::basic", "inline::llm-as-judge", "inline::braintrust"),
        "tool_runtime": _build(
            "remote::brave-search",
            "remote::tavily-search",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ),
        "files": _build("inline::localfs"),
    }

    # Shared by the vector_io and files sample configs below.
    distro_dir = f"~/.llama/distributions/{name}"

    oci_inference = Provider(
        provider_id="oci",
        provider_type="remote::oci",
        config=OCIConfig.sample_run_config(),
    )
    faiss_vector_io = Provider(
        provider_id="faiss",
        provider_type="inline::faiss",
        config=FaissVectorIOConfig.sample_run_config(distro_dir),
    )
    localfs_files = Provider(
        provider_id="meta-reference-files",
        provider_type="inline::localfs",
        config=LocalfsFilesImplConfig.sample_run_config(distro_dir),
    )

    # Only the web-search tool group is registered by default.
    websearch_group = ToolGroupInput(
        toolgroup_id="builtin::websearch",
        provider_id="tavily-search",
    )

    run_settings = RunConfigSettings(
        provider_overrides={
            "inference": [oci_inference],
            "vector_io": [faiss_vector_io],
            "files": [localfs_files],
        },
        default_tool_groups=[websearch_group],
    )

    # Each entry pairs a value with a human-readable description
    # (presumably (default, description) -- confirm against DistributionTemplate).
    env_vars = {
        "OCI_AUTH_TYPE": (
            "instance_principal",
            "OCI authentication type (instance_principal or config_file)",
        ),
        "OCI_REGION": (
            "",
            "OCI region (e.g., us-ashburn-1, us-chicago-1, us-phoenix-1, eu-frankfurt-1)",
        ),
        "OCI_COMPARTMENT_OCID": (
            "",
            "OCI compartment ID for the Generative AI service",
        ),
        "OCI_CONFIG_FILE_PATH": (
            "~/.oci/config",
            "OCI config file path (required if OCI_AUTH_TYPE is config_file)",
        ),
        "OCI_CLI_PROFILE": (
            "DEFAULT",
            "OCI CLI profile name to use from config file",
        ),
    }

    return DistributionTemplate(
        name=name,
        distro_type="remote_hosted",
        description="Use Oracle Cloud Infrastructure (OCI) Generative AI for running LLM inference with scalable cloud services",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=build_providers,
        run_configs={"config.yaml": run_settings},
        run_config_env_vars=env_vars,
    )
|
|
@@ -27,12 +27,12 @@ providers:
|
|
|
27
27
|
- provider_id: groq
|
|
28
28
|
provider_type: remote::groq
|
|
29
29
|
config:
|
|
30
|
-
|
|
30
|
+
base_url: https://api.groq.com/openai/v1
|
|
31
31
|
api_key: ${env.GROQ_API_KEY:=}
|
|
32
32
|
- provider_id: together
|
|
33
33
|
provider_type: remote::together
|
|
34
34
|
config:
|
|
35
|
-
|
|
35
|
+
base_url: https://api.together.xyz/v1
|
|
36
36
|
api_key: ${env.TOGETHER_API_KEY:=}
|
|
37
37
|
vector_io:
|
|
38
38
|
- provider_id: sqlite-vec
|
|
@@ -142,6 +142,9 @@ storage:
|
|
|
142
142
|
conversations:
|
|
143
143
|
table_name: openai_conversations
|
|
144
144
|
backend: sql_default
|
|
145
|
+
prompts:
|
|
146
|
+
namespace: prompts
|
|
147
|
+
backend: kv_default
|
|
145
148
|
registered_resources:
|
|
146
149
|
models:
|
|
147
150
|
- metadata: {}
|
|
@@ -248,5 +251,3 @@ registered_resources:
|
|
|
248
251
|
provider_id: rag-runtime
|
|
249
252
|
server:
|
|
250
253
|
port: 8321
|
|
251
|
-
telemetry:
|
|
252
|
-
enabled: true
|
|
@@ -5,8 +5,6 @@
|
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
|
|
9
|
-
from llama_stack.apis.models import ModelType
|
|
10
8
|
from llama_stack.core.datatypes import (
|
|
11
9
|
BenchmarkInput,
|
|
12
10
|
BuildProvider,
|
|
@@ -34,6 +32,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
|
|
|
34
32
|
PGVectorVectorIOConfig,
|
|
35
33
|
)
|
|
36
34
|
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
|
|
35
|
+
from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
|
|
37
36
|
|
|
38
37
|
|
|
39
38
|
def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
|
|
@@ -262,7 +261,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|
|
262
261
|
providers=providers,
|
|
263
262
|
available_models_by_provider=available_models,
|
|
264
263
|
run_configs={
|
|
265
|
-
"
|
|
264
|
+
"config.yaml": RunConfigSettings(
|
|
266
265
|
provider_overrides={
|
|
267
266
|
"inference": inference_providers,
|
|
268
267
|
"vector_io": vector_io_providers,
|
|
@@ -11,7 +11,7 @@ providers:
|
|
|
11
11
|
- provider_id: vllm-inference
|
|
12
12
|
provider_type: remote::vllm
|
|
13
13
|
config:
|
|
14
|
-
|
|
14
|
+
base_url: ${env.VLLM_URL:=}
|
|
15
15
|
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
|
16
16
|
api_token: ${env.VLLM_API_TOKEN:=fake}
|
|
17
17
|
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
|
@@ -87,6 +87,9 @@ storage:
|
|
|
87
87
|
conversations:
|
|
88
88
|
table_name: openai_conversations
|
|
89
89
|
backend: sql_default
|
|
90
|
+
prompts:
|
|
91
|
+
namespace: prompts
|
|
92
|
+
backend: kv_default
|
|
90
93
|
registered_resources:
|
|
91
94
|
models:
|
|
92
95
|
- metadata: {}
|
|
@@ -111,5 +114,3 @@ registered_resources:
|
|
|
111
114
|
provider_id: rag-runtime
|
|
112
115
|
server:
|
|
113
116
|
port: 8321
|
|
114
|
-
telemetry:
|
|
115
|
-
enabled: true
|
|
@@ -17,41 +17,43 @@ providers:
|
|
|
17
17
|
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
|
18
18
|
provider_type: remote::cerebras
|
|
19
19
|
config:
|
|
20
|
-
base_url: https://api.cerebras.ai
|
|
20
|
+
base_url: https://api.cerebras.ai/v1
|
|
21
21
|
api_key: ${env.CEREBRAS_API_KEY:=}
|
|
22
22
|
- provider_id: ${env.OLLAMA_URL:+ollama}
|
|
23
23
|
provider_type: remote::ollama
|
|
24
24
|
config:
|
|
25
|
-
|
|
25
|
+
base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
|
|
26
26
|
- provider_id: ${env.VLLM_URL:+vllm}
|
|
27
27
|
provider_type: remote::vllm
|
|
28
28
|
config:
|
|
29
|
-
|
|
29
|
+
base_url: ${env.VLLM_URL:=}
|
|
30
30
|
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
|
31
31
|
api_token: ${env.VLLM_API_TOKEN:=fake}
|
|
32
32
|
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
|
33
33
|
- provider_id: ${env.TGI_URL:+tgi}
|
|
34
34
|
provider_type: remote::tgi
|
|
35
35
|
config:
|
|
36
|
-
|
|
36
|
+
base_url: ${env.TGI_URL:=}
|
|
37
37
|
- provider_id: fireworks
|
|
38
38
|
provider_type: remote::fireworks
|
|
39
39
|
config:
|
|
40
|
-
|
|
40
|
+
base_url: https://api.fireworks.ai/inference/v1
|
|
41
41
|
api_key: ${env.FIREWORKS_API_KEY:=}
|
|
42
42
|
- provider_id: together
|
|
43
43
|
provider_type: remote::together
|
|
44
44
|
config:
|
|
45
|
-
|
|
45
|
+
base_url: https://api.together.xyz/v1
|
|
46
46
|
api_key: ${env.TOGETHER_API_KEY:=}
|
|
47
47
|
- provider_id: bedrock
|
|
48
48
|
provider_type: remote::bedrock
|
|
49
|
+
config:
|
|
50
|
+
api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
|
|
51
|
+
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
|
|
49
52
|
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
|
50
53
|
provider_type: remote::nvidia
|
|
51
54
|
config:
|
|
52
|
-
|
|
55
|
+
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
|
53
56
|
api_key: ${env.NVIDIA_API_KEY:=}
|
|
54
|
-
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
|
55
57
|
- provider_id: openai
|
|
56
58
|
provider_type: remote::openai
|
|
57
59
|
config:
|
|
@@ -73,18 +75,18 @@ providers:
|
|
|
73
75
|
- provider_id: groq
|
|
74
76
|
provider_type: remote::groq
|
|
75
77
|
config:
|
|
76
|
-
|
|
78
|
+
base_url: https://api.groq.com/openai/v1
|
|
77
79
|
api_key: ${env.GROQ_API_KEY:=}
|
|
78
80
|
- provider_id: sambanova
|
|
79
81
|
provider_type: remote::sambanova
|
|
80
82
|
config:
|
|
81
|
-
|
|
83
|
+
base_url: https://api.sambanova.ai/v1
|
|
82
84
|
api_key: ${env.SAMBANOVA_API_KEY:=}
|
|
83
85
|
- provider_id: ${env.AZURE_API_KEY:+azure}
|
|
84
86
|
provider_type: remote::azure
|
|
85
87
|
config:
|
|
86
88
|
api_key: ${env.AZURE_API_KEY:=}
|
|
87
|
-
|
|
89
|
+
base_url: ${env.AZURE_API_BASE:=}
|
|
88
90
|
api_version: ${env.AZURE_API_VERSION:=}
|
|
89
91
|
api_type: ${env.AZURE_API_TYPE:=}
|
|
90
92
|
- provider_id: sentence-transformers
|
|
@@ -247,6 +249,9 @@ storage:
|
|
|
247
249
|
conversations:
|
|
248
250
|
table_name: openai_conversations
|
|
249
251
|
backend: sql_default
|
|
252
|
+
prompts:
|
|
253
|
+
namespace: prompts
|
|
254
|
+
backend: kv_default
|
|
250
255
|
registered_resources:
|
|
251
256
|
models: []
|
|
252
257
|
shields:
|
|
@@ -267,10 +272,56 @@ registered_resources:
|
|
|
267
272
|
provider_id: rag-runtime
|
|
268
273
|
server:
|
|
269
274
|
port: 8321
|
|
270
|
-
telemetry:
|
|
271
|
-
enabled: true
|
|
272
275
|
vector_stores:
|
|
273
276
|
default_provider_id: faiss
|
|
274
277
|
default_embedding_model:
|
|
275
278
|
provider_id: sentence-transformers
|
|
276
279
|
model_id: nomic-ai/nomic-embed-text-v1.5
|
|
280
|
+
file_search_params:
|
|
281
|
+
header_template: 'knowledge_search tool found {num_chunks} chunks:
|
|
282
|
+
|
|
283
|
+
BEGIN of knowledge_search tool results.
|
|
284
|
+
|
|
285
|
+
'
|
|
286
|
+
footer_template: 'END of knowledge_search tool results.
|
|
287
|
+
|
|
288
|
+
'
|
|
289
|
+
context_prompt_params:
|
|
290
|
+
chunk_annotation_template: 'Result {index}
|
|
291
|
+
|
|
292
|
+
Content: {chunk.content}
|
|
293
|
+
|
|
294
|
+
Metadata: {metadata}
|
|
295
|
+
|
|
296
|
+
'
|
|
297
|
+
context_template: 'The above results were retrieved to help answer the user''s
|
|
298
|
+
query: "{query}". Use them as supporting information only in answering this
|
|
299
|
+
query. {annotation_instruction}
|
|
300
|
+
|
|
301
|
+
'
|
|
302
|
+
annotation_prompt_params:
|
|
303
|
+
enable_annotations: true
|
|
304
|
+
annotation_instruction_template: Cite sources immediately at the end of sentences
|
|
305
|
+
before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'.
|
|
306
|
+
Do not add extra punctuation. Use only the file IDs provided, do not invent
|
|
307
|
+
new ones.
|
|
308
|
+
chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
|
|
309
|
+
|
|
310
|
+
{chunk_text}
|
|
311
|
+
|
|
312
|
+
'
|
|
313
|
+
file_ingestion_params:
|
|
314
|
+
default_chunk_size_tokens: 512
|
|
315
|
+
default_chunk_overlap_tokens: 128
|
|
316
|
+
chunk_retrieval_params:
|
|
317
|
+
chunk_multiplier: 5
|
|
318
|
+
max_tokens_in_context: 4000
|
|
319
|
+
default_reranker_strategy: rrf
|
|
320
|
+
rrf_impact_factor: 60.0
|
|
321
|
+
weighted_search_alpha: 0.5
|
|
322
|
+
file_batch_params:
|
|
323
|
+
max_concurrent_files_per_batch: 3
|
|
324
|
+
file_batch_chunk_size: 10
|
|
325
|
+
cleanup_interval_seconds: 86400
|
|
326
|
+
safety:
|
|
327
|
+
default_shield_id: llama-guard
|
|
@@ -17,41 +17,43 @@ providers:
|
|
|
17
17
|
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
|
18
18
|
provider_type: remote::cerebras
|
|
19
19
|
config:
|
|
20
|
-
base_url: https://api.cerebras.ai
|
|
20
|
+
base_url: https://api.cerebras.ai/v1
|
|
21
21
|
api_key: ${env.CEREBRAS_API_KEY:=}
|
|
22
22
|
- provider_id: ${env.OLLAMA_URL:+ollama}
|
|
23
23
|
provider_type: remote::ollama
|
|
24
24
|
config:
|
|
25
|
-
|
|
25
|
+
base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
|
|
26
26
|
- provider_id: ${env.VLLM_URL:+vllm}
|
|
27
27
|
provider_type: remote::vllm
|
|
28
28
|
config:
|
|
29
|
-
|
|
29
|
+
base_url: ${env.VLLM_URL:=}
|
|
30
30
|
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
|
31
31
|
api_token: ${env.VLLM_API_TOKEN:=fake}
|
|
32
32
|
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
|
33
33
|
- provider_id: ${env.TGI_URL:+tgi}
|
|
34
34
|
provider_type: remote::tgi
|
|
35
35
|
config:
|
|
36
|
-
|
|
36
|
+
base_url: ${env.TGI_URL:=}
|
|
37
37
|
- provider_id: fireworks
|
|
38
38
|
provider_type: remote::fireworks
|
|
39
39
|
config:
|
|
40
|
-
|
|
40
|
+
base_url: https://api.fireworks.ai/inference/v1
|
|
41
41
|
api_key: ${env.FIREWORKS_API_KEY:=}
|
|
42
42
|
- provider_id: together
|
|
43
43
|
provider_type: remote::together
|
|
44
44
|
config:
|
|
45
|
-
|
|
45
|
+
base_url: https://api.together.xyz/v1
|
|
46
46
|
api_key: ${env.TOGETHER_API_KEY:=}
|
|
47
47
|
- provider_id: bedrock
|
|
48
48
|
provider_type: remote::bedrock
|
|
49
|
+
config:
|
|
50
|
+
api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
|
|
51
|
+
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
|
|
49
52
|
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
|
50
53
|
provider_type: remote::nvidia
|
|
51
54
|
config:
|
|
52
|
-
|
|
55
|
+
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
|
53
56
|
api_key: ${env.NVIDIA_API_KEY:=}
|
|
54
|
-
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
|
55
57
|
- provider_id: openai
|
|
56
58
|
provider_type: remote::openai
|
|
57
59
|
config:
|
|
@@ -73,18 +75,18 @@ providers:
|
|
|
73
75
|
- provider_id: groq
|
|
74
76
|
provider_type: remote::groq
|
|
75
77
|
config:
|
|
76
|
-
|
|
78
|
+
base_url: https://api.groq.com/openai/v1
|
|
77
79
|
api_key: ${env.GROQ_API_KEY:=}
|
|
78
80
|
- provider_id: sambanova
|
|
79
81
|
provider_type: remote::sambanova
|
|
80
82
|
config:
|
|
81
|
-
|
|
83
|
+
base_url: https://api.sambanova.ai/v1
|
|
82
84
|
api_key: ${env.SAMBANOVA_API_KEY:=}
|
|
83
85
|
- provider_id: ${env.AZURE_API_KEY:+azure}
|
|
84
86
|
provider_type: remote::azure
|
|
85
87
|
config:
|
|
86
88
|
api_key: ${env.AZURE_API_KEY:=}
|
|
87
|
-
|
|
89
|
+
base_url: ${env.AZURE_API_BASE:=}
|
|
88
90
|
api_version: ${env.AZURE_API_VERSION:=}
|
|
89
91
|
api_type: ${env.AZURE_API_TYPE:=}
|
|
90
92
|
- provider_id: sentence-transformers
|
|
@@ -256,6 +258,9 @@ storage:
|
|
|
256
258
|
conversations:
|
|
257
259
|
table_name: openai_conversations
|
|
258
260
|
backend: sql_default
|
|
261
|
+
prompts:
|
|
262
|
+
namespace: prompts
|
|
263
|
+
backend: kv_default
|
|
259
264
|
registered_resources:
|
|
260
265
|
models: []
|
|
261
266
|
shields:
|
|
@@ -276,10 +281,56 @@ registered_resources:
|
|
|
276
281
|
provider_id: rag-runtime
|
|
277
282
|
server:
|
|
278
283
|
port: 8321
|
|
279
|
-
telemetry:
|
|
280
|
-
enabled: true
|
|
281
284
|
vector_stores:
|
|
282
285
|
default_provider_id: faiss
|
|
283
286
|
default_embedding_model:
|
|
284
287
|
provider_id: sentence-transformers
|
|
285
288
|
model_id: nomic-ai/nomic-embed-text-v1.5
|
|
289
|
+
file_search_params:
|
|
290
|
+
header_template: 'knowledge_search tool found {num_chunks} chunks:
|
|
291
|
+
|
|
292
|
+
BEGIN of knowledge_search tool results.
|
|
293
|
+
|
|
294
|
+
'
|
|
295
|
+
footer_template: 'END of knowledge_search tool results.
|
|
296
|
+
|
|
297
|
+
'
|
|
298
|
+
context_prompt_params:
|
|
299
|
+
chunk_annotation_template: 'Result {index}
|
|
300
|
+
|
|
301
|
+
Content: {chunk.content}
|
|
302
|
+
|
|
303
|
+
Metadata: {metadata}
|
|
304
|
+
|
|
305
|
+
'
|
|
306
|
+
context_template: 'The above results were retrieved to help answer the user''s
|
|
307
|
+
query: "{query}". Use them as supporting information only in answering this
|
|
308
|
+
query. {annotation_instruction}
|
|
309
|
+
|
|
310
|
+
'
|
|
311
|
+
annotation_prompt_params:
|
|
312
|
+
enable_annotations: true
|
|
313
|
+
annotation_instruction_template: Cite sources immediately at the end of sentences
|
|
314
|
+
before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'.
|
|
315
|
+
Do not add extra punctuation. Use only the file IDs provided, do not invent
|
|
316
|
+
new ones.
|
|
317
|
+
chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
|
|
318
|
+
|
|
319
|
+
{chunk_text}
|
|
320
|
+
|
|
321
|
+
'
|
|
322
|
+
file_ingestion_params:
|
|
323
|
+
default_chunk_size_tokens: 512
|
|
324
|
+
default_chunk_overlap_tokens: 128
|
|
325
|
+
chunk_retrieval_params:
|
|
326
|
+
chunk_multiplier: 5
|
|
327
|
+
max_tokens_in_context: 4000
|
|
328
|
+
default_reranker_strategy: rrf
|
|
329
|
+
rrf_impact_factor: 60.0
|
|
330
|
+
weighted_search_alpha: 0.5
|
|
331
|
+
file_batch_params:
|
|
332
|
+
max_concurrent_files_per_batch: 3
|
|
333
|
+
file_batch_chunk_size: 10
|
|
334
|
+
cleanup_interval_seconds: 86400
|
|
335
|
+
safety:
|
|
336
|
+
default_shield_id: llama-guard
|
|
@@ -12,13 +12,15 @@ from llama_stack.core.datatypes import (
|
|
|
12
12
|
Provider,
|
|
13
13
|
ProviderSpec,
|
|
14
14
|
QualifiedModel,
|
|
15
|
+
SafetyConfig,
|
|
15
16
|
ShieldInput,
|
|
16
17
|
ToolGroupInput,
|
|
17
18
|
VectorStoresConfig,
|
|
18
19
|
)
|
|
20
|
+
from llama_stack.core.storage.kvstore.config import PostgresKVStoreConfig
|
|
21
|
+
from llama_stack.core.storage.sqlstore.sqlstore import PostgresSqlStoreConfig
|
|
19
22
|
from llama_stack.core.utils.dynamic import instantiate_class_type
|
|
20
23
|
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
|
|
21
|
-
from llama_stack.providers.datatypes import RemoteProviderSpec
|
|
22
24
|
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
|
|
23
25
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
|
24
26
|
SentenceTransformersInferenceConfig,
|
|
@@ -35,8 +37,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
|
|
|
35
37
|
)
|
|
36
38
|
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
|
|
37
39
|
from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
|
|
38
|
-
from
|
|
39
|
-
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
|
|
40
|
+
from llama_stack_api import RemoteProviderSpec
|
|
40
41
|
|
|
41
42
|
|
|
42
43
|
def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]:
|
|
@@ -252,6 +253,9 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|
|
252
253
|
model_id="nomic-ai/nomic-embed-text-v1.5",
|
|
253
254
|
),
|
|
254
255
|
),
|
|
256
|
+
safety_config=SafetyConfig(
|
|
257
|
+
default_shield_id="llama-guard",
|
|
258
|
+
),
|
|
255
259
|
)
|
|
256
260
|
|
|
257
261
|
postgres_run_settings = base_run_settings.model_copy(
|
|
@@ -271,9 +275,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|
|
271
275
|
container_image=None,
|
|
272
276
|
template_path=None,
|
|
273
277
|
providers=providers,
|
|
274
|
-
additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
|
|
275
278
|
run_configs={
|
|
276
|
-
"
|
|
279
|
+
"config.yaml": base_run_settings,
|
|
277
280
|
"run-with-postgres-store.yaml": postgres_run_settings,
|
|
278
281
|
},
|
|
279
282
|
run_config_env_vars={
|
|
@@ -17,41 +17,43 @@ providers:
|
|
|
17
17
|
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
|
18
18
|
provider_type: remote::cerebras
|
|
19
19
|
config:
|
|
20
|
-
base_url: https://api.cerebras.ai
|
|
20
|
+
base_url: https://api.cerebras.ai/v1
|
|
21
21
|
api_key: ${env.CEREBRAS_API_KEY:=}
|
|
22
22
|
- provider_id: ${env.OLLAMA_URL:+ollama}
|
|
23
23
|
provider_type: remote::ollama
|
|
24
24
|
config:
|
|
25
|
-
|
|
25
|
+
base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
|
|
26
26
|
- provider_id: ${env.VLLM_URL:+vllm}
|
|
27
27
|
provider_type: remote::vllm
|
|
28
28
|
config:
|
|
29
|
-
|
|
29
|
+
base_url: ${env.VLLM_URL:=}
|
|
30
30
|
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
|
31
31
|
api_token: ${env.VLLM_API_TOKEN:=fake}
|
|
32
32
|
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
|
33
33
|
- provider_id: ${env.TGI_URL:+tgi}
|
|
34
34
|
provider_type: remote::tgi
|
|
35
35
|
config:
|
|
36
|
-
|
|
36
|
+
base_url: ${env.TGI_URL:=}
|
|
37
37
|
- provider_id: fireworks
|
|
38
38
|
provider_type: remote::fireworks
|
|
39
39
|
config:
|
|
40
|
-
|
|
40
|
+
base_url: https://api.fireworks.ai/inference/v1
|
|
41
41
|
api_key: ${env.FIREWORKS_API_KEY:=}
|
|
42
42
|
- provider_id: together
|
|
43
43
|
provider_type: remote::together
|
|
44
44
|
config:
|
|
45
|
-
|
|
45
|
+
base_url: https://api.together.xyz/v1
|
|
46
46
|
api_key: ${env.TOGETHER_API_KEY:=}
|
|
47
47
|
- provider_id: bedrock
|
|
48
48
|
provider_type: remote::bedrock
|
|
49
|
+
config:
|
|
50
|
+
api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
|
|
51
|
+
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
|
|
49
52
|
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
|
50
53
|
provider_type: remote::nvidia
|
|
51
54
|
config:
|
|
52
|
-
|
|
55
|
+
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
|
53
56
|
api_key: ${env.NVIDIA_API_KEY:=}
|
|
54
|
-
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
|
55
57
|
- provider_id: openai
|
|
56
58
|
provider_type: remote::openai
|
|
57
59
|
config:
|
|
@@ -73,18 +75,18 @@ providers:
|
|
|
73
75
|
- provider_id: groq
|
|
74
76
|
provider_type: remote::groq
|
|
75
77
|
config:
|
|
76
|
-
|
|
78
|
+
base_url: https://api.groq.com/openai/v1
|
|
77
79
|
api_key: ${env.GROQ_API_KEY:=}
|
|
78
80
|
- provider_id: sambanova
|
|
79
81
|
provider_type: remote::sambanova
|
|
80
82
|
config:
|
|
81
|
-
|
|
83
|
+
base_url: https://api.sambanova.ai/v1
|
|
82
84
|
api_key: ${env.SAMBANOVA_API_KEY:=}
|
|
83
85
|
- provider_id: ${env.AZURE_API_KEY:+azure}
|
|
84
86
|
provider_type: remote::azure
|
|
85
87
|
config:
|
|
86
88
|
api_key: ${env.AZURE_API_KEY:=}
|
|
87
|
-
|
|
89
|
+
base_url: ${env.AZURE_API_BASE:=}
|
|
88
90
|
api_version: ${env.AZURE_API_VERSION:=}
|
|
89
91
|
api_type: ${env.AZURE_API_TYPE:=}
|
|
90
92
|
- provider_id: sentence-transformers
|
|
@@ -250,6 +252,9 @@ storage:
|
|
|
250
252
|
conversations:
|
|
251
253
|
table_name: openai_conversations
|
|
252
254
|
backend: sql_default
|
|
255
|
+
prompts:
|
|
256
|
+
namespace: prompts
|
|
257
|
+
backend: kv_default
|
|
253
258
|
registered_resources:
|
|
254
259
|
models: []
|
|
255
260
|
shields:
|
|
@@ -270,10 +275,56 @@ registered_resources:
|
|
|
270
275
|
provider_id: rag-runtime
|
|
271
276
|
server:
|
|
272
277
|
port: 8321
|
|
273
|
-
telemetry:
|
|
274
|
-
enabled: true
|
|
275
278
|
vector_stores:
|
|
276
279
|
default_provider_id: faiss
|
|
277
280
|
default_embedding_model:
|
|
278
281
|
provider_id: sentence-transformers
|
|
279
282
|
model_id: nomic-ai/nomic-embed-text-v1.5
|
|
283
|
+
file_search_params:
|
|
284
|
+
header_template: 'knowledge_search tool found {num_chunks} chunks:
|
|
285
|
+
|
|
286
|
+
BEGIN of knowledge_search tool results.
|
|
287
|
+
|
|
288
|
+
'
|
|
289
|
+
footer_template: 'END of knowledge_search tool results.
|
|
290
|
+
|
|
291
|
+
'
|
|
292
|
+
context_prompt_params:
|
|
293
|
+
chunk_annotation_template: 'Result {index}
|
|
294
|
+
|
|
295
|
+
Content: {chunk.content}
|
|
296
|
+
|
|
297
|
+
Metadata: {metadata}
|
|
298
|
+
|
|
299
|
+
'
|
|
300
|
+
context_template: 'The above results were retrieved to help answer the user''s
|
|
301
|
+
query: "{query}". Use them as supporting information only in answering this
|
|
302
|
+
query. {annotation_instruction}
|
|
303
|
+
|
|
304
|
+
'
|
|
305
|
+
annotation_prompt_params:
|
|
306
|
+
enable_annotations: true
|
|
307
|
+
annotation_instruction_template: Cite sources immediately at the end of sentences
|
|
308
|
+
before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'.
|
|
309
|
+
Do not add extra punctuation. Use only the file IDs provided, do not invent
|
|
310
|
+
new ones.
|
|
311
|
+
chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
|
|
312
|
+
|
|
313
|
+
{chunk_text}
|
|
314
|
+
|
|
315
|
+
'
|
|
316
|
+
file_ingestion_params:
|
|
317
|
+
default_chunk_size_tokens: 512
|
|
318
|
+
default_chunk_overlap_tokens: 128
|
|
319
|
+
chunk_retrieval_params:
|
|
320
|
+
chunk_multiplier: 5
|
|
321
|
+
max_tokens_in_context: 4000
|
|
322
|
+
default_reranker_strategy: rrf
|
|
323
|
+
rrf_impact_factor: 60.0
|
|
324
|
+
weighted_search_alpha: 0.5
|
|
325
|
+
file_batch_params:
|
|
326
|
+
max_concurrent_files_per_batch: 3
|
|
327
|
+
file_batch_chunk_size: 10
|
|
328
|
+
cleanup_interval_seconds: 86400
|
|
329
|
+
safety:
|
|
330
|
+
default_shield_id: llama-guard
|