llama-stack 0.4.4__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- llama_stack/{distributions/meta-reference-gpu → core/connectors}/__init__.py +3 -1
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +49 -51
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +2 -1
- llama_stack/providers/utils/inference/openai_mixin.py +41 -2
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +7 -7
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +111 -144
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/doc_template.md +0 -119
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/dog.jpg +0 -0
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/pasta.jpeg +0 -0
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/top_level.txt +0 -0
llama_stack/core/store/registry.py CHANGED
@@ -190,7 +190,7 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
 
 
 async def create_dist_registry(
-    metadata_store: KVStoreReference,
+    metadata_store: KVStoreReference, distro_name: str
 ) -> tuple[CachedDiskDistributionRegistry, KVStore]:
     # instantiate kvstore for storing and retrieving distribution metadata
     dist_kvstore = await kvstore_impl(metadata_store)

llama_stack/core/utils/exec.py CHANGED
@@ -17,10 +17,10 @@ from llama_stack.log import get_logger
 log = get_logger(name=__name__, category="core")
 
 
-def formulate_run_args(image_type: str,
+def formulate_run_args(image_type: str, distro_name: str) -> list:
     # Only venv is supported now
     current_venv = os.environ.get("VIRTUAL_ENV")
-    env_name =
+    env_name = distro_name or current_venv
     if not env_name:
         cprint(
             "No current virtual environment detected, please specify a virtual environment name with --image-name",

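For orientation, a standalone sketch (not the package's code) of the fallback that formulate_run_args now applies: an explicit distro name wins, otherwise the active virtual environment is used.

    import os

    def resolve_env_name(distro_name: str | None) -> str | None:
        # Mirrors `env_name = distro_name or current_venv` from the hunk above.
        current_venv = os.environ.get("VIRTUAL_ENV")
        return distro_name or current_venv

    print(resolve_env_name("starter"))  # -> "starter"
    print(resolve_env_name(None))       # -> value of $VIRTUAL_ENV if set, else None
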
llama_stack/core/utils/type_inspection.py CHANGED
@@ -36,10 +36,24 @@ def is_unwrapped_body_param(param_type: Any) -> bool:
     base_type = args[0]
     metadata = args[1:]
 
-    # Look for Body annotation
+    # Look for Body annotation; treat embed=None (default) as unwrapped
     # Body() returns a FieldInfo object, so we check for that type and the embed attribute
     for item in metadata:
-        if isinstance(item, FieldInfo) and hasattr(item, "embed") and
+        if isinstance(item, FieldInfo) and hasattr(item, "embed") and item.embed is not True:
             return inspect.isclass(base_type) and issubclass(base_type, BaseModel)
 
     return False
+
+
+def is_body_param(param_type: Any) -> bool:
+    """
+    Check if a parameter type represents a body parameter (Annotated with Body()).
+    """
+    if get_origin(param_type) is typing.Annotated:
+        args = get_args(param_type)
+        base_type = args[0]
+        metadata = args[1:]
+        for item in metadata:
+            if isinstance(item, FieldInfo):
+                return inspect.isclass(base_type) and issubclass(base_type, BaseModel)
+    return False

llama_stack/distributions/dell/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: dell
 apis:
 - agents
 - datasetio
@@ -108,6 +108,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}

llama_stack/distributions/dell/run-with-safety.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: dell
 apis:
 - agents
 - datasetio
@@ -112,6 +112,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}

llama_stack/distributions/nvidia/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: nvidia
 apis:
 - agents
 - datasetio
@@ -102,6 +102,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields: []

llama_stack/distributions/nvidia/run-with-safety.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: nvidia
 apis:
 - agents
 - datasetio
@@ -113,6 +113,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}

llama_stack/distributions/oci/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: oci
 apis:
 - agents
 - datasetio
@@ -120,6 +120,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields: []

llama_stack/distributions/open-benchmark/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: open-benchmark
 apis:
 - agents
 - datasetio
@@ -57,6 +57,11 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
+      distance_metric: COSINE
+      vector_index:
+        type: HNSW
+        m: 16
+        ef_construction: 64
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
@@ -145,6 +150,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}

llama_stack/distributions/starter/build.yaml ADDED
@@ -0,0 +1,62 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers.
+    This distribution is intended for CPU-only environments.
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: remote::azure
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    - provider_type: remote::qdrant
+    - provider_type: remote::weaviate
+    - provider_type: remote::elasticsearch
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::torchtune-cpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- psycopg2-binary
+- sqlalchemy[asyncio]

llama_stack/distributions/starter/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: starter
 apis:
 - agents
 - batches
@@ -29,7 +29,9 @@ providers:
       base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
-
+      network:
+        tls:
+          verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
@@ -71,7 +73,7 @@ providers:
     provider_type: remote::vertexai
     config:
       project: ${env.VERTEX_AI_PROJECT:=}
-      location: ${env.VERTEX_AI_LOCATION:=
+      location: ${env.VERTEX_AI_LOCATION:=global}
   - provider_id: groq
     provider_type: remote::groq
     config:
@@ -127,6 +129,11 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
+      distance_metric: COSINE
+      vector_index:
+        type: HNSW
+        m: 16
+        ef_construction: 64
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
@@ -145,6 +152,14 @@ providers:
       persistence:
         namespace: vector_io::weaviate
         backend: kv_default
+  - provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch}
+    provider_type: remote::elasticsearch
+    config:
+      elasticsearch_url: ${env.ELASTICSEARCH_URL:=localhost:9200}
+      elasticsearch_api_key: ${env.ELASTICSEARCH_API_KEY:=}
+      persistence:
+        namespace: vector_io::elasticsearch
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
@@ -252,6 +267,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -325,3 +343,4 @@ vector_stores:
   cleanup_interval_seconds: 86400
 safety:
   default_shield_id: llama-guard
+connectors: []

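A note on the substitution syntax used throughout these configs (assumed to mirror shell parameter expansion, which every occurrence in this diff is consistent with): ${env.VAR:=default} resolves to the default when VAR is unset, while ${env.VAR:+value} resolves to value only when VAR is set, so provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch} enables that provider only when the URL is configured. A rough Python sketch of that assumed behavior:

    import os
    import re

    def expand(template: str) -> str:
        # Approximates the ${env.VAR:=default} / ${env.VAR:+value} forms seen above.
        def repl(match: re.Match) -> str:
            var, op, arg = match.group(1), match.group(2), match.group(3)
            value = os.environ.get(var)
            if op == ":=":
                return value if value else arg   # default when unset or empty
            return arg if value else ""          # ":+": substitute only when set

        return re.sub(r"\$\{env\.(\w+)(:=|:\+)([^}]*)\}", repl, template)

    print(expand("provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch}"))
    print(expand("elasticsearch_url: ${env.ELASTICSEARCH_URL:=localhost:9200}"))
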
llama_stack/distributions/starter/run-with-postgres-store.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: starter
 apis:
 - agents
 - batches
@@ -29,7 +29,9 @@ providers:
       base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
-
+      network:
+        tls:
+          verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
@@ -71,7 +73,7 @@ providers:
     provider_type: remote::vertexai
     config:
      project: ${env.VERTEX_AI_PROJECT:=}
-      location: ${env.VERTEX_AI_LOCATION:=
+      location: ${env.VERTEX_AI_LOCATION:=global}
   - provider_id: groq
     provider_type: remote::groq
     config:
@@ -127,6 +129,11 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
+      distance_metric: COSINE
+      vector_index:
+        type: HNSW
+        m: 16
+        ef_construction: 64
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
@@ -145,6 +152,14 @@ providers:
       persistence:
         namespace: vector_io::weaviate
         backend: kv_default
+  - provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch}
+    provider_type: remote::elasticsearch
+    config:
+      elasticsearch_url: ${env.ELASTICSEARCH_URL:=localhost:9200}
+      elasticsearch_api_key: ${env.ELASTICSEARCH_API_KEY:=}
+      persistence:
+        namespace: vector_io::elasticsearch
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
@@ -261,6 +276,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -334,3 +352,4 @@ vector_stores:
   cleanup_interval_seconds: 86400
 safety:
   default_shield_id: llama-guard
+connectors: []

llama_stack/distributions/starter/starter.py CHANGED
@@ -32,6 +32,7 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
 )
 from llama_stack.providers.registry.inference import available_providers
 from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
+from llama_stack.providers.remote.vector_io.elasticsearch.config import ElasticsearchVectorIOConfig
 from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
 )
@@ -121,6 +122,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
            BuildProvider(provider_type="remote::pgvector"),
            BuildProvider(provider_type="remote::qdrant"),
            BuildProvider(provider_type="remote::weaviate"),
+           BuildProvider(provider_type="remote::elasticsearch"),
        ],
        "files": [BuildProvider(provider_type="inline::localfs")],
        "safety": [
@@ -237,6 +239,15 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
                cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
            ),
        ),
+       Provider(
+           provider_id="${env.ELASTICSEARCH_URL:+elasticsearch}",
+           provider_type="remote::elasticsearch",
+           config=ElasticsearchVectorIOConfig.sample_run_config(
+               f"~/.llama/distributions/{name}",
+               elasticsearch_url="${env.ELASTICSEARCH_URL:=localhost:9200}",
+               elasticsearch_api_key="${env.ELASTICSEARCH_API_KEY:=}",
+           ),
+       ),
    ],
    "files": [files_provider],
 }
@@ -246,6 +257,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
        default_models=[],
        default_tool_groups=default_tool_groups,
        default_shields=default_shields,
+       default_connectors=[],
        vector_stores_config=VectorStoresConfig(
            default_provider_id="faiss",
            default_embedding_model=QualifiedModel(
@@ -309,7 +321,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
            "Google Cloud Project ID for Vertex AI",
        ),
        "VERTEX_AI_LOCATION": (
-           "
+           "global",
            "Google Cloud Location for Vertex AI",
        ),
        "SAMBANOVA_API_KEY": (

llama_stack/distributions/starter-gpu/build.yaml ADDED
@@ -0,0 +1,62 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers.
+    This distribution is intended for GPU-enabled environments.
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: remote::azure
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    - provider_type: remote::qdrant
+    - provider_type: remote::weaviate
+    - provider_type: remote::elasticsearch
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::huggingface-gpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- psycopg2-binary
+- sqlalchemy[asyncio]

llama_stack/distributions/starter-gpu/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: starter-gpu
 apis:
 - agents
 - batches
@@ -29,7 +29,9 @@ providers:
       base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
-
+      network:
+        tls:
+          verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
@@ -71,7 +73,7 @@ providers:
     provider_type: remote::vertexai
     config:
       project: ${env.VERTEX_AI_PROJECT:=}
-      location: ${env.VERTEX_AI_LOCATION:=
+      location: ${env.VERTEX_AI_LOCATION:=global}
   - provider_id: groq
     provider_type: remote::groq
     config:
@@ -127,6 +129,11 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
+      distance_metric: COSINE
+      vector_index:
+        type: HNSW
+        m: 16
+        ef_construction: 64
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
@@ -145,6 +152,14 @@ providers:
       persistence:
         namespace: vector_io::weaviate
         backend: kv_default
+  - provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch}
+    provider_type: remote::elasticsearch
+    config:
+      elasticsearch_url: ${env.ELASTICSEARCH_URL:=localhost:9200}
+      elasticsearch_api_key: ${env.ELASTICSEARCH_API_KEY:=}
+      persistence:
+        namespace: vector_io::elasticsearch
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
@@ -255,6 +270,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -328,3 +346,4 @@ vector_stores:
   cleanup_interval_seconds: 86400
 safety:
   default_shield_id: llama-guard
+connectors: []

llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: starter-gpu
 apis:
 - agents
 - batches
@@ -29,7 +29,9 @@ providers:
       base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
-
+      network:
+        tls:
+          verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
@@ -71,7 +73,7 @@ providers:
     provider_type: remote::vertexai
     config:
       project: ${env.VERTEX_AI_PROJECT:=}
-      location: ${env.VERTEX_AI_LOCATION:=
+      location: ${env.VERTEX_AI_LOCATION:=global}
   - provider_id: groq
     provider_type: remote::groq
     config:
@@ -127,6 +129,11 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
+      distance_metric: COSINE
+      vector_index:
+        type: HNSW
+        m: 16
+        ef_construction: 64
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
@@ -145,6 +152,14 @@ providers:
       persistence:
         namespace: vector_io::weaviate
         backend: kv_default
+  - provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch}
+    provider_type: remote::elasticsearch
+    config:
+      elasticsearch_url: ${env.ELASTICSEARCH_URL:=localhost:9200}
+      elasticsearch_api_key: ${env.ELASTICSEARCH_API_KEY:=}
+      persistence:
+        namespace: vector_io::elasticsearch
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
@@ -264,6 +279,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -337,3 +355,4 @@ vector_stores:
   cleanup_interval_seconds: 86400
 safety:
   default_shield_id: llama-guard
+connectors: []

llama_stack/distributions/template.py CHANGED
@@ -36,7 +36,7 @@ from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig
 from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
-from llama_stack_api import DatasetPurpose, ModelType
+from llama_stack_api import ConnectorInput, DatasetPurpose, ModelType
 
 
 def filter_empty_values(obj: Any) -> Any:
@@ -181,6 +181,7 @@ class RunConfigSettings(BaseModel):
     default_tool_groups: list[ToolGroupInput] | None = None
     default_datasets: list[DatasetInput] | None = None
     default_benchmarks: list[BenchmarkInput] | None = None
+    default_connectors: list[ConnectorInput] | None = None
     vector_stores_config: VectorStoresConfig | None = None
     safety_config: SafetyConfig | None = None
     storage_backends: dict[str, Any] | None = None
@@ -255,6 +256,10 @@ class RunConfigSettings(BaseModel):
                backend="kv_default",
                namespace="prompts",
            ).model_dump(exclude_none=True),
+           "connectors": KVStoreReference(
+               backend="kv_default",
+               namespace="connectors",
+           ).model_dump(exclude_none=True),
        }
 
        storage_config = dict(
@@ -265,7 +270,7 @@ class RunConfigSettings(BaseModel):
        # Return a dict that matches StackRunConfig structure
        config = {
            "version": LLAMA_STACK_RUN_CONFIG_VERSION,
-           "
+           "distro_name": name,
            "container_image": container_image,
            "apis": apis,
            "providers": provider_configs,
@@ -290,6 +295,9 @@ class RunConfigSettings(BaseModel):
        if self.safety_config:
            config["safety"] = self.safety_config.model_dump(exclude_none=True)
 
+       if self.default_connectors is not None:
+           config["connectors"] = [c.model_dump(exclude_none=True) for c in self.default_connectors]
+
        return config
 
 

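Purely as a summary of the hunks above (key names taken from this diff, values illustrative): a template that sets default_connectors now generates a run config with a connectors KV store reference alongside prompts, a top-level distro_name, and a top-level connectors list.

    # Illustrative only; mirrors what the RunConfigSettings hunks above now emit.
    stores = {
        "prompts": {"backend": "kv_default", "namespace": "prompts"},
        "connectors": {"backend": "kv_default", "namespace": "connectors"},  # new in 0.5.0rc1
    }
    run_config_additions = {
        "distro_name": "starter",  # per the @@ -265,7 +270,7 hunk above
        "connectors": [],          # emitted whenever default_connectors is not None
    }
    print(stores["connectors"], run_config_additions)
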
llama_stack/distributions/watsonx/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-
+distro_name: watsonx
 apis:
 - agents
 - datasetio
@@ -118,6 +118,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields: []