llama-stack 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. llama_stack/{distributions/meta-reference-gpu → core/connectors}/__init__.py +3 -1
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  30. llama_stack/distributions/nvidia/config.yaml +4 -1
  31. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  32. llama_stack/distributions/oci/config.yaml +4 -1
  33. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  34. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  35. llama_stack/distributions/starter/build.yaml +62 -0
  36. llama_stack/distributions/starter/config.yaml +22 -3
  37. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  38. llama_stack/distributions/starter/starter.py +13 -1
  39. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  40. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  41. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  42. llama_stack/distributions/template.py +10 -2
  43. llama_stack/distributions/watsonx/config.yaml +4 -1
  44. llama_stack/log.py +1 -0
  45. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  46. llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
  47. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +53 -51
  48. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
  49. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  50. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  51. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  52. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  53. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  54. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  55. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  56. llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
  57. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  58. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  59. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  60. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
  61. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  62. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  63. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  64. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  65. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  66. llama_stack/providers/registry/agents.py +1 -0
  67. llama_stack/providers/registry/inference.py +1 -9
  68. llama_stack/providers/registry/vector_io.py +136 -16
  69. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  70. llama_stack/providers/remote/files/s3/config.py +5 -3
  71. llama_stack/providers/remote/files/s3/files.py +2 -2
  72. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  73. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  74. llama_stack/providers/remote/inference/together/together.py +4 -0
  75. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  76. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  77. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  78. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  79. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  80. llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
  81. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  82. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  83. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  84. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  85. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  86. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  87. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  88. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  89. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  90. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  91. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  92. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  93. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  94. llama_stack/providers/utils/bedrock/client.py +3 -3
  95. llama_stack/providers/utils/bedrock/config.py +7 -7
  96. llama_stack/providers/utils/inference/__init__.py +0 -25
  97. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  98. llama_stack/providers/utils/inference/http_client.py +239 -0
  99. llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
  100. llama_stack/providers/utils/inference/model_registry.py +148 -2
  101. llama_stack/providers/utils/inference/openai_compat.py +1 -158
  102. llama_stack/providers/utils/inference/openai_mixin.py +42 -2
  103. llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
  104. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  105. llama_stack/providers/utils/memory/vector_store.py +46 -19
  106. llama_stack/providers/utils/responses/responses_store.py +7 -7
  107. llama_stack/providers/utils/safety.py +114 -0
  108. llama_stack/providers/utils/tools/mcp.py +44 -3
  109. llama_stack/testing/api_recorder.py +9 -3
  110. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
  111. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/RECORD +115 -148
  112. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  113. llama_stack/distributions/meta-reference-gpu/doc_template.md +0 -119
  114. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  115. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  116. llama_stack/models/llama/hadamard_utils.py +0 -88
  117. llama_stack/models/llama/llama3/args.py +0 -74
  118. llama_stack/models/llama/llama3/dog.jpg +0 -0
  119. llama_stack/models/llama/llama3/generation.py +0 -378
  120. llama_stack/models/llama/llama3/model.py +0 -304
  121. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  122. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  123. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  124. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  125. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  126. llama_stack/models/llama/llama3/pasta.jpeg +0 -0
  127. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  128. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  129. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  130. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  131. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  132. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  133. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  134. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  135. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  136. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  137. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  138. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  139. llama_stack/models/llama/llama4/args.py +0 -107
  140. llama_stack/models/llama/llama4/ffn.py +0 -58
  141. llama_stack/models/llama/llama4/moe.py +0 -214
  142. llama_stack/models/llama/llama4/preprocess.py +0 -435
  143. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  144. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  145. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  146. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  147. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  148. llama_stack/models/llama/quantize_impls.py +0 -316
  149. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  150. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  151. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  152. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  153. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  154. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  155. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  156. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
  157. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
  158. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
  159. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/top_level.txt +0 -0
llama_stack/core/store/registry.py CHANGED
@@ -190,7 +190,7 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
 
 
 async def create_dist_registry(
-    metadata_store: KVStoreReference, image_name: str
+    metadata_store: KVStoreReference, distro_name: str
 ) -> tuple[CachedDiskDistributionRegistry, KVStore]:
     # instantiate kvstore for storing and retrieving distribution metadata
     dist_kvstore = await kvstore_impl(metadata_store)
llama_stack/core/utils/exec.py CHANGED
@@ -17,10 +17,10 @@ from llama_stack.log import get_logger
 log = get_logger(name=__name__, category="core")
 
 
-def formulate_run_args(image_type: str, image_name: str) -> list:
+def formulate_run_args(image_type: str, distro_name: str) -> list:
     # Only venv is supported now
     current_venv = os.environ.get("VIRTUAL_ENV")
-    env_name = image_name or current_venv
+    env_name = distro_name or current_venv
     if not env_name:
         cprint(
             "No current virtual environment detected, please specify a virtual environment name with --image-name",
llama_stack/core/utils/type_inspection.py CHANGED
@@ -36,10 +36,24 @@ def is_unwrapped_body_param(param_type: Any) -> bool:
     base_type = args[0]
     metadata = args[1:]
 
-    # Look for Body annotation with embed=False
+    # Look for Body annotation; treat embed=None (default) as unwrapped
     # Body() returns a FieldInfo object, so we check for that type and the embed attribute
     for item in metadata:
-        if isinstance(item, FieldInfo) and hasattr(item, "embed") and not item.embed:
+        if isinstance(item, FieldInfo) and hasattr(item, "embed") and item.embed is not True:
             return inspect.isclass(base_type) and issubclass(base_type, BaseModel)
 
     return False
+
+
+def is_body_param(param_type: Any) -> bool:
+    """
+    Check if a parameter type represents a body parameter (Annotated with Body()).
+    """
+    if get_origin(param_type) is typing.Annotated:
+        args = get_args(param_type)
+        base_type = args[0]
+        metadata = args[1:]
+        for item in metadata:
+            if isinstance(item, FieldInfo):
+                return inspect.isclass(base_type) and issubclass(base_type, BaseModel)
+    return False
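A minimal usage sketch for these two helpers, assuming FastAPI's Body() (a FieldInfo subclass) and a hypothetical Pydantic request model; the import path is taken from the file list above:

from typing import Annotated

from fastapi import Body
from pydantic import BaseModel

from llama_stack.core.utils.type_inspection import is_body_param, is_unwrapped_body_param


class CreateItemRequest(BaseModel):  # hypothetical model, for illustration only
    name: str


# Body() leaves embed as None by default, so the relaxed check
# (item.embed is not True) now treats plain Body() as unwrapped.
assert is_unwrapped_body_param(Annotated[CreateItemRequest, Body()])
assert not is_unwrapped_body_param(Annotated[CreateItemRequest, Body(embed=True)])
assert is_body_param(Annotated[CreateItemRequest, Body(embed=True)])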
llama_stack/distributions/dell/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: dell
+distro_name: dell
 apis:
 - agents
 - datasetio
@@ -108,6 +108,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
llama_stack/distributions/dell/run-with-safety.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: dell
+distro_name: dell
 apis:
 - agents
 - datasetio
@@ -112,6 +112,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
llama_stack/distributions/nvidia/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: nvidia
+distro_name: nvidia
 apis:
 - agents
 - datasetio
@@ -102,6 +102,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
llama_stack/distributions/nvidia/run-with-safety.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: nvidia
+distro_name: nvidia
 apis:
 - agents
 - datasetio
@@ -113,6 +113,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
llama_stack/distributions/oci/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: oci
+distro_name: oci
 apis:
 - agents
 - datasetio
@@ -120,6 +120,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
llama_stack/distributions/open-benchmark/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: open-benchmark
+distro_name: open-benchmark
 apis:
 - agents
 - datasetio
@@ -57,6 +57,11 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
      password: ${env.PGVECTOR_PASSWORD:=}
+      distance_metric: COSINE
+      vector_index:
+        type: HNSW
+        m: 16
+        ef_construction: 64
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
@@ -145,6 +150,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
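The new pgvector fields correspond to pgvector's HNSW index options. A hedged sketch of the equivalent DDL using standard pgvector syntax; the table and column names here are illustrative, not the provider's actual schema (psycopg2-binary appears in the build files further below):

import psycopg2

conn = psycopg2.connect("dbname=llamastack")
with conn, conn.cursor() as cur:
    # distance_metric: COSINE selects the vector_cosine_ops operator class;
    # m and ef_construction mirror the config defaults (16 / 64).
    cur.execute(
        """
        CREATE INDEX IF NOT EXISTS chunks_embedding_hnsw
        ON chunks USING hnsw (embedding vector_cosine_ops)
        WITH (m = 16, ef_construction = 64)
        """
    )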
llama_stack/distributions/postgres-demo/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: postgres-demo
+distro_name: postgres-demo
 apis:
 - agents
 - inference
llama_stack/distributions/starter/build.yaml ADDED
@@ -0,0 +1,62 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers.
+    This distribution is intended for CPU-only environments.
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: remote::azure
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    - provider_type: remote::qdrant
+    - provider_type: remote::weaviate
+    - provider_type: remote::elasticsearch
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::torchtune-cpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- psycopg2-binary
+- sqlalchemy[asyncio]
llama_stack/distributions/starter/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: starter
+distro_name: starter
 apis:
 - agents
 - batches
@@ -29,7 +29,9 @@ providers:
       base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      network:
+        tls:
+          verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
@@ -71,7 +73,7 @@ providers:
     provider_type: remote::vertexai
     config:
       project: ${env.VERTEX_AI_PROJECT:=}
-      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+      location: ${env.VERTEX_AI_LOCATION:=global}
   - provider_id: groq
     provider_type: remote::groq
     config:
@@ -127,6 +129,11 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
+      distance_metric: COSINE
+      vector_index:
+        type: HNSW
+        m: 16
+        ef_construction: 64
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
@@ -145,6 +152,14 @@ providers:
       persistence:
         namespace: vector_io::weaviate
         backend: kv_default
+  - provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch}
+    provider_type: remote::elasticsearch
+    config:
+      elasticsearch_url: ${env.ELASTICSEARCH_URL:=localhost:9200}
+      elasticsearch_api_key: ${env.ELASTICSEARCH_API_KEY:=}
+      persistence:
+        namespace: vector_io::elasticsearch
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
@@ -252,6 +267,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -325,3 +343,4 @@ vector_stores:
   cleanup_interval_seconds: 86400
 safety:
   default_shield_id: llama-guard
+connectors: []
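These configs lean on two bash-style substitution forms: ${env.VAR:=default} (use VAR, falling back to default when unset or empty) and ${env.VAR:+value} (emit value only when VAR is set). A rough Python emulation of the assumed semantics; the real resolver lives elsewhere in llama_stack core:

import os


def resolve(var: str, op: str, word: str) -> str:
    """Assumed semantics of ${env.VAR:=word} and ${env.VAR:+word}."""
    value = os.environ.get(var, "")
    if op == ":=":
        return value or word  # fall back to the default when unset/empty
    if op == ":+":
        return word if value else ""  # enable-only-when-set marker
    raise ValueError(f"unknown operator {op!r}")


# So provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch} yields an empty
# provider_id (the provider is skipped) unless ELASTICSEARCH_URL is set.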
llama_stack/distributions/starter/run-with-postgres-store.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: starter
+distro_name: starter
 apis:
 - agents
 - batches
@@ -29,7 +29,9 @@ providers:
       base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      network:
+        tls:
+          verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
@@ -71,7 +73,7 @@ providers:
     provider_type: remote::vertexai
     config:
      project: ${env.VERTEX_AI_PROJECT:=}
-      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+      location: ${env.VERTEX_AI_LOCATION:=global}
   - provider_id: groq
     provider_type: remote::groq
     config:
@@ -127,6 +129,11 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
+      distance_metric: COSINE
+      vector_index:
+        type: HNSW
+        m: 16
+        ef_construction: 64
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
@@ -145,6 +152,14 @@ providers:
       persistence:
         namespace: vector_io::weaviate
         backend: kv_default
+  - provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch}
+    provider_type: remote::elasticsearch
+    config:
+      elasticsearch_url: ${env.ELASTICSEARCH_URL:=localhost:9200}
+      elasticsearch_api_key: ${env.ELASTICSEARCH_API_KEY:=}
+      persistence:
+        namespace: vector_io::elasticsearch
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
@@ -261,6 +276,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -334,3 +352,4 @@ vector_stores:
   cleanup_interval_seconds: 86400
 safety:
   default_shield_id: llama-guard
+connectors: []
llama_stack/distributions/starter/starter.py CHANGED
@@ -32,6 +32,7 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
 )
 from llama_stack.providers.registry.inference import available_providers
 from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
+from llama_stack.providers.remote.vector_io.elasticsearch.config import ElasticsearchVectorIOConfig
 from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
 )
@@ -121,6 +122,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
             BuildProvider(provider_type="remote::pgvector"),
             BuildProvider(provider_type="remote::qdrant"),
             BuildProvider(provider_type="remote::weaviate"),
+            BuildProvider(provider_type="remote::elasticsearch"),
         ],
         "files": [BuildProvider(provider_type="inline::localfs")],
         "safety": [
@@ -237,6 +239,15 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
                 cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
             ),
         ),
+        Provider(
+            provider_id="${env.ELASTICSEARCH_URL:+elasticsearch}",
+            provider_type="remote::elasticsearch",
+            config=ElasticsearchVectorIOConfig.sample_run_config(
+                f"~/.llama/distributions/{name}",
+                elasticsearch_url="${env.ELASTICSEARCH_URL:=localhost:9200}",
+                elasticsearch_api_key="${env.ELASTICSEARCH_API_KEY:=}",
+            ),
+        ),
     ],
     "files": [files_provider],
 }
@@ -246,6 +257,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
         default_models=[],
         default_tool_groups=default_tool_groups,
         default_shields=default_shields,
+        default_connectors=[],
         vector_stores_config=VectorStoresConfig(
             default_provider_id="faiss",
             default_embedding_model=QualifiedModel(
@@ -309,7 +321,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
             "Google Cloud Project ID for Vertex AI",
         ),
         "VERTEX_AI_LOCATION": (
-            "us-central1",
+            "global",
             "Google Cloud Location for Vertex AI",
         ),
         "SAMBANOVA_API_KEY": (
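For a quick smoke test of the store this new provider points at, a hedged sketch using the official elasticsearch Python client; the client library and the scheme-qualified URL are assumptions (the config's default localhost:9200 omits the scheme), not part of llama-stack itself:

import os

from elasticsearch import Elasticsearch  # assumed client library

es = Elasticsearch(
    "http://" + os.environ.get("ELASTICSEARCH_URL", "localhost:9200"),
    api_key=os.environ.get("ELASTICSEARCH_API_KEY") or None,  # optional, as in the config
)
print(es.info())  # verifies connectivity before enabling the provider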
llama_stack/distributions/starter-gpu/build.yaml ADDED
@@ -0,0 +1,62 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers.
+    This distribution is intended for GPU-enabled environments.
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: remote::azure
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    - provider_type: remote::qdrant
+    - provider_type: remote::weaviate
+    - provider_type: remote::elasticsearch
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::huggingface-gpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- psycopg2-binary
+- sqlalchemy[asyncio]
llama_stack/distributions/starter-gpu/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: starter-gpu
+distro_name: starter-gpu
 apis:
 - agents
 - batches
@@ -29,7 +29,9 @@ providers:
       base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      network:
+        tls:
+          verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
@@ -71,7 +73,7 @@ providers:
     provider_type: remote::vertexai
     config:
       project: ${env.VERTEX_AI_PROJECT:=}
-      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+      location: ${env.VERTEX_AI_LOCATION:=global}
   - provider_id: groq
     provider_type: remote::groq
     config:
@@ -127,6 +129,11 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
+      distance_metric: COSINE
+      vector_index:
+        type: HNSW
+        m: 16
+        ef_construction: 64
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
@@ -145,6 +152,14 @@ providers:
       persistence:
         namespace: vector_io::weaviate
         backend: kv_default
+  - provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch}
+    provider_type: remote::elasticsearch
+    config:
+      elasticsearch_url: ${env.ELASTICSEARCH_URL:=localhost:9200}
+      elasticsearch_api_key: ${env.ELASTICSEARCH_API_KEY:=}
+      persistence:
+        namespace: vector_io::elasticsearch
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
@@ -255,6 +270,9 @@ storage:
     prompts:
       namespace: prompts
       backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -328,3 +346,4 @@ vector_stores:
   cleanup_interval_seconds: 86400
 safety:
   default_shield_id: llama-guard
+connectors: []
llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: starter-gpu
+distro_name: starter-gpu
 apis:
 - agents
 - batches
@@ -29,7 +29,9 @@ providers:
       base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
-      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      network:
+        tls:
+          verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
@@ -71,7 +73,7 @@ providers:
     provider_type: remote::vertexai
     config:
       project: ${env.VERTEX_AI_PROJECT:=}
-      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+      location: ${env.VERTEX_AI_LOCATION:=global}
   - provider_id: groq
     provider_type: remote::groq
     config:
@@ -127,6 +129,11 @@ providers:
       db: ${env.PGVECTOR_DB:=}
       user: ${env.PGVECTOR_USER:=}
       password: ${env.PGVECTOR_PASSWORD:=}
+      distance_metric: COSINE
+      vector_index:
+        type: HNSW
+        m: 16
+        ef_construction: 64
       persistence:
         namespace: vector_io::pgvector
         backend: kv_default
@@ -145,6 +152,14 @@ providers:
       persistence:
         namespace: vector_io::weaviate
         backend: kv_default
+  - provider_id: ${env.ELASTICSEARCH_URL:+elasticsearch}
+    provider_type: remote::elasticsearch
+    config:
+      elasticsearch_url: ${env.ELASTICSEARCH_URL:=localhost:9200}
+      elasticsearch_api_key: ${env.ELASTICSEARCH_API_KEY:=}
+      persistence:
+        namespace: vector_io::elasticsearch
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
@@ -264,6 +279,9 @@ storage:
     prompts:
       namespace: prompts
      backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -337,3 +355,4 @@ vector_stores:
   cleanup_interval_seconds: 86400
 safety:
   default_shield_id: llama-guard
+connectors: []
llama_stack/distributions/template.py CHANGED
@@ -36,7 +36,7 @@ from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig
 from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
-from llama_stack_api import DatasetPurpose, ModelType
+from llama_stack_api import ConnectorInput, DatasetPurpose, ModelType
 
 
 def filter_empty_values(obj: Any) -> Any:
@@ -181,6 +181,7 @@ class RunConfigSettings(BaseModel):
     default_tool_groups: list[ToolGroupInput] | None = None
     default_datasets: list[DatasetInput] | None = None
     default_benchmarks: list[BenchmarkInput] | None = None
+    default_connectors: list[ConnectorInput] | None = None
     vector_stores_config: VectorStoresConfig | None = None
     safety_config: SafetyConfig | None = None
     storage_backends: dict[str, Any] | None = None
@@ -255,6 +256,10 @@ class RunConfigSettings(BaseModel):
                 backend="kv_default",
                 namespace="prompts",
             ).model_dump(exclude_none=True),
+            "connectors": KVStoreReference(
+                backend="kv_default",
+                namespace="connectors",
+            ).model_dump(exclude_none=True),
         }
 
         storage_config = dict(
@@ -265,7 +270,7 @@ class RunConfigSettings(BaseModel):
         # Return a dict that matches StackRunConfig structure
         config = {
             "version": LLAMA_STACK_RUN_CONFIG_VERSION,
-            "image_name": name,
+            "distro_name": name,
             "container_image": container_image,
             "apis": apis,
             "providers": provider_configs,
@@ -290,6 +295,9 @@ class RunConfigSettings(BaseModel):
         if self.safety_config:
             config["safety"] = self.safety_config.model_dump(exclude_none=True)
 
+        if self.default_connectors is not None:
+            config["connectors"] = [c.model_dump(exclude_none=True) for c in self.default_connectors]
+
         return config
 
 
llama_stack/distributions/watsonx/config.yaml CHANGED
@@ -1,5 +1,5 @@
 version: 2
-image_name: watsonx
+distro_name: watsonx
 apis:
 - agents
 - datasetio
@@ -118,6 +118,9 @@ storage:
     prompts:
       namespace: prompts
      backend: kv_default
+    connectors:
+      namespace: connectors
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
llama_stack/log.py CHANGED
@@ -51,6 +51,7 @@ CATEGORIES = [
     "post_training",
     "scoring",
     "tests",
+    "connectors",
 ]
 UNCATEGORIZED = "uncategorized"
 
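A minimal usage sketch for the new category, mirroring the get_logger(name=__name__, category="core") call seen in the exec.py hunk above:

from llama_stack.log import get_logger

# "connectors" is now a valid logging category, so connector code can log under it.
log = get_logger(name=__name__, category="connectors")
log.info("connector registry initialized")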