llama-stack 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- llama_stack/providers/utils/inference/inference_store.py +5 -13
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/METADATA +3 -80
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/RECORD +7 -7
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/top_level.txt +0 -0
llama_stack/providers/utils/inference/inference_store.py

@@ -56,7 +56,7 @@ class InferenceStore:
             logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
 
         await self.sql_store.create_table(
-
+            self.reference.table_name,
             {
                 "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                 "created": ColumnType.INTEGER,
@@ -66,14 +66,6 @@ class InferenceStore:
             },
         )
 
-        if self.enable_write_queue:
-            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-            for _ in range(self._num_writers):
-                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-            logger.debug(
-                f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
-            )
-
     async def shutdown(self) -> None:
         if not self._worker_tasks:
             return
@@ -161,7 +153,7 @@ class InferenceStore:
 
         try:
             await self.sql_store.insert(
-                table=
+                table=self.reference.table_name,
                 data=record_data,
             )
         except IntegrityError as e:
@@ -173,7 +165,7 @@ class InferenceStore:
             error_message = str(e.orig) if e.orig else str(e)
             if self._is_unique_constraint_error(error_message):
                 # Update the existing record instead
-                await self.sql_store.update(table=
+                await self.sql_store.update(table=self.reference.table_name, data=record_data, where={"id": data["id"]})
             else:
                 # Re-raise if it's not a unique constraint error
                 raise
@@ -217,7 +209,7 @@ class InferenceStore:
             where_conditions["model"] = model
 
         paginated_result = await self.sql_store.fetch_all(
-            table=
+            table=self.reference.table_name,
             where=where_conditions if where_conditions else None,
             order_by=[("created", order.value)],
             cursor=("id", after) if after else None,
@@ -246,7 +238,7 @@ class InferenceStore:
             raise ValueError("Inference store is not initialized")
 
         row = await self.sql_store.fetch_one(
-            table=
+            table=self.reference.table_name,
             where={"id": completion_id},
         )
 
{llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.3.4
+Version: 0.3.5
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -22,7 +22,7 @@ Requires-Dist: fire
 Requires-Dist: httpx
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: jsonschema
-Requires-Dist: llama-stack-client
+Requires-Dist: llama-stack-client==0.3.5
 Requires-Dist: openai>=1.107
 Requires-Dist: prompt-toolkit
 Requires-Dist: python-dotenv
@@ -45,7 +45,7 @@ Requires-Dist: starlette>=0.49.1
 Provides-Extra: ui
 Requires-Dist: streamlit; extra == "ui"
 Requires-Dist: pandas; extra == "ui"
-Requires-Dist: llama-stack-client
+Requires-Dist: llama-stack-client==0.3.5; extra == "ui"
 Requires-Dist: streamlit-option-menu; extra == "ui"
 Dynamic: license-file
 
@@ -61,83 +61,6 @@ Dynamic: license-file
 [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
 
 
-### ✨🎉 Llama 4 Support 🎉✨
-We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
-
-<details>
-
-<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
-
-\
-*Note you need 8xH100 GPU-host to run these models*
-
-```bash
-pip install -U llama_stack
-
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-# get meta url from llama.com
-huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
-
-# install dependencies for the distribution
-llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
-
-# start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu
-
-# install client to interact with the server
-pip install llama-stack-client
-```
-### CLI
-```bash
-# Run a chat completion
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-
-llama-stack-client --endpoint http://localhost:8321 \
-  inference chat-completion \
-  --model-id meta-llama/$MODEL \
-  --message "write a haiku for meta's llama 4 models"
-
-OpenAIChatCompletion(
-    ...
-    choices=[
-        OpenAIChatCompletionChoice(
-            finish_reason='stop',
-            index=0,
-            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
-                role='assistant',
-                content='...**Silent minds awaken,** \n**Whispers of billions of words,** \n**Reasoning breaks the night.** \n\n— \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
-                ...
-            ),
-            ...
-        )
-    ],
-    ...
-)
-```
-### Python SDK
-```python
-from llama_stack_client import LlamaStackClient
-
-client = LlamaStackClient(base_url=f"http://localhost:8321")
-
-model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-prompt = "Write a haiku about coding"
-
-print(f"User> {prompt}")
-response = client.chat.completions.create(
-    model=model_id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": prompt},
-    ],
-)
-print(f"Assistant> {response.choices[0].message.content}")
-```
-As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
-
-
-</details>
-
 ### 🚀 One-Line Installer 🚀
 
 To try Llama Stack locally, run:
{llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/RECORD

@@ -556,7 +556,7 @@ llama_stack/providers/utils/files/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8us
 llama_stack/providers/utils/files/form_data.py,sha256=oLDS9gsOWpUnqX51qczjNGTfHJBrZ0SFZbEHFtsfqCs,2291
 llama_stack/providers/utils/inference/__init__.py,sha256=Ocwqyn7ytwdt1vMFXsPBoa5D6uhA1fIljF-HiIsVvKw,1089
 llama_stack/providers/utils/inference/embedding_mixin.py,sha256=Ur9A0VJB0BEDh00Er8Ua-Mc08Sa69YAQW_cCcAdxB88,3336
-llama_stack/providers/utils/inference/inference_store.py,sha256=
+llama_stack/providers/utils/inference/inference_store.py,sha256=p9GwdiWGQw9Tnb-xL7kqNi0odOnecyIhxsrg6VoI-3U,9891
 llama_stack/providers/utils/inference/litellm_openai_mixin.py,sha256=tcRCccOd4fR61TIQjFGb-B6Qybu5q-pklK5fo87Ji3I,13094
 llama_stack/providers/utils/inference/model_registry.py,sha256=ElaDfW67XphDvVLYBBghwSB-2A704ELqpJpm42Hdpc8,8250
 llama_stack/providers/utils/inference/openai_compat.py,sha256=kTjea5GUmaD8UfA6UgoPD8wvmWNBnAwuWLkmNUwy-as,49768
@@ -617,9 +617,9 @@ llama_stack/strong_typing/topological.py,sha256=I2YyhYW62PBM2wpfn6mbeCRxKGl_oa5t
 llama_stack/testing/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/testing/api_recorder.py,sha256=jt5Fq8HOPTA4rDzwIWWdBQJjxtivhbqoghFql3D--A0,38423
 llama_stack/ui/node_modules/flatted/python/flatted.py,sha256=UYburBDqkySaTfSpntPCUJRxiBGcplusJM7ECX8FEgA,3860
-llama_stack-0.3.
-llama_stack-0.3.
-llama_stack-0.3.
-llama_stack-0.3.
-llama_stack-0.3.
-llama_stack-0.3.
+llama_stack-0.3.5.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
+llama_stack-0.3.5.dist-info/METADATA,sha256=pQ_p1RWmVzNAMznjofhtlzytTyPOBCdOYinnHHLItHg,12817
+llama_stack-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llama_stack-0.3.5.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
+llama_stack-0.3.5.dist-info/top_level.txt,sha256=2-nbQ1CAn4_w76YD_O6N6ofvjmk4DX5NFaBuApSx5N0,12
+llama_stack-0.3.5.dist-info/RECORD,,
The remaining files (WHEEL, entry_points.txt, licenses/LICENSE, top_level.txt) are unchanged between the two versions.