llama-stack 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- llama_stack/providers/utils/inference/inference_store.py
+++ llama_stack/providers/utils/inference/inference_store.py
@@ -56,7 +56,7 @@ class InferenceStore:
             logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
 
         await self.sql_store.create_table(
-            "chat_completions",
+            self.reference.table_name,
             {
                 "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                 "created": ColumnType.INTEGER,
@@ -66,14 +66,6 @@ class InferenceStore:
             },
         )
 
-        if self.enable_write_queue:
-            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-            for _ in range(self._num_writers):
-                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-            logger.debug(
-                f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
-            )
-
     async def shutdown(self) -> None:
         if not self._worker_tasks:
             return
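The removed block is a standard asyncio producer/consumer setup: a bounded `asyncio.Queue` plus `_num_writers` long-running tasks draining it. Since the `logger.debug` line about the queue being disabled for SQLite survives earlier in `initialize()`, the setup has presumably moved earlier in the method rather than been dropped. A self-contained sketch of the pattern, with all names illustrative rather than taken from the store:

```python
import asyncio


async def writer_loop(queue: asyncio.Queue) -> None:
    # Each worker drains the shared queue; a None item is the shutdown sentinel.
    while True:
        item = await queue.get()
        try:
            if item is None:
                return
            # ... perform the actual write here ...
        finally:
            queue.task_done()


async def start_write_queue(num_writers: int = 4, max_size: int = 10_000):
    # A bounded queue applies backpressure to producers when writers fall behind.
    queue: asyncio.Queue = asyncio.Queue(maxsize=max_size)
    workers = [asyncio.create_task(writer_loop(queue)) for _ in range(num_writers)]
    return queue, workers
```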
@@ -161,7 +153,7 @@ class InferenceStore:
 
         try:
             await self.sql_store.insert(
-                table="chat_completions",
+                table=self.reference.table_name,
                 data=record_data,
             )
         except IntegrityError as e:
@@ -173,7 +165,7 @@ class InferenceStore:
             error_message = str(e.orig) if e.orig else str(e)
             if self._is_unique_constraint_error(error_message):
                 # Update the existing record instead
-                await self.sql_store.update(table="chat_completions", data=record_data, where={"id": data["id"]})
+                await self.sql_store.update(table=self.reference.table_name, data=record_data, where={"id": data["id"]})
             else:
                 # Re-raise if it's not a unique constraint error
                 raise
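The two hunks above keep the existing insert-then-update fallback: the store attempts a plain INSERT and, only when the failure is a unique-constraint violation on `id`, rewrites the existing row instead of raising. The same pattern in a self-contained `sqlite3` sketch (table and columns are simplified stand-ins, not the store's real schema):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE chat_completions (id TEXT PRIMARY KEY, created INTEGER)")


def store_record(record: dict) -> None:
    # Optimistically insert; on a duplicate id, update the existing row instead.
    try:
        conn.execute(
            "INSERT INTO chat_completions (id, created) VALUES (:id, :created)", record
        )
    except sqlite3.IntegrityError:
        conn.execute("UPDATE chat_completions SET created = :created WHERE id = :id", record)


store_record({"id": "c1", "created": 1})
store_record({"id": "c1", "created": 2})  # second call updates instead of failing
```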
@@ -217,7 +209,7 @@ class InferenceStore:
             where_conditions["model"] = model
 
         paginated_result = await self.sql_store.fetch_all(
-            table="chat_completions",
+            table=self.reference.table_name,
             where=where_conditions if where_conditions else None,
             order_by=[("created", order.value)],
             cursor=("id", after) if after else None,
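The listing hunk applies the same table-name substitution inside a cursor-paginated query: rows are ordered by `created` and the `("id", after)` cursor resumes after the last row the caller saw. Below is a minimal keyset-pagination sketch in plain `sqlite3` showing the idea behind such a cursor (schema and helper are illustrative, not the `fetch_all` API):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE chat_completions (id TEXT PRIMARY KEY, created INTEGER)")
conn.executemany(
    "INSERT INTO chat_completions VALUES (?, ?)",
    [("a", 1), ("b", 2), ("c", 3)],
)


def list_page(after: str | None, limit: int = 2) -> list[tuple[str, int]]:
    # Resume strictly after the cursor row instead of using OFFSET;
    # the id tie-break keeps the ordering stable for equal timestamps.
    if after is None:
        rows = conn.execute(
            "SELECT id, created FROM chat_completions ORDER BY created, id LIMIT ?",
            (limit,),
        )
    else:
        rows = conn.execute(
            "SELECT id, created FROM chat_completions"
            " WHERE (created, id) > (SELECT created, id FROM chat_completions WHERE id = ?)"
            " ORDER BY created, id LIMIT ?",
            (after, limit),
        )
    return rows.fetchall()


page1 = list_page(None)              # [("a", 1), ("b", 2)]
page2 = list_page(after=page1[-1][0])  # [("c", 3)]
```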
@@ -246,7 +238,7 @@ class InferenceStore:
             raise ValueError("Inference store is not initialized")
 
         row = await self.sql_store.fetch_one(
-            table="chat_completions",
+            table=self.reference.table_name,
             where={"id": completion_id},
         )
 
--- llama_stack-0.3.4.dist-info/METADATA
+++ llama_stack-0.3.5.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.3.4
+Version: 0.3.5
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -22,7 +22,7 @@ Requires-Dist: fire
 Requires-Dist: httpx
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: jsonschema
-Requires-Dist: llama-stack-client>=0.3.4
+Requires-Dist: llama-stack-client==0.3.5
 Requires-Dist: openai>=1.107
 Requires-Dist: prompt-toolkit
 Requires-Dist: python-dotenv
@@ -45,7 +45,7 @@ Requires-Dist: starlette>=0.49.1
 Provides-Extra: ui
 Requires-Dist: streamlit; extra == "ui"
 Requires-Dist: pandas; extra == "ui"
-Requires-Dist: llama-stack-client>=0.3.4; extra == "ui"
+Requires-Dist: llama-stack-client==0.3.5; extra == "ui"
 Requires-Dist: streamlit-option-menu; extra == "ui"
 Dynamic: license-file
 
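Both Requires-Dist hunks tighten the `llama-stack-client` pin from a floor (`>=0.3.4`) to an exact match (`==0.3.5`), so server and client releases now move in lockstep. A quick way to confirm what actually resolved in an installed environment:

```python
from importlib.metadata import version

# Under the exact pin, both distributions should report the same version.
print(version("llama-stack"))         # e.g. 0.3.5
print(version("llama-stack-client"))  # should match under the == pin
```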
@@ -61,83 +61,6 @@ Dynamic: license-file
 [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
 
 
-### ✨🎉 Llama 4 Support 🎉✨
-We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
-
-<details>
-
-<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
-
-\
-*Note you need 8xH100 GPU-host to run these models*
-
-```bash
-pip install -U llama_stack
-
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-# get meta url from llama.com
-huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
-
-# install dependencies for the distribution
-llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
-
-# start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu
-
-# install client to interact with the server
-pip install llama-stack-client
-```
-### CLI
-```bash
-# Run a chat completion
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-
-llama-stack-client --endpoint http://localhost:8321 \
-  inference chat-completion \
-  --model-id meta-llama/$MODEL \
-  --message "write a haiku for meta's llama 4 models"
-
-OpenAIChatCompletion(
-    ...
-    choices=[
-        OpenAIChatCompletionChoice(
-            finish_reason='stop',
-            index=0,
-            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
-                role='assistant',
-                content='...**Silent minds awaken,** \n**Whispers of billions of words,** \n**Reasoning breaks the night.** \n\n— \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
-                ...
-            ),
-            ...
-        )
-    ],
-    ...
-)
-```
-### Python SDK
-```python
-from llama_stack_client import LlamaStackClient
-
-client = LlamaStackClient(base_url=f"http://localhost:8321")
-
-model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-prompt = "Write a haiku about coding"
-
-print(f"User> {prompt}")
-response = client.chat.completions.create(
-    model=model_id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": prompt},
-    ],
-)
-print(f"Assistant> {response.choices[0].message.content}")
-```
-As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
-
-
-</details>
-
 ### 🚀 One-Line Installer 🚀
 
 To try Llama Stack locally, run:
--- llama_stack-0.3.4.dist-info/RECORD
+++ llama_stack-0.3.5.dist-info/RECORD
@@ -556,7 +556,7 @@ llama_stack/providers/utils/files/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8us
 llama_stack/providers/utils/files/form_data.py,sha256=oLDS9gsOWpUnqX51qczjNGTfHJBrZ0SFZbEHFtsfqCs,2291
 llama_stack/providers/utils/inference/__init__.py,sha256=Ocwqyn7ytwdt1vMFXsPBoa5D6uhA1fIljF-HiIsVvKw,1089
 llama_stack/providers/utils/inference/embedding_mixin.py,sha256=Ur9A0VJB0BEDh00Er8Ua-Mc08Sa69YAQW_cCcAdxB88,3336
-llama_stack/providers/utils/inference/inference_store.py,sha256=zNscOx7uiIspV8UoAdSlciWvupOWrLDBEtoros5tlpk,10273
+llama_stack/providers/utils/inference/inference_store.py,sha256=p9GwdiWGQw9Tnb-xL7kqNi0odOnecyIhxsrg6VoI-3U,9891
 llama_stack/providers/utils/inference/litellm_openai_mixin.py,sha256=tcRCccOd4fR61TIQjFGb-B6Qybu5q-pklK5fo87Ji3I,13094
 llama_stack/providers/utils/inference/model_registry.py,sha256=ElaDfW67XphDvVLYBBghwSB-2A704ELqpJpm42Hdpc8,8250
 llama_stack/providers/utils/inference/openai_compat.py,sha256=kTjea5GUmaD8UfA6UgoPD8wvmWNBnAwuWLkmNUwy-as,49768
@@ -617,9 +617,9 @@ llama_stack/strong_typing/topological.py,sha256=I2YyhYW62PBM2wpfn6mbeCRxKGl_oa5t
 llama_stack/testing/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/testing/api_recorder.py,sha256=jt5Fq8HOPTA4rDzwIWWdBQJjxtivhbqoghFql3D--A0,38423
 llama_stack/ui/node_modules/flatted/python/flatted.py,sha256=UYburBDqkySaTfSpntPCUJRxiBGcplusJM7ECX8FEgA,3860
-llama_stack-0.3.4.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
-llama_stack-0.3.4.dist-info/METADATA,sha256=K8Y4WWyN4f0HVCOjS7oasv_S6XkI6GuntvI2yfh9A_o,15157
-llama_stack-0.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llama_stack-0.3.4.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
-llama_stack-0.3.4.dist-info/top_level.txt,sha256=2-nbQ1CAn4_w76YD_O6N6ofvjmk4DX5NFaBuApSx5N0,12
-llama_stack-0.3.4.dist-info/RECORD,,
+llama_stack-0.3.5.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
+llama_stack-0.3.5.dist-info/METADATA,sha256=pQ_p1RWmVzNAMznjofhtlzytTyPOBCdOYinnHHLItHg,12817
+llama_stack-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llama_stack-0.3.5.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
+llama_stack-0.3.5.dist-info/top_level.txt,sha256=2-nbQ1CAn4_w76YD_O6N6ofvjmk4DX5NFaBuApSx5N0,12
+llama_stack-0.3.5.dist-info/RECORD,,
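Each RECORD entry pairs a file path with `sha256=<digest>,<size>`, where the digest is URL-safe base64 with the trailing `=` padding stripped, per the wheel RECORD format. A small helper to recompute an entry for a locally extracted file:

```python
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    # RECORD stores sha256 digests as unpadded URL-safe base64, plus the byte size.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"


# Example: compare against the inference_store.py line in the RECORD diff above.
# print(record_entry("llama_stack/providers/utils/inference/inference_store.py"))
```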