llama-stack 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- llama_stack/providers/utils/inference/inference_store.py +5 -13
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/METADATA +3 -80
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/RECORD +7 -7
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/top_level.txt +0 -0
llama_stack/providers/utils/inference/inference_store.py

@@ -56,7 +56,7 @@ class InferenceStore:
             logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
 
         await self.sql_store.create_table(
-
+            self.reference.table_name,
             {
                 "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                 "created": ColumnType.INTEGER,
@@ -66,14 +66,6 @@ class InferenceStore:
             },
         )
 
-        if self.enable_write_queue:
-            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-            for _ in range(self._num_writers):
-                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-            logger.debug(
-                f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
-            )
-
     async def shutdown(self) -> None:
         if not self._worker_tasks:
             return
@@ -161,7 +153,7 @@ class InferenceStore:
 
         try:
             await self.sql_store.insert(
-                table=
+                table=self.reference.table_name,
                 data=record_data,
             )
         except IntegrityError as e:
@@ -173,7 +165,7 @@ class InferenceStore:
             error_message = str(e.orig) if e.orig else str(e)
             if self._is_unique_constraint_error(error_message):
                 # Update the existing record instead
-                await self.sql_store.update(table=
+                await self.sql_store.update(table=self.reference.table_name, data=record_data, where={"id": data["id"]})
             else:
                 # Re-raise if it's not a unique constraint error
                 raise
@@ -217,7 +209,7 @@ class InferenceStore:
             where_conditions["model"] = model
 
         paginated_result = await self.sql_store.fetch_all(
-            table=
+            table=self.reference.table_name,
             where=where_conditions if where_conditions else None,
             order_by=[("created", order.value)],
             cursor=("id", after) if after else None,
@@ -246,7 +238,7 @@ class InferenceStore:
             raise ValueError("Inference store is not initialized")
 
         row = await self.sql_store.fetch_one(
-            table=
+            table=self.reference.table_name,
             where={"id": completion_id},
         )
 
{llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.3.4
+Version: 0.3.5
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -22,7 +22,7 @@ Requires-Dist: fire
 Requires-Dist: httpx
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: jsonschema
-Requires-Dist: llama-stack-client
+Requires-Dist: llama-stack-client==0.3.5
 Requires-Dist: openai>=1.107
 Requires-Dist: prompt-toolkit
 Requires-Dist: python-dotenv
@@ -45,7 +45,7 @@ Requires-Dist: starlette>=0.49.1
 Provides-Extra: ui
 Requires-Dist: streamlit; extra == "ui"
 Requires-Dist: pandas; extra == "ui"
-Requires-Dist: llama-stack-client
+Requires-Dist: llama-stack-client==0.3.5; extra == "ui"
 Requires-Dist: streamlit-option-menu; extra == "ui"
 Dynamic: license-file
 
@@ -61,83 +61,6 @@ Dynamic: license-file
 [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
 
 
-### ✨🎉 Llama 4 Support 🎉✨
-We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
-
-<details>
-
-<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
-
-\
-*Note you need 8xH100 GPU-host to run these models*
-
-```bash
-pip install -U llama_stack
-
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-# get meta url from llama.com
-huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
-
-# install dependencies for the distribution
-llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
-
-# start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu
-
-# install client to interact with the server
-pip install llama-stack-client
-```
-### CLI
-```bash
-# Run a chat completion
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-
-llama-stack-client --endpoint http://localhost:8321 \
-  inference chat-completion \
-  --model-id meta-llama/$MODEL \
-  --message "write a haiku for meta's llama 4 models"
-
-OpenAIChatCompletion(
-    ...
-    choices=[
-        OpenAIChatCompletionChoice(
-            finish_reason='stop',
-            index=0,
-            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
-                role='assistant',
-                content='...**Silent minds awaken,** \n**Whispers of billions of words,** \n**Reasoning breaks the night.** \n\n— \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
-                ...
-            ),
-            ...
-        )
-    ],
-    ...
-)
-```
-### Python SDK
-```python
-from llama_stack_client import LlamaStackClient
-
-client = LlamaStackClient(base_url=f"http://localhost:8321")
-
-model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-prompt = "Write a haiku about coding"
-
-print(f"User> {prompt}")
-response = client.chat.completions.create(
-    model=model_id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": prompt},
-    ],
-)
-print(f"Assistant> {response.choices[0].message.content}")
-```
-As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
-
-
-</details>
-
 ### 🚀 One-Line Installer 🚀
 
 To try Llama Stack locally, run:
{llama_stack-0.3.4.dist-info → llama_stack-0.3.5.dist-info}/RECORD

@@ -556,7 +556,7 @@ llama_stack/providers/utils/files/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8us
 llama_stack/providers/utils/files/form_data.py,sha256=oLDS9gsOWpUnqX51qczjNGTfHJBrZ0SFZbEHFtsfqCs,2291
 llama_stack/providers/utils/inference/__init__.py,sha256=Ocwqyn7ytwdt1vMFXsPBoa5D6uhA1fIljF-HiIsVvKw,1089
 llama_stack/providers/utils/inference/embedding_mixin.py,sha256=Ur9A0VJB0BEDh00Er8Ua-Mc08Sa69YAQW_cCcAdxB88,3336
-llama_stack/providers/utils/inference/inference_store.py,sha256=
+llama_stack/providers/utils/inference/inference_store.py,sha256=p9GwdiWGQw9Tnb-xL7kqNi0odOnecyIhxsrg6VoI-3U,9891
 llama_stack/providers/utils/inference/litellm_openai_mixin.py,sha256=tcRCccOd4fR61TIQjFGb-B6Qybu5q-pklK5fo87Ji3I,13094
 llama_stack/providers/utils/inference/model_registry.py,sha256=ElaDfW67XphDvVLYBBghwSB-2A704ELqpJpm42Hdpc8,8250
 llama_stack/providers/utils/inference/openai_compat.py,sha256=kTjea5GUmaD8UfA6UgoPD8wvmWNBnAwuWLkmNUwy-as,49768
@@ -617,9 +617,9 @@ llama_stack/strong_typing/topological.py,sha256=I2YyhYW62PBM2wpfn6mbeCRxKGl_oa5t
 llama_stack/testing/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/testing/api_recorder.py,sha256=jt5Fq8HOPTA4rDzwIWWdBQJjxtivhbqoghFql3D--A0,38423
 llama_stack/ui/node_modules/flatted/python/flatted.py,sha256=UYburBDqkySaTfSpntPCUJRxiBGcplusJM7ECX8FEgA,3860
-llama_stack-0.3.
-llama_stack-0.3.
-llama_stack-0.3.
-llama_stack-0.3.
-llama_stack-0.3.
-llama_stack-0.3.
+llama_stack-0.3.5.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
+llama_stack-0.3.5.dist-info/METADATA,sha256=pQ_p1RWmVzNAMznjofhtlzytTyPOBCdOYinnHHLItHg,12817
+llama_stack-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llama_stack-0.3.5.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
+llama_stack-0.3.5.dist-info/top_level.txt,sha256=2-nbQ1CAn4_w76YD_O6N6ofvjmk4DX5NFaBuApSx5N0,12
+llama_stack-0.3.5.dist-info/RECORD,,
The remaining files (WHEEL, entry_points.txt, licenses/LICENSE, top_level.txt) are unchanged between the two versions.