PyPI - llama-stack - Versions diffs - 0.3.4__tar.gz → 0.3.5__tar.gz - Mend

llama-stack 0.3.4 (tar.gz) → 0.3.5 (tar.gz)

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (633) hide show

{llama_stack-0.3.4/llama_stack.egg-info → llama_stack-0.3.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.3.4
+Version: 0.3.5
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -22,7 +22,7 @@ Requires-Dist: fire
 Requires-Dist: httpx
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: jsonschema
-Requires-Dist: llama-stack-client>=0.3.4
+Requires-Dist: llama-stack-client==0.3.5
 Requires-Dist: openai>=1.107
 Requires-Dist: prompt-toolkit
 Requires-Dist: python-dotenv
@@ -45,7 +45,7 @@ Requires-Dist: starlette>=0.49.1
 Provides-Extra: ui
 Requires-Dist: streamlit; extra == "ui"
 Requires-Dist: pandas; extra == "ui"
-Requires-Dist: llama-stack-client>=0.3.4; extra == "ui"
+Requires-Dist: llama-stack-client==0.3.5; extra == "ui"
 Requires-Dist: streamlit-option-menu; extra == "ui"
 Dynamic: license-file
@@ -61,83 +61,6 @@ Dynamic: license-file
 [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
-### ✨🎉 Llama 4 Support  🎉✨
-We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
-<details>
-<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
-\
-*Note you need 8xH100 GPU-host to run these models*
-```bash
-pip install -U llama_stack
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-# get meta url from llama.com
-huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
-# install dependencies for the distribution
-llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
-# start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu
-# install client to interact with the server
-pip install llama-stack-client
-```
-### CLI
-```bash
-# Run a chat completion
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-llama-stack-client --endpoint http://localhost:8321 \
-inference chat-completion \
---model-id meta-llama/$MODEL \
---message "write a haiku for meta's llama 4 models"
-OpenAIChatCompletion(
-    ...
-    choices=[
-        OpenAIChatCompletionChoice(
-            finish_reason='stop',
-            index=0,
-            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
-                role='assistant',
-                content='...**Silent minds awaken,**  \n**Whispers of billions of words,**  \n**Reasoning breaks the night.**  \n\n—  \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
-                ...
-            ),
-            ...
-        )
-    ],
-    ...
-)
-```
-### Python SDK
-```python
-from llama_stack_client import LlamaStackClient
-client = LlamaStackClient(base_url=f"http://localhost:8321")
-model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-prompt = "Write a haiku about coding"
-print(f"User> {prompt}")
-response = client.chat.completions.create(
-    model=model_id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": prompt},
-    ],
-)
-print(f"Assistant> {response.choices[0].message.content}")
-```
-As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
-</details>
 ### 🚀 One-Line Installer 🚀
 To try Llama Stack locally, run:

{llama_stack-0.3.4 → llama_stack-0.3.5}/README.md RENAMED Viewed

@@ -10,83 +10,6 @@
 [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
-### ✨🎉 Llama 4 Support  🎉✨
-We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
-<details>
-<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
-\
-*Note you need 8xH100 GPU-host to run these models*
-```bash
-pip install -U llama_stack
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-# get meta url from llama.com
-huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
-# install dependencies for the distribution
-llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
-# start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu
-# install client to interact with the server
-pip install llama-stack-client
-```
-### CLI
-```bash
-# Run a chat completion
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-llama-stack-client --endpoint http://localhost:8321 \
-inference chat-completion \
---model-id meta-llama/$MODEL \
---message "write a haiku for meta's llama 4 models"
-OpenAIChatCompletion(
-    ...
-    choices=[
-        OpenAIChatCompletionChoice(
-            finish_reason='stop',
-            index=0,
-            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
-                role='assistant',
-                content='...**Silent minds awaken,**  \n**Whispers of billions of words,**  \n**Reasoning breaks the night.**  \n\n—  \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
-                ...
-            ),
-            ...
-        )
-    ],
-    ...
-)
-```
-### Python SDK
-```python
-from llama_stack_client import LlamaStackClient
-client = LlamaStackClient(base_url=f"http://localhost:8321")
-model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-prompt = "Write a haiku about coding"
-print(f"User> {prompt}")
-response = client.chat.completions.create(
-    model=model_id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": prompt},
-    ],
-)
-print(f"Assistant> {response.choices[0].message.content}")
-```
-As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
-</details>
 ### 🚀 One-Line Installer 🚀
 To try Llama Stack locally, run:

{llama_stack-0.3.4 → llama_stack-0.3.5}/llama_stack/providers/utils/inference/inference_store.py RENAMED Viewed

@@ -56,7 +56,7 @@ class InferenceStore:
             logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
         await self.sql_store.create_table(
-            "chat_completions",
+            self.reference.table_name,
             {
                 "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                 "created": ColumnType.INTEGER,
@@ -66,14 +66,6 @@ class InferenceStore:
             },
         )
-        if self.enable_write_queue:
-            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-            for _ in range(self._num_writers):
-                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-            logger.debug(
-                f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
-            )
     async def shutdown(self) -> None:
         if not self._worker_tasks:
             return
@@ -161,7 +153,7 @@ class InferenceStore:
         try:
             await self.sql_store.insert(
-                table="chat_completions",
+                table=self.reference.table_name,
                 data=record_data,
             )
         except IntegrityError as e:
@@ -173,7 +165,7 @@ class InferenceStore:
             error_message = str(e.orig) if e.orig else str(e)
             if self._is_unique_constraint_error(error_message):
                 # Update the existing record instead
-                await self.sql_store.update(table="chat_completions", data=record_data, where={"id": data["id"]})
+                await self.sql_store.update(table=self.reference.table_name, data=record_data, where={"id": data["id"]})
             else:
                 # Re-raise if it's not a unique constraint error
                 raise
@@ -217,7 +209,7 @@ class InferenceStore:
             where_conditions["model"] = model
         paginated_result = await self.sql_store.fetch_all(
-            table="chat_completions",
+            table=self.reference.table_name,
             where=where_conditions if where_conditions else None,
             order_by=[("created", order.value)],
             cursor=("id", after) if after else None,
@@ -246,7 +238,7 @@ class InferenceStore:
             raise ValueError("Inference store is not initialized")
         row = await self.sql_store.fetch_one(
-            table="chat_completions",
+            table=self.reference.table_name,
             where={"id": completion_id},
         )

{llama_stack-0.3.4 → llama_stack-0.3.5/llama_stack.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.3.4
+Version: 0.3.5
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -22,7 +22,7 @@ Requires-Dist: fire
 Requires-Dist: httpx
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: jsonschema
-Requires-Dist: llama-stack-client>=0.3.4
+Requires-Dist: llama-stack-client==0.3.5
 Requires-Dist: openai>=1.107
 Requires-Dist: prompt-toolkit
 Requires-Dist: python-dotenv
@@ -45,7 +45,7 @@ Requires-Dist: starlette>=0.49.1
 Provides-Extra: ui
 Requires-Dist: streamlit; extra == "ui"
 Requires-Dist: pandas; extra == "ui"
-Requires-Dist: llama-stack-client>=0.3.4; extra == "ui"
+Requires-Dist: llama-stack-client==0.3.5; extra == "ui"
 Requires-Dist: streamlit-option-menu; extra == "ui"
 Dynamic: license-file
@@ -61,83 +61,6 @@ Dynamic: license-file
 [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
-### ✨🎉 Llama 4 Support  🎉✨
-We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
-<details>
-<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
-\
-*Note you need 8xH100 GPU-host to run these models*
-```bash
-pip install -U llama_stack
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-# get meta url from llama.com
-huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
-# install dependencies for the distribution
-llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
-# start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu
-# install client to interact with the server
-pip install llama-stack-client
-```
-### CLI
-```bash
-# Run a chat completion
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-llama-stack-client --endpoint http://localhost:8321 \
-inference chat-completion \
---model-id meta-llama/$MODEL \
---message "write a haiku for meta's llama 4 models"
-OpenAIChatCompletion(
-    ...
-    choices=[
-        OpenAIChatCompletionChoice(
-            finish_reason='stop',
-            index=0,
-            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
-                role='assistant',
-                content='...**Silent minds awaken,**  \n**Whispers of billions of words,**  \n**Reasoning breaks the night.**  \n\n—  \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
-                ...
-            ),
-            ...
-        )
-    ],
-    ...
-)
-```
-### Python SDK
-```python
-from llama_stack_client import LlamaStackClient
-client = LlamaStackClient(base_url=f"http://localhost:8321")
-model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-prompt = "Write a haiku about coding"
-print(f"User> {prompt}")
-response = client.chat.completions.create(
-    model=model_id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": prompt},
-    ],
-)
-print(f"Assistant> {response.choices[0].message.content}")
-```
-As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
-</details>
 ### 🚀 One-Line Installer 🚀
 To try Llama Stack locally, run:

{llama_stack-0.3.4 → llama_stack-0.3.5}/llama_stack.egg-info/requires.txt RENAMED Viewed

@@ -4,7 +4,7 @@ fire
 httpx
 jinja2>=3.1.6
 jsonschema
-llama-stack-client>=0.3.4
+llama-stack-client==0.3.5
 openai>=1.107
 prompt-toolkit
 python-dotenv
@@ -28,5 +28,5 @@ starlette>=0.49.1
 [ui]
 streamlit
 pandas
-llama-stack-client>=0.3.4
+llama-stack-client==0.3.5
 streamlit-option-menu

{llama_stack-0.3.4 → llama_stack-0.3.5}/pyproject.toml RENAMED Viewed

@@ -7,7 +7,7 @@ required-version = ">=0.7.0"
 [project]
 name = "llama_stack"
-version = "0.3.4"
+version = "0.3.5"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "Llama Stack"
 readme = "README.md"
@@ -30,7 +30,7 @@ dependencies = [
     "httpx",
     "jinja2>=3.1.6",
     "jsonschema",
-    "llama-stack-client>=0.3.4",
+    "llama-stack-client==0.3.5",
     "openai>=1.107",                                  # for expires_after support
     "prompt-toolkit",
     "python-dotenv",
@@ -56,7 +56,7 @@ dependencies = [
 ui = [
     "streamlit",
     "pandas",
-    "llama-stack-client>=0.3.4",
+    "llama-stack-client==0.3.5",
     "streamlit-option-menu",
 ]