PyPI - llama-stack - Versions diffs - 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (738) hide show

llama_stack/core/utils/exec.py ADDED Viewed

@@ -0,0 +1,96 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import importlib
+import os
+import signal
+import subprocess
+import sys
+from termcolor import cprint
+from llama_stack.log import get_logger
+log = get_logger(name=__name__, category="core")
+def formulate_run_args(image_type: str, image_name: str) -> list:
+    """
+    Build the argument list for the stack start script.
+    Args:
+        image_type (str): Passed through unchanged as the second list element.
+        image_name (str): Virtual environment name; when empty, the VIRTUAL_ENV
+            environment variable is used instead.
+    Returns:
+        list: [script, image_type, env_name], or an empty list when neither
+            image_name nor VIRTUAL_ENV provides an environment name (an error
+            message is printed to stderr in that case).
+    """
+    # `import importlib` alone does not guarantee that the `resources` submodule
+    # has been loaded; import it explicitly so the lookup below cannot fail with
+    # AttributeError depending on what else happened to be imported first.
+    import importlib.resources
+    # Only venv is supported now
+    current_venv = os.environ.get("VIRTUAL_ENV")
+    env_name = image_name or current_venv
+    if not env_name:
+        cprint(
+            "No current virtual environment detected, please specify a virtual environment name with --image-name",
+            color="red",
+            file=sys.stderr,
+        )
+        return []
+    cprint(f"Using virtual environment: {env_name}", file=sys.stderr)
+    # Resolve the script relative to the installed llama_stack package
+    script = importlib.resources.files("llama_stack") / "core/start_stack.sh"
+    return [
+        script,
+        image_type,
+        env_name,
+    ]
+def in_notebook():
+    """
+    Report whether an IPython shell with an "IPKernelApp" config entry is active.
+    Returns:
+        bool: True when IPython is importable, get_ipython() returns a shell, and
+            "IPKernelApp" is present in that shell's config; False otherwise,
+            including when the import or an attribute lookup fails.
+    """
+    try:
+        from IPython import get_ipython
+        shell = get_ipython()
+        kernel_active = shell is not None and "IPKernelApp" in shell.config  # pragma: no cover
+    except (ImportError, AttributeError):
+        return False
+    return kernel_active
+def run_command(command: list[str]) -> int:
+    """
+    Run a command with a temporary SIGINT handler installed.
+    The child is started with subprocess.run without stdout/stderr arguments,
+    so it writes to this process's own streams; no output is captured.
+    Args:
+        command (list): The command to run.
+    Returns:
+        int: The return code of the command, or 1 if running it raised an exception.
+    """
+    original_sigint = signal.getsignal(signal.SIGINT)
+    def sigint_handler(signum, frame):
+        # Only logs: while this handler is installed, Ctrl-C does not raise
+        # KeyboardInterrupt in this process.
+        log.info("\nCtrl-C detected. Aborting...")
+    try:
+        # Set up the signal handler
+        signal.signal(signal.SIGINT, sigint_handler)
+        # Run the command; check=False means a non-zero exit is returned, not raised
+        result = subprocess.run(
+            command,
+            text=True,
+            check=False,
+        )
+        return result.returncode
+    except subprocess.SubprocessError as e:
+        log.error(f"Subprocess error: {e}")
+        return 1
+    except Exception as e:
+        log.exception(f"Unexpected error: {e}")
+        return 1
+    finally:
+        # Restore the original signal handler
+        signal.signal(signal.SIGINT, original_sigint)

llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} RENAMED Viewed

@@ -4,8 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from pydantic import BaseModel
+import enum
-class CodeShieldConfig(BaseModel):
-    pass
+class LlamaStackImageType(enum.Enum):
+    """Enumerates the Llama Stack image types; each member's value is its lowercase string name."""
+    CONTAINER = "container"
+    VENV = "venv"

llama_stack/{distribution → core}/utils/model_utils.py RENAMED Viewed

@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-import os
+from pathlib import Path
 from .config_dirs import DEFAULT_CHECKPOINT_DIR
 def model_local_dir(descriptor: str) -> str:
-    return os.path.join(DEFAULT_CHECKPOINT_DIR, descriptor)
+    return str(Path(DEFAULT_CHECKPOINT_DIR) / (descriptor.replace(":", "-")))

llama_stack/{distribution → core}/utils/prompt_for_config.py RENAMED Viewed

@@ -7,20 +7,21 @@
 import inspect
 import json
 from enum import Enum
-from typing import Any, get_args, get_origin, List, Literal, Optional, Type, Union
+from typing import Annotated, Any, Literal, Union, get_args, get_origin
 from pydantic import BaseModel
 from pydantic.fields import FieldInfo
 from pydantic_core import PydanticUndefinedType
-from typing_extensions import Annotated
+from llama_stack.log import get_logger
+log = get_logger(name=__name__, category="core")
 def is_list_of_primitives(field_type):
     """Check if a field type is a List of primitive types."""
     origin = get_origin(field_type)
-    if origin is List or origin is list:
+    if origin is list or origin is list:
         args = get_args(field_type)
         if len(args) == 1 and args[0] in (int, float, str, bool):
             return True
@@ -28,15 +29,11 @@ def is_list_of_primitives(field_type):
 def is_basemodel_without_fields(typ):
-    return (
-        inspect.isclass(typ) and issubclass(typ, BaseModel) and len(typ.__fields__) == 0
-    )
+    return inspect.isclass(typ) and issubclass(typ, BaseModel) and len(typ.__fields__) == 0
 def can_recurse(typ):
-    return (
-        inspect.isclass(typ) and issubclass(typ, BaseModel) and len(typ.__fields__) > 0
-    )
+    return inspect.isclass(typ) and issubclass(typ, BaseModel) and len(typ.__fields__) > 0
 def get_literal_values(field):
@@ -56,7 +53,7 @@ def get_non_none_type(field_type):
     return next(arg for arg in get_args(field_type) if arg is not type(None))
-def manually_validate_field(model: Type[BaseModel], field_name: str, value: Any):
+def manually_validate_field(model: type[BaseModel], field_name: str, value: Any):
     validators = model.__pydantic_decorators__.field_validators
     for _name, validator in validators.items():
         if field_name in validator.info.fields:
@@ -69,7 +66,7 @@ def is_discriminated_union(typ) -> bool:
     if isinstance(typ, FieldInfo):
         return typ.discriminator
     else:
-        if not (get_origin(typ) is Annotated):
+        if get_origin(typ) is not Annotated:
             return False
         args = get_args(typ)
         return len(args) >= 2 and args[1].discriminator
@@ -111,11 +108,9 @@ def prompt_for_discriminated_union(
         if discriminator_value in type_map:
             chosen_type = type_map[discriminator_value]
-            print(f"\nConfiguring {chosen_type.__name__}:")
+            log.info(f"\nConfiguring {chosen_type.__name__}:")
-            if existing_value and (
-                getattr(existing_value, discriminator) != discriminator_value
-            ):
+            if existing_value and (getattr(existing_value, discriminator) != discriminator_value):
                 existing_value = None
             sub_config = prompt_for_config(chosen_type, existing_value)
@@ -123,7 +118,7 @@ def prompt_for_discriminated_union(
             setattr(sub_config, discriminator, discriminator_value)
             return sub_config
         else:
-            print(f"Invalid {discriminator}. Please try again.")
+            log.error(f"Invalid {discriminator}. Please try again.")
 # This is somewhat elaborate, but does not purport to be comprehensive in any way.
@@ -131,9 +126,7 @@ def prompt_for_discriminated_union(
 #
 # doesn't support List[nested_class] yet or Dicts of any kind. needs a bunch of
 # unit tests for coverage.
-def prompt_for_config(
-    config_type: type[BaseModel], existing_config: Optional[BaseModel] = None
-) -> BaseModel:
+def prompt_for_config(config_type: type[BaseModel], existing_config: BaseModel | None = None) -> BaseModel:
     """
     Recursively prompt the user for configuration values based on a Pydantic BaseModel.
@@ -147,17 +140,11 @@ def prompt_for_config(
     for field_name, field in config_type.__fields__.items():
         field_type = field.annotation
-        existing_value = (
-            getattr(existing_config, field_name) if existing_config else None
-        )
+        existing_value = getattr(existing_config, field_name) if existing_config else None
         if existing_value:
             default_value = existing_value
         else:
-            default_value = (
-                field.default
-                if not isinstance(field.default, PydanticUndefinedType)
-                else None
-            )
+            default_value = field.default if not isinstance(field.default, PydanticUndefinedType) else None
         is_required = field.is_required
         # Skip fields with Literal type
@@ -180,15 +167,11 @@ def prompt_for_config(
                     config_data[field_name] = validated_value
                     break
                 except KeyError:
-                    print(
-                        f"Invalid choice. Please choose from: {', '.join(e.name for e in field_type)}"
-                    )
+                    log.error(f"Invalid choice. Please choose from: {', '.join(e.name for e in field_type)}")
             continue
         if is_discriminated_union(field):
-            config_data[field_name] = prompt_for_discriminated_union(
-                field_name, field, existing_value
-            )
+            config_data[field_name] = prompt_for_discriminated_union(field_name, field, existing_value)
             continue
         if is_optional(field_type) and can_recurse(get_non_none_type(field_type)):
@@ -197,11 +180,9 @@ def prompt_for_config(
                 config_data[field_name] = None
                 continue
             nested_type = get_non_none_type(field_type)
-            print(f"Entering sub-configuration for {field_name}:")
+            log.info(f"Entering sub-configuration for {field_name}:")
             config_data[field_name] = prompt_for_config(nested_type, existing_value)
-        elif is_optional(field_type) and is_discriminated_union(
-            get_non_none_type(field_type)
-        ):
+        elif is_optional(field_type) and is_discriminated_union(get_non_none_type(field_type)):
             prompt = f"Do you want to configure {field_name}? (y/n): "
             if input(prompt).lower() == "n":
                 config_data[field_name] = None
@@ -213,7 +194,7 @@ def prompt_for_config(
                 existing_value,
             )
         elif can_recurse(field_type):
-            print(f"\nEntering sub-configuration for {field_name}:")
+            log.info(f"\nEntering sub-configuration for {field_name}:")
             config_data[field_name] = prompt_for_config(
                 field_type,
                 existing_value,
@@ -240,7 +221,7 @@ def prompt_for_config(
                         config_data[field_name] = None
                         break
                     else:
-                        print("This field is required. Please provide a value.")
+                        log.error("This field is required. Please provide a value.")
                         continue
                 else:
                     try:
@@ -257,39 +238,29 @@ def prompt_for_config(
                             try:
                                 value = json.loads(user_input)
                                 if not isinstance(value, list):
-                                    raise ValueError(
-                                        "Input must be a JSON-encoded list"
-                                    )
+                                    raise ValueError("Input must be a JSON-encoded list")
                                 element_type = get_args(field_type)[0]
                                 value = [element_type(item) for item in value]
                             except json.JSONDecodeError:
-                                print(
-                                    'Invalid JSON. Please enter a valid JSON-encoded list e.g., ["foo","bar"]'
-                                )
+                                log.error('Invalid JSON. Please enter a valid JSON-encoded list e.g., ["foo","bar"]')
                                 continue
                             except ValueError as e:
-                                print(f"{str(e)}")
+                                log.error(f"{str(e)}")
                                 continue
                         elif get_origin(field_type) is dict:
                             try:
                                 value = json.loads(user_input)
                                 if not isinstance(value, dict):
-                                    raise ValueError(
-                                        "Input must be a JSON-encoded dictionary"
-                                    )
+                                    raise ValueError("Input must be a JSON-encoded dictionary")
                             except json.JSONDecodeError:
-                                print(
-                                    "Invalid JSON. Please enter a valid JSON-encoded dict."
-                                )
+                                log.error("Invalid JSON. Please enter a valid JSON-encoded dict.")
                                 continue
                         # Convert the input to the correct type
-                        elif inspect.isclass(field_type) and issubclass(
-                            field_type, BaseModel
-                        ):
+                        elif inspect.isclass(field_type) and issubclass(field_type, BaseModel):
                             # For nested BaseModels, we assume a dictionary-like string input
                             import ast
@@ -298,19 +269,15 @@ def prompt_for_config(
                             value = field_type(user_input)
                     except ValueError:
-                        print(
-                            f"Invalid input. Expected type: {getattr(field_type, '__name__', str(field_type))}"
-                        )
+                        log.error(f"Invalid input. Expected type: {getattr(field_type, '__name__', str(field_type))}")
                         continue
                 try:
                     # Validate the field using our manual validation function
-                    validated_value = manually_validate_field(
-                        config_type, field_name, value
-                    )
+                    validated_value = manually_validate_field(config_type, field_name, value)
                     config_data[field_name] = validated_value
                     break
                 except ValueError as e:
-                    print(f"Validation error: {str(e)}")
+                    log.error(f"Validation error: {str(e)}")
     return config_type(**config_data)

llama_stack/{apis/batch_inference → distributions/dell}/__init__.py RENAMED Viewed

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from .batch_inference import *  # noqa: F401 F403
+from .dell import get_distribution_template  # noqa: F401

llama_stack/distributions/dell/build.yaml ADDED Viewed

@@ -0,0 +1,33 @@
+version: 2
+distribution_spec:
+  description: Dell's distribution of Llama Stack. TGI inference via Dell's custom
+    container
+  providers:
+    inference:
+    - provider_type: remote::tgi
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]

llama_stack/distributions/dell/dell.py ADDED Viewed

@@ -0,0 +1,158 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.apis.models import ModelType
+from llama_stack.core.datatypes import (
+    BuildProvider,
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
+from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.inline.inference.sentence_transformers import (
+    SentenceTransformersInferenceConfig,
+)
+from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
+def get_distribution_template() -> DistributionTemplate:
+    """
+    Build the distribution template named "dell".
+    Returns:
+        DistributionTemplate: Template with two run configs, "run.yaml" and
+            "run-with-safety.yaml"; the latter adds a second TGI provider, the
+            safety model, and a default shield.
+    """
+    providers = {
+        "inference": [
+            BuildProvider(provider_type="remote::tgi"),
+            BuildProvider(provider_type="inline::sentence-transformers"),
+        ],
+        "vector_io": [
+            BuildProvider(provider_type="inline::faiss"),
+            BuildProvider(provider_type="remote::chromadb"),
+            BuildProvider(provider_type="remote::pgvector"),
+        ],
+        "safety": [BuildProvider(provider_type="inline::llama-guard")],
+        "agents": [BuildProvider(provider_type="inline::meta-reference")],
+        "eval": [BuildProvider(provider_type="inline::meta-reference")],
+        "datasetio": [
+            BuildProvider(provider_type="remote::huggingface"),
+            BuildProvider(provider_type="inline::localfs"),
+        ],
+        "scoring": [
+            BuildProvider(provider_type="inline::basic"),
+            BuildProvider(provider_type="inline::llm-as-judge"),
+            BuildProvider(provider_type="inline::braintrust"),
+        ],
+        "tool_runtime": [
+            BuildProvider(provider_type="remote::brave-search"),
+            BuildProvider(provider_type="remote::tavily-search"),
+            BuildProvider(provider_type="inline::rag-runtime"),
+        ],
+    }
+    name = "dell"
+    # Two TGI providers: tgi0 serves the inference model, tgi1 the safety model
+    inference_provider = Provider(
+        provider_id="tgi0",
+        provider_type="remote::tgi",
+        config={
+            "url": "${env.DEH_URL}",
+        },
+    )
+    safety_inference_provider = Provider(
+        provider_id="tgi1",
+        provider_type="remote::tgi",
+        config={
+            "url": "${env.DEH_SAFETY_URL}",
+        },
+    )
+    embedding_provider = Provider(
+        provider_id="sentence-transformers",
+        provider_type="inline::sentence-transformers",
+        config=SentenceTransformersInferenceConfig.sample_run_config(),
+    )
+    chromadb_provider = Provider(
+        provider_id="chromadb",
+        provider_type="remote::chromadb",
+        config=ChromaVectorIOConfig.sample_run_config(
+            f"~/.llama/distributions/{name}/",
+            url="${env.CHROMADB_URL:=}",
+        ),
+    )
+    inference_model = ModelInput(
+        model_id="${env.INFERENCE_MODEL}",
+        provider_id="tgi0",
+    )
+    safety_model = ModelInput(
+        model_id="${env.SAFETY_MODEL}",
+        provider_id="tgi1",
+    )
+    embedding_model = ModelInput(
+        model_id="nomic-embed-text-v1.5",
+        provider_id="sentence-transformers",
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 768,
+        },
+    )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="brave-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::rag",
+            provider_id="rag-runtime",
+        ),
+    ]
+    return DistributionTemplate(
+        name=name,
+        distro_type="self_hosted",
+        description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container",
+        container_image=None,
+        providers=providers,
+        run_configs={
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [inference_provider, embedding_provider],
+                    "vector_io": [chromadb_provider],
+                },
+                default_models=[inference_model, embedding_model],
+                default_tool_groups=default_tool_groups,
+            ),
+            "run-with-safety.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [
+                        inference_provider,
+                        safety_inference_provider,
+                        embedding_provider,
+                    ],
+                    "vector_io": [chromadb_provider],
+                },
+                default_models=[inference_model, safety_model, embedding_model],
+                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
+                default_tool_groups=default_tool_groups,
+            ),
+        },
+        run_config_env_vars={
+            "DEH_URL": (
+                "http://0.0.0.0:8181",
+                "URL for the Dell inference server",
+            ),
+            "DEH_SAFETY_URL": (
+                "http://0.0.0.0:8282",
+                "URL for the Dell safety inference server",
+            ),
+            # Key matches the ${env.CHROMADB_URL:=} reference used by chromadb_provider
+            "CHROMADB_URL": (
+                "http://localhost:6601",
+                "URL for the Chroma server",
+            ),
+            "INFERENCE_MODEL": (
+                "meta-llama/Llama-3.2-3B-Instruct",
+                "Inference model loaded into the TGI server",
+            ),
+            "SAFETY_MODEL": (
+                "meta-llama/Llama-Guard-3-1B",
+                "Name of the safety (Llama-Guard) model to use",
+            ),
+        },
+    )

llama_stack/distributions/dell/run-with-safety.yaml ADDED Viewed

@@ -0,0 +1,141 @@
+version: 2
+image_name: dell
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: tgi0
+    provider_type: remote::tgi
+    config:
+      url: ${env.DEH_URL}
+  - provider_id: tgi1
+    provider_type: remote::tgi
+    config:
+      url: ${env.DEH_SAFETY_URL}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: tgi0
+    model_type: llm
+  - metadata: {}
+    model_id: ${env.SAFETY_MODEL}
+    provider_id: tgi1
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields:
+  - shield_id: ${env.SAFETY_MODEL}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: brave-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl