PyPI - llama-stack - Versions diffs - 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (460) hide show

llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py RENAMED Viewed

@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from threading import Lock
 from typing import Annotated, cast
 from pydantic import Field
@@ -15,12 +16,13 @@ from llama_stack.core.storage.datatypes import (
     StorageBackendConfig,
     StorageBackendType,
 )
-from .api import SqlStore
+from llama_stack_api.internal.sqlstore import SqlStore
 sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"]
 _SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {}
+_SQLSTORE_INSTANCES: dict[str, SqlStore] = {}
+_SQLSTORE_LOCKS: dict[str, Lock] = {}
 SqlStoreConfig = Annotated[
@@ -52,19 +54,34 @@ def sqlstore_impl(reference: SqlStoreReference) -> SqlStore:
             f"Unknown SQL store backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}"
         )
-    if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig):
-        from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
+    existing = _SQLSTORE_INSTANCES.get(backend_name)
+    if existing:
+        return existing
-        config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy()
-        return SqlAlchemySqlStoreImpl(config)
-    else:
-        raise ValueError(f"Unknown sqlstore type {backend_config.type}")
+    lock = _SQLSTORE_LOCKS.setdefault(backend_name, Lock())
+    with lock:
+        existing = _SQLSTORE_INSTANCES.get(backend_name)
+        if existing:
+            return existing
+        if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig):
+            from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
+            config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy()
+            instance = SqlAlchemySqlStoreImpl(config)
+            _SQLSTORE_INSTANCES[backend_name] = instance
+            return instance
+        else:
+            raise ValueError(f"Unknown sqlstore type {backend_config.type}")
 def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
     """Register the set of available SQL store backends for reference resolution."""
     global _SQLSTORE_BACKENDS
+    global _SQLSTORE_INSTANCES
     _SQLSTORE_BACKENDS.clear()
+    _SQLSTORE_INSTANCES.clear()
+    _SQLSTORE_LOCKS.clear()
     for name, cfg in backends.items():
         _SQLSTORE_BACKENDS[name] = cfg

llama_stack/core/store/registry.py CHANGED Viewed

@@ -12,8 +12,8 @@ import pydantic
 from llama_stack.core.datatypes import RoutableObjectWithProvider
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack.core.storage.kvstore import KVStore, kvstore_impl
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
 logger = get_logger(__name__, category="core::registry")

llama_stack/core/utils/config.py CHANGED Viewed

@@ -9,7 +9,10 @@ from typing import Any
 def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]:
     """Redact sensitive information from config before printing."""
-    sensitive_patterns = ["api_key", "api_token", "password", "secret"]
+    sensitive_patterns = ["api_key", "api_token", "password", "secret", "token"]
+    # Specific configuration field names that should NOT be redacted despite containing "token"
+    safe_token_fields = ["chunk_size_tokens", "max_tokens", "default_chunk_overlap_tokens"]
     def _redact_value(v: Any) -> Any:
         if isinstance(v, dict):
@@ -21,7 +24,10 @@ def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]:
     def _redact_dict(d: dict[str, Any]) -> dict[str, Any]:
         result = {}
         for k, v in d.items():
-            if any(pattern in k.lower() for pattern in sensitive_patterns):
+            # Don't redact if it's a safe field
+            if any(safe_field in k.lower() for safe_field in safe_token_fields):
+                result[k] = _redact_value(v)
+            elif any(pattern in k.lower() for pattern in sensitive_patterns):
                 result[k] = "********"
             else:
                 result[k] = _redact_value(v)

llama_stack/core/utils/config_resolution.py CHANGED Viewed

@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from enum import StrEnum
 from pathlib import Path
 from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
@@ -16,21 +15,14 @@ logger = get_logger(name=__name__, category="core")
 DISTRO_DIR = Path(__file__).parent.parent.parent.parent / "llama_stack" / "distributions"
-class Mode(StrEnum):
-    RUN = "run"
-    BUILD = "build"
 def resolve_config_or_distro(
     config_or_distro: str,
-    mode: Mode = Mode.RUN,
 ) -> Path:
     """
     Resolve a config/distro argument to a concrete config file path.
     Args:
         config_or_distro: User input (file path, distribution name, or built distribution)
-        mode: Mode resolving for ("run", "build", "server")
     Returns:
         Path to the resolved config file
@@ -47,38 +39,50 @@ def resolve_config_or_distro(
     # Strategy 2: Try as distribution name (if no .yaml extension)
     if not config_or_distro.endswith(".yaml"):
-        distro_config = _get_distro_config_path(config_or_distro, mode)
+        distro_config = _get_distro_config_path(config_or_distro)
         if distro_config.exists():
             logger.debug(f"Using distribution: {distro_config}")
             return distro_config
-    # Strategy 3: Try as built distribution name
-    distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
+    # Strategy 3: Try as distro config path (if no .yaml extension and contains a slash)
+    # eg: starter::run-with-postgres-store.yaml
+    # Use :: to avoid slash and confusion with a filesystem path
+    if "::" in config_or_distro:
+        distro_name, config_name = config_or_distro.split("::")
+        distro_config = _get_distro_config_path(distro_name, config_name)
+        if distro_config.exists():
+            logger.info(f"Using distribution: {distro_config}")
+            return distro_config
+    # Strategy 4: Try as built distribution name
+    distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-config.yaml"
     if distrib_config.exists():
         logger.debug(f"Using built distribution: {distrib_config}")
         return distrib_config
-    distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
+    distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / "config.yaml"
     if distrib_config.exists():
         logger.debug(f"Using built distribution: {distrib_config}")
         return distrib_config
-    # Strategy 4: Failed - provide helpful error
-    raise ValueError(_format_resolution_error(config_or_distro, mode))
+    # Strategy 5: Failed - provide helpful error
+    raise ValueError(_format_resolution_error(config_or_distro))
-def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
+def _get_distro_config_path(distro_name: str, path: str | None = None) -> Path:
     """Get the config file path for a distro."""
-    return DISTRO_DIR / distro_name / f"{mode}.yaml"
+    if not path or not path.endswith(".yaml"):
+        path = "config.yaml"
+    return DISTRO_DIR / distro_name / path
-def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:
+def _format_resolution_error(config_or_distro: str) -> str:
     """Format a helpful error message for resolution failures."""
     from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
-    distro_path = _get_distro_config_path(config_or_distro, mode)
-    distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
-    distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
+    distro_path = _get_distro_config_path(config_or_distro)
+    distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-config.yaml"
+    distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-config.yaml"
     available_distros = _get_available_distros()
     distros_str = ", ".join(available_distros) if available_distros else "none found"
@@ -99,15 +103,14 @@ Did you mean one of these distributions?
 def _get_available_distros() -> list[str]:
     """Get list of available distro names."""
-    if not DISTRO_DIR.exists() and not DISTRIBS_BASE_DIR.exists():
-        return []
-    return list(
-        set(
-            [d.name for d in DISTRO_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")]
-            + [d.name for d in DISTRIBS_BASE_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")]
-        )
-    )
+    distros = []
+    if DISTRO_DIR.exists():
+        distros.extend([d.name for d in DISTRO_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")])
+    if DISTRIBS_BASE_DIR.exists():
+        distros.extend([d.name for d in DISTRIBS_BASE_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")])
+    return list(set(distros))
 def _format_distro_suggestions(distros: list[str], user_input: str) -> str:

llama_stack/core/utils/context.py CHANGED Viewed

@@ -7,8 +7,6 @@
 from collections.abc import AsyncGenerator
 from contextvars import ContextVar
-from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT
 _MISSING = object()
@@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
             try:
                 yield item
                 # Update our tracked values with any changes made during this iteration
-                # Only for non-trace context vars - trace context must persist across yields
-                # to allow nested span tracking for telemetry
+                # This allows context changes to persist across generator iterations
                 for context_var in context_vars:
-                    if context_var is not CURRENT_TRACE_CONTEXT:
-                        initial_context_values[context_var.name] = context_var.get()
+                    initial_context_values[context_var.name] = context_var.get()
             finally:
-                # Restore non-trace context vars after each yield to prevent leaks between requests
-                # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
+                # Restore context vars after each yield to prevent leaks between requests
                 for context_var in context_vars:
-                    if context_var is not CURRENT_TRACE_CONTEXT:
-                        _restore_context_var(context_var)
+                    _restore_context_var(context_var)
     return wrapper()

llama_stack/core/utils/exec.py CHANGED Viewed

@@ -84,6 +84,15 @@ def run_command(command: list[str]) -> int:
             text=True,
             check=False,
         )
+        # Print stdout and stderr if command failed
+        if result.returncode != 0:
+            log.error(f"Command {' '.join(command)} failed with returncode {result.returncode}")
+            if result.stdout:
+                log.error(f"STDOUT: {result.stdout}")
+            if result.stderr:
+                log.error(f"STDERR: {result.stderr}")
         return result.returncode
     except subprocess.SubprocessError as e:
         log.error(f"Subprocess error: {e}")

llama_stack/core/utils/type_inspection.py ADDED Viewed

@@ -0,0 +1,45 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+"""
+Utility functions for type inspection and parameter handling.
+"""
+import inspect
+import typing
+from typing import Any, get_args, get_origin
+from pydantic import BaseModel
+from pydantic.fields import FieldInfo
+def is_unwrapped_body_param(param_type: Any) -> bool:
+    """
+    Check if a parameter type represents an unwrapped body parameter.
+    An unwrapped body parameter is an Annotated type with Body(embed=False)
+    This is used to determine whether request parameters should be flattened
+    in OpenAPI specs and client libraries (matching FastAPI's embed=False behavior).
+    Args:
+        param_type: The parameter type annotation to check
+    Returns:
+        True if the parameter should be treated as an unwrapped body parameter
+    """
+    # Check if it's Annotated with Body(embed=False)
+    if get_origin(param_type) is typing.Annotated:
+        args = get_args(param_type)
+        base_type = args[0]
+        metadata = args[1:]
+        # Look for Body annotation with embed=False
+        # Body() returns a FieldInfo object, so we check for that type and the embed attribute
+        for item in metadata:
+            if isinstance(item, FieldInfo) and hasattr(item, "embed") and not item.embed:
+                return inspect.isclass(base_type) and issubclass(base_type, BaseModel)
+    return False

llama_stack/distributions/dell/{run.yaml → config.yaml} RENAMED Viewed

@@ -105,6 +105,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -128,5 +131,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true

llama_stack/distributions/dell/dell.py CHANGED Viewed

@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.models import ModelType
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -17,6 +16,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
+from llama_stack_api import ModelType
 def get_distribution_template() -> DistributionTemplate:
@@ -111,7 +111,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
                     "vector_io": [chromadb_provider],

llama_stack/distributions/dell/run-with-safety.yaml CHANGED Viewed

@@ -109,6 +109,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -137,5 +140,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true

llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} RENAMED Viewed

@@ -112,6 +112,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -135,5 +138,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true

llama_stack/distributions/meta-reference-gpu/meta_reference.py CHANGED Viewed

@@ -6,7 +6,6 @@
 from pathlib import Path
-from llama_stack.apis.models import ModelType
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -22,6 +21,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack_api import ModelType
 def get_distribution_template() -> DistributionTemplate:
@@ -105,7 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
                     "vector_io": [vector_io_provider],

llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml CHANGED Viewed

@@ -122,6 +122,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -150,5 +153,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true

llama_stack/distributions/nvidia/{run.yaml → config.yaml} RENAMED Viewed

@@ -16,9 +16,8 @@ providers:
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -100,6 +99,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
@@ -112,5 +114,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true

llama_stack/distributions/nvidia/nvidia.py CHANGED Viewed

@@ -81,7 +81,7 @@ def get_distribution_template(name: str = "nvidia") -> DistributionTemplate:
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
                     "datasetio": [datasetio_provider],

llama_stack/distributions/nvidia/run-with-safety.yaml CHANGED Viewed

@@ -16,9 +16,8 @@ providers:
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
@@ -111,6 +110,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
@@ -133,5 +135,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true

llama_stack/{apis/datasetio → distributions/oci}/__init__.py RENAMED Viewed

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from .datasetio import *
+from .oci import get_distribution_template  # noqa: F401

llama_stack/distributions/oci/config.yaml ADDED Viewed

@@ -0,0 +1,134 @@
+version: 2
+image_name: oci
+apis:
+- agents
+- datasetio
+- eval
+- files
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: oci
+    provider_type: remote::oci
+    config:
+      oci_auth_type: ${env.OCI_AUTH_TYPE:=instance_principal}
+      oci_config_file_path: ${env.OCI_CONFIG_FILE_PATH:=~/.oci/config}
+      oci_config_profile: ${env.OCI_CLI_PROFILE:=DEFAULT}
+      oci_region: ${env.OCI_REGION:=us-ashburn-1}
+      oci_compartment_id: ${env.OCI_COMPARTMENT_OCID:=}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/oci/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/oci}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+server:
+  port: 8321

llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl