llama-stack 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +201 -58
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +43 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +40 -33
- llama_stack/providers/registry/agents.py +7 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +125 -20
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +27 -21
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +26 -18
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +141 -24
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +26 -21
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/METADATA +56 -54
- llama_stack-0.4.0.dist-info/RECORD +588 -0
- llama_stack-0.4.0.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
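The renames above move the public API surface out of `llama_stack/apis/*` into a new top-level `llama_stack_api` package, and the KV/SQL store utilities out of `llama_stack/providers/utils/*` into `llama_stack/core/storage/*`. A minimal sketch of what that implies for downstream imports; the exact set of symbols re-exported by `llama_stack_api` in 0.4.0 is an assumption here, inferred from the `core/datatypes.py` diff below:

```python
# Hypothetical migration sketch; symbol names follow the datatypes.py diff below.

# 0.3.5 layout: API datatypes lived under llama_stack.apis.*
# from llama_stack.apis.datasets import Dataset, DatasetInput
# from llama_stack.apis.inference import Inference

# 0.4.0 layout: the same symbols are imported from the new llama_stack_api package
from llama_stack_api import Dataset, DatasetInput, Inference

# 0.3.5 layout: storage helpers lived under llama_stack.providers.utils.*
# from llama_stack.providers.utils.kvstore import kvstore
# 0.4.0 layout: storage helpers moved under llama_stack.core.storage.*
from llama_stack.core.storage.kvstore import kvstore  # module path per the file moves above
```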
llama_stack/core/datatypes.py
CHANGED

@@ -11,27 +11,40 @@ from urllib.parse import urlparse
 
 from pydantic import BaseModel, Field, field_validator, model_validator
 
-from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset, DatasetInput
-from llama_stack.apis.eval import Eval
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.models import Model, ModelInput
-from llama_stack.apis.resource import Resource
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.scoring import Scoring
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
-from llama_stack.apis.shields import Shield, ShieldInput
-from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
 from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.core.storage.datatypes import (
     KVStoreReference,
     StorageBackendType,
     StorageConfig,
 )
-from llama_stack.
+from llama_stack.log import LoggingConfig
+from llama_stack_api import (
+    Api,
+    Benchmark,
+    BenchmarkInput,
+    ConnectorInput,
+    Dataset,
+    DatasetInput,
+    DatasetIO,
+    Eval,
+    Inference,
+    Model,
+    ModelInput,
+    ProviderSpec,
+    Resource,
+    Safety,
+    Scoring,
+    ScoringFn,
+    ScoringFnInput,
+    Shield,
+    ShieldInput,
+    ToolGroup,
+    ToolGroupInput,
+    ToolRuntime,
+    VectorIO,
+    VectorStore,
+    VectorStoreInput,
+)
 
 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
 LLAMA_STACK_RUN_CONFIG_VERSION = 2
@@ -179,30 +192,6 @@ class DistributionSpec(BaseModel):
     )
 
 
-class TelemetryConfig(BaseModel):
-    """
-    Configuration for telemetry.
-
-    Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
-    for env variables to configure the OpenTelemetry SDK.
-
-    Example:
-    ```bash
-    OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
-    ```
-    """
-
-    enabled: bool = Field(default=False, description="enable or disable telemetry")
-
-
-class LoggingConfig(BaseModel):
-    category_levels: dict[str, str] = Field(
-        default_factory=dict,
-        description="""
-Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
-    )
-
-
 class OAuth2JWKSConfig(BaseModel):
     # The JWKS URI for collecting public keys
     uri: str
@@ -361,6 +350,201 @@ class QualifiedModel(BaseModel):
     model_id: str
 
 
+class RewriteQueryParams(BaseModel):
+    """Parameters for query rewriting/expansion."""
+
+    model: QualifiedModel | None = Field(
+        default=None,
+        description="LLM model for query rewriting/expansion in vector search.",
+    )
+    prompt: str = Field(
+        default="Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:",
+        description="Prompt template for query rewriting. Use {query} as placeholder for the original query.",
+    )
+    max_tokens: int = Field(
+        default=100,
+        description="Maximum number of tokens for query expansion responses.",
+    )
+    temperature: float = Field(
+        default=0.3,
+        description="Temperature for query expansion model (0.0 = deterministic, 1.0 = creative).",
+    )
+
+    @field_validator("prompt")
+    @classmethod
+    def validate_prompt(cls, v: str) -> str:
+        if "{query}" not in v:
+            raise ValueError("prompt must contain {query} placeholder")
+        return v
+
+    @field_validator("max_tokens")
+    @classmethod
+    def validate_max_tokens(cls, v: int) -> int:
+        if v <= 0:
+            raise ValueError("max_tokens must be positive")
+        if v > 4096:
+            raise ValueError("max_tokens should not exceed 4096")
+        return v
+
+    @field_validator("temperature")
+    @classmethod
+    def validate_temperature(cls, v: float) -> float:
+        if v < 0.0 or v > 2.0:
+            raise ValueError("temperature must be between 0.0 and 2.0")
+        return v
+
+
+class FileSearchParams(BaseModel):
+    """Configuration for file search tool output formatting."""
+
+    header_template: str = Field(
+        default="knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n",
+        description="Template for the header text shown before search results. Available placeholders: {num_chunks} number of chunks found.",
+    )
+    footer_template: str = Field(
+        default="END of knowledge_search tool results.\n",
+        description="Template for the footer text shown after search results.",
+    )
+
+    @field_validator("header_template")
+    @classmethod
+    def validate_header_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("header_template must not be empty")
+        if "{num_chunks}" not in v:
+            raise ValueError("header_template must contain {num_chunks} placeholder")
+        if "knowledge_search" not in v.lower():
+            raise ValueError(
+                "header_template must contain 'knowledge_search' keyword to ensure proper tool identification"
+            )
+        return v
+
+
+class ContextPromptParams(BaseModel):
+    """Configuration for LLM prompt content and chunk formatting."""
+
+    chunk_annotation_template: str = Field(
+        default="Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
+        description="Template for formatting individual chunks in search results. Available placeholders: {index} 1-based chunk index, {chunk.content} chunk content, {metadata} chunk metadata dict.",
+    )
+    context_template: str = Field(
+        default='The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query. {annotation_instruction}\n',
+        description="Template for explaining the search results to the model. Available placeholders: {query} user's query, {num_chunks} number of chunks.",
+    )
+
+    @field_validator("chunk_annotation_template")
+    @classmethod
+    def validate_chunk_annotation_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("chunk_annotation_template must not be empty")
+        if "{chunk.content}" not in v:
+            raise ValueError("chunk_annotation_template must contain {chunk.content} placeholder")
+        if "{index}" not in v:
+            raise ValueError("chunk_annotation_template must contain {index} placeholder")
+        return v
+
+    @field_validator("context_template")
+    @classmethod
+    def validate_context_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("context_template must not be empty")
+        if "{query}" not in v:
+            raise ValueError("context_template must contain {query} placeholder")
+        return v
+
+
+class AnnotationPromptParams(BaseModel):
+    """Configuration for source annotation and attribution features."""
+
+    enable_annotations: bool = Field(
+        default=True,
+        description="Whether to include annotation information in results.",
+    )
+    annotation_instruction_template: str = Field(
+        default="Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.",
+        description="Instructions for how the model should cite sources. Used when enable_annotations is True.",
+    )
+    chunk_annotation_template: str = Field(
+        default="[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n",
+        description="Template for chunks with annotation information. Available placeholders: {index} 1-based chunk index, {metadata_text} formatted metadata, {file_id} document identifier, {chunk_text} chunk content.",
+    )
+
+    @field_validator("chunk_annotation_template")
+    @classmethod
+    def validate_chunk_annotation_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("chunk_annotation_template must not be empty")
+        if "{index}" not in v:
+            raise ValueError("chunk_annotation_template must contain {index} placeholder")
+        if "{chunk_text}" not in v:
+            raise ValueError("chunk_annotation_template must contain {chunk_text} placeholder")
+        if "{file_id}" not in v:
+            raise ValueError("chunk_annotation_template must contain {file_id} placeholder")
+        return v
+
+    @field_validator("annotation_instruction_template")
+    @classmethod
+    def validate_annotation_instruction_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("annotation_instruction_template must not be empty")
+        return v
+
+
+class FileIngestionParams(BaseModel):
+    """Configuration for file processing during ingestion."""
+
+    default_chunk_size_tokens: int = Field(
+        default=512,
+        description="Default chunk size for RAG tool operations when not specified",
+    )
+    default_chunk_overlap_tokens: int = Field(
+        default=128,
+        description="Default overlap in tokens between chunks (original default: 512 // 4 = 128)",
+    )
+
+
+class ChunkRetrievalParams(BaseModel):
+    """Configuration for chunk retrieval and ranking during search."""
+
+    chunk_multiplier: int = Field(
+        default=5,
+        description="Multiplier for OpenAI API over-retrieval (affects all providers)",
+    )
+    max_tokens_in_context: int = Field(
+        default=4000,
+        description="Maximum tokens allowed in RAG context before truncation",
+    )
+    default_reranker_strategy: str = Field(
+        default="rrf",
+        description="Default reranker when not specified: 'rrf', 'weighted', or 'normalized'",
+    )
+    rrf_impact_factor: float = Field(
+        default=60.0,
+        description="Impact factor for RRF (Reciprocal Rank Fusion) reranking",
+    )
+    weighted_search_alpha: float = Field(
+        default=0.5,
+        description="Alpha weight for weighted search reranking (0.0-1.0)",
+    )
+
+
+class FileBatchParams(BaseModel):
+    """Configuration for file batch processing."""
+
+    max_concurrent_files_per_batch: int = Field(
+        default=3,
+        description="Maximum files processed concurrently in file batches",
+    )
+    file_batch_chunk_size: int = Field(
+        default=10,
+        description="Number of files to process in each batch chunk",
+    )
+    cleanup_interval_seconds: int = Field(
+        default=86400,  # 24 hours
+        description="Interval for cleaning up expired file batches (seconds)",
+    )
+
+
 class VectorStoresConfig(BaseModel):
     """Configuration for vector stores in the stack."""
 
@@ -372,6 +556,44 @@ class VectorStoresConfig(BaseModel):
         default=None,
         description="Default embedding model configuration for vector stores.",
     )
+    rewrite_query_params: RewriteQueryParams | None = Field(
+        default=None,
+        description="Parameters for query rewriting/expansion. None disables query rewriting.",
+    )
+    file_search_params: FileSearchParams = Field(
+        default_factory=FileSearchParams,
+        description="Configuration for file search tool output formatting.",
+    )
+    context_prompt_params: ContextPromptParams = Field(
+        default_factory=ContextPromptParams,
+        description="Configuration for LLM prompt content and chunk formatting.",
+    )
+    annotation_prompt_params: AnnotationPromptParams = Field(
+        default_factory=AnnotationPromptParams,
+        description="Configuration for source annotation and attribution features.",
+    )
+
+    file_ingestion_params: FileIngestionParams = Field(
+        default_factory=FileIngestionParams,
+        description="Configuration for file processing during ingestion.",
+    )
+    chunk_retrieval_params: ChunkRetrievalParams = Field(
+        default_factory=ChunkRetrievalParams,
+        description="Configuration for chunk retrieval and ranking during search.",
+    )
+    file_batch_params: FileBatchParams = Field(
+        default_factory=FileBatchParams,
+        description="Configuration for file batch processing.",
+    )
+
+
+class SafetyConfig(BaseModel):
+    """Configuration for default moderations model."""
+
+    default_shield_id: str | None = Field(
+        default=None,
+        description="ID of the shield to use for when `model` is not specified in the `moderations` API request.",
+    )
 
 
 class QuotaPeriod(StrEnum):
@@ -432,6 +654,7 @@ class RegisteredResources(BaseModel):
     scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
     benchmarks: list[BenchmarkInput] = Field(default_factory=list)
     tool_groups: list[ToolGroupInput] = Field(default_factory=list)
+    connectors: list[ConnectorInput] = Field(default_factory=list)
 
 
 class ServerConfig(BaseModel):
@@ -477,7 +700,7 @@ class ServerConfig(BaseModel):
     )
 
 
-class
+class StackConfig(BaseModel):
     version: int = LLAMA_STACK_RUN_CONFIG_VERSION
 
     image_name: str = Field(
@@ -504,6 +727,7 @@ can be instantiated multiple times (with different configs) if necessary.
""",
     )
     storage: StorageConfig = Field(
+        default_factory=StorageConfig,
        description="Catalog of named storage backends and references available to the stack",
     )
 
@@ -514,8 +738,6 @@ can be instantiated multiple times (with different configs) if necessary.
 
     logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
 
-    telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
-
     server: ServerConfig = Field(
         default_factory=ServerConfig,
         description="Configuration for the HTTP(S) server",
@@ -536,6 +758,11 @@ can be instantiated multiple times (with different configs) if necessary.
         description="Configuration for vector stores, including default embedding model",
     )
 
+    safety: SafetyConfig | None = Field(
+        default=None,
+        description="Configuration for default moderations model",
+    )
+
     @field_validator("external_providers_dir")
     @classmethod
     def validate_external_providers_dir(cls, v):
@@ -546,7 +773,7 @@ can be instantiated multiple times (with different configs) if necessary.
         return v
 
     @model_validator(mode="after")
-    def validate_server_stores(self) -> "
+    def validate_server_stores(self) -> "StackConfig":
         backend_map = self.storage.backends
         stores = self.storage.stores
         kv_backends = {
@@ -586,40 +813,5 @@ can be instantiated multiple times (with different configs) if necessary.
         _ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
         _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
         _ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
+        _ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts")
         return self
-
-
-class BuildConfig(BaseModel):
-    version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
-
-    distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ")
-    image_type: str = Field(
-        default="venv",
-        description="Type of package to build (container | venv)",
-    )
-    image_name: str | None = Field(
-        default=None,
-        description="Name of the distribution to build",
-    )
-    external_providers_dir: Path | None = Field(
-        default=None,
-        description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. "
-        "pip_packages MUST contain the provider package name.",
-    )
-    additional_pip_packages: list[str] = Field(
-        default_factory=list,
-        description="Additional pip packages to install in the distribution. These packages will be installed in the distribution environment.",
-    )
-    external_apis_dir: Path | None = Field(
-        default=None,
-        description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
-    )
-
-    @field_validator("external_providers_dir")
-    @classmethod
-    def validate_external_providers_dir(cls, v):
-        if v is None:
-            return None
-        if isinstance(v, str):
-            return Path(v)
-        return v
llama_stack/core/distribution.py
CHANGED

@@ -12,10 +12,10 @@ from typing import Any
 import yaml
 from pydantic import BaseModel
 
-from llama_stack.core.datatypes import
+from llama_stack.core.datatypes import StackConfig
 from llama_stack.core.external import load_external_apis
 from llama_stack.log import get_logger
-from
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
@@ -25,7 +25,7 @@ from llama_stack.providers.datatypes import (
 logger = get_logger(name=__name__, category="core")
 
 
-INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations, Api.
+INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations, Api.connectors, Api.admin}
 
 
 def stack_apis() -> list[Api]:
@@ -85,7 +85,9 @@ def _load_inline_provider_spec(spec_data: dict[str, Any], api: Api, provider_nam
     return spec
 
 
-def get_provider_registry(
+def get_provider_registry(
+    config: StackConfig | None = None, listing: bool = False
+) -> dict[Api, dict[str, ProviderSpec]]:
     """Get the provider registry, optionally including external providers.
 
     This function loads both built-in providers and external providers from YAML files or from their provided modules.
@@ -109,13 +111,13 @@ def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
             safety/
                 llama-guard.yaml
 
-    This method is overloaded in that it can be called from a variety of places: during
-    So when
+    This method is overloaded in that it can be called from a variety of places: during list-deps, during run, during stack construction.
+    So when listing external providers from a module, there are scenarios where the pip package required to import the module might not be available yet.
     There is special handling for all of the potential cases this method can be called from.
 
     Args:
         config: Optional object containing the external providers directory path
-
+        listing: Optional bool delineating whether or not this is being called from a list-deps process
 
     Returns:
         A dictionary mapping APIs to their available providers
@@ -161,7 +163,7 @@ def get_provider_registry(config=None) -> dict[Api, dict[str, ProviderSpec]]:
         registry = get_external_providers_from_module(
             registry=registry,
             config=config,
-
+            listing=listing,
         )
 
     return registry
@@ -220,13 +222,10 @@ def get_external_providers_from_dir(
 
 
 def get_external_providers_from_module(
-    registry: dict[Api, dict[str, ProviderSpec]], config,
+    registry: dict[Api, dict[str, ProviderSpec]], config, listing: bool
 ) -> dict[Api, dict[str, ProviderSpec]]:
     provider_list = None
-
-        provider_list = config.distribution_spec.providers.items()
-    else:
-        provider_list = config.providers.items()
+    provider_list = config.providers.items()
     if provider_list is None:
         logger.warning("Could not get list of providers from config")
         return registry
@@ -236,14 +235,14 @@ def get_external_providers_from_module(
                 continue
             # get provider using module
             try:
-                if not
+                if not listing:
                     package_name = provider.module.split("==")[0]
                     module = importlib.import_module(f"{package_name}.provider")
                     # if config class is wrong you will get an error saying module could not be imported
                     spec = module.get_provider_spec()
                 else:
-                    # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon
-                    # in the case we are
+                    # pass in a partially filled out provider spec to satisfy the registry -- knowing we will be overwriting it later upon list-deps and run
+                    # in the case we are listing we CANNOT import this module of course because it has not been installed.
                     spec = ProviderSpec(
                         api=Api(provider_api),
                         provider_type=provider.provider_type,
llama_stack/core/external.py
CHANGED

@@ -7,14 +7,14 @@
 
 import yaml
 
-from llama_stack.
-from llama_stack.core.datatypes import BuildConfig, StackRunConfig
+from llama_stack.core.datatypes import StackConfig
 from llama_stack.log import get_logger
+from llama_stack_api import Api, ExternalApiSpec
 
 logger = get_logger(name=__name__, category="core")
 
 
-def load_external_apis(config:
+def load_external_apis(config: StackConfig | None) -> dict[Api, ExternalApiSpec]:
     """Load external API specifications from the configured directory.
 
     Args: