llama-stack 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +0 -5
- llama_stack/cli/llama.py +3 -3
- llama_stack/cli/stack/_list_deps.py +12 -23
- llama_stack/cli/stack/list_stacks.py +37 -18
- llama_stack/cli/stack/run.py +121 -11
- llama_stack/cli/stack/utils.py +0 -127
- llama_stack/core/access_control/access_control.py +69 -28
- llama_stack/core/access_control/conditions.py +15 -5
- llama_stack/core/admin.py +267 -0
- llama_stack/core/build.py +6 -74
- llama_stack/core/client.py +1 -1
- llama_stack/core/configure.py +6 -6
- llama_stack/core/conversations/conversations.py +28 -25
- llama_stack/core/datatypes.py +271 -79
- llama_stack/core/distribution.py +15 -16
- llama_stack/core/external.py +3 -3
- llama_stack/core/inspect.py +98 -15
- llama_stack/core/library_client.py +73 -61
- llama_stack/core/prompts/prompts.py +12 -11
- llama_stack/core/providers.py +17 -11
- llama_stack/core/resolver.py +65 -56
- llama_stack/core/routers/__init__.py +8 -12
- llama_stack/core/routers/datasets.py +1 -4
- llama_stack/core/routers/eval_scoring.py +7 -4
- llama_stack/core/routers/inference.py +55 -271
- llama_stack/core/routers/safety.py +52 -24
- llama_stack/core/routers/tool_runtime.py +6 -48
- llama_stack/core/routers/vector_io.py +130 -51
- llama_stack/core/routing_tables/benchmarks.py +24 -20
- llama_stack/core/routing_tables/common.py +1 -4
- llama_stack/core/routing_tables/datasets.py +22 -22
- llama_stack/core/routing_tables/models.py +119 -6
- llama_stack/core/routing_tables/scoring_functions.py +7 -7
- llama_stack/core/routing_tables/shields.py +1 -2
- llama_stack/core/routing_tables/toolgroups.py +17 -7
- llama_stack/core/routing_tables/vector_stores.py +51 -16
- llama_stack/core/server/auth.py +5 -3
- llama_stack/core/server/auth_providers.py +36 -20
- llama_stack/core/server/fastapi_router_registry.py +84 -0
- llama_stack/core/server/quota.py +2 -2
- llama_stack/core/server/routes.py +79 -27
- llama_stack/core/server/server.py +102 -87
- llama_stack/core/stack.py +235 -62
- llama_stack/core/storage/datatypes.py +26 -3
- llama_stack/{providers/utils → core/storage}/kvstore/__init__.py +2 -0
- llama_stack/{providers/utils → core/storage}/kvstore/kvstore.py +55 -24
- llama_stack/{providers/utils → core/storage}/kvstore/mongodb/mongodb.py +13 -10
- llama_stack/{providers/utils → core/storage}/kvstore/postgres/postgres.py +28 -17
- llama_stack/{providers/utils → core/storage}/kvstore/redis/redis.py +41 -16
- llama_stack/{providers/utils → core/storage}/kvstore/sqlite/sqlite.py +1 -1
- llama_stack/core/storage/sqlstore/__init__.py +17 -0
- llama_stack/{providers/utils → core/storage}/sqlstore/authorized_sqlstore.py +69 -49
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlalchemy_sqlstore.py +47 -17
- llama_stack/{providers/utils → core/storage}/sqlstore/sqlstore.py +25 -8
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/config.py +8 -2
- llama_stack/core/utils/config_resolution.py +32 -29
- llama_stack/core/utils/context.py +4 -10
- llama_stack/core/utils/exec.py +9 -0
- llama_stack/core/utils/type_inspection.py +45 -0
- llama_stack/distributions/dell/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/dell/dell.py +2 -2
- llama_stack/distributions/dell/run-with-safety.yaml +3 -2
- llama_stack/distributions/meta-reference-gpu/{run.yaml → config.yaml} +3 -2
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +2 -2
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +3 -2
- llama_stack/distributions/nvidia/{run.yaml → config.yaml} +4 -4
- llama_stack/distributions/nvidia/nvidia.py +1 -1
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -4
- llama_stack/{apis/datasetio → distributions/oci}/__init__.py +1 -1
- llama_stack/distributions/oci/config.yaml +134 -0
- llama_stack/distributions/oci/oci.py +108 -0
- llama_stack/distributions/open-benchmark/{run.yaml → config.yaml} +5 -4
- llama_stack/distributions/open-benchmark/open_benchmark.py +2 -3
- llama_stack/distributions/postgres-demo/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/starter/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/starter/starter.py +8 -5
- llama_stack/distributions/starter-gpu/{run.yaml → config.yaml} +64 -13
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +64 -13
- llama_stack/distributions/template.py +13 -69
- llama_stack/distributions/watsonx/{run.yaml → config.yaml} +4 -3
- llama_stack/distributions/watsonx/watsonx.py +1 -1
- llama_stack/log.py +28 -11
- llama_stack/models/llama/checkpoint.py +6 -6
- llama_stack/models/llama/hadamard_utils.py +2 -0
- llama_stack/models/llama/llama3/generation.py +3 -1
- llama_stack/models/llama/llama3/interface.py +2 -5
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +3 -3
- llama_stack/models/llama/llama3/multimodal/image_transform.py +6 -6
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +1 -1
- llama_stack/models/llama/llama3/tool_utils.py +2 -1
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +1 -1
- llama_stack/providers/inline/agents/meta_reference/__init__.py +3 -3
- llama_stack/providers/inline/agents/meta_reference/agents.py +44 -261
- llama_stack/providers/inline/agents/meta_reference/config.py +6 -1
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +207 -57
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +308 -47
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +162 -96
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +23 -8
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +201 -33
- llama_stack/providers/inline/agents/meta_reference/safety.py +8 -13
- llama_stack/providers/inline/batches/reference/__init__.py +2 -4
- llama_stack/providers/inline/batches/reference/batches.py +78 -60
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +2 -5
- llama_stack/providers/inline/eval/meta_reference/eval.py +16 -61
- llama_stack/providers/inline/files/localfs/files.py +37 -28
- llama_stack/providers/inline/inference/meta_reference/config.py +2 -2
- llama_stack/providers/inline/inference/meta_reference/generators.py +50 -60
- llama_stack/providers/inline/inference/meta_reference/inference.py +403 -19
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +7 -26
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +2 -12
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +10 -15
- llama_stack/providers/inline/post_training/common/validator.py +1 -5
- llama_stack/providers/inline/post_training/huggingface/post_training.py +8 -8
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +18 -10
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +12 -9
- llama_stack/providers/inline/post_training/huggingface/utils.py +27 -6
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +1 -1
- llama_stack/providers/inline/post_training/torchtune/post_training.py +8 -8
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +16 -16
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +13 -9
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +18 -15
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +9 -9
- llama_stack/providers/inline/scoring/basic/scoring.py +6 -13
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +2 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +1 -2
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +12 -15
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +2 -2
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +7 -14
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +2 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +1 -2
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +1 -3
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +1 -1
- llama_stack/providers/inline/tool_runtime/rag/config.py +8 -1
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +7 -6
- llama_stack/providers/inline/tool_runtime/rag/memory.py +64 -48
- llama_stack/providers/inline/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/chroma/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/config.py +1 -1
- llama_stack/providers/inline/vector_io/faiss/faiss.py +46 -28
- llama_stack/providers/inline/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/milvus/config.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +1 -1
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +44 -33
- llama_stack/providers/registry/agents.py +8 -3
- llama_stack/providers/registry/batches.py +1 -1
- llama_stack/providers/registry/datasetio.py +1 -1
- llama_stack/providers/registry/eval.py +1 -1
- llama_stack/{apis/datasets/__init__.py → providers/registry/file_processors.py} +5 -1
- llama_stack/providers/registry/files.py +11 -2
- llama_stack/providers/registry/inference.py +22 -3
- llama_stack/providers/registry/post_training.py +1 -1
- llama_stack/providers/registry/safety.py +1 -1
- llama_stack/providers/registry/scoring.py +1 -1
- llama_stack/providers/registry/tool_runtime.py +2 -2
- llama_stack/providers/registry/vector_io.py +7 -7
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +2 -5
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +1 -4
- llama_stack/providers/remote/eval/nvidia/eval.py +15 -9
- llama_stack/providers/remote/files/openai/__init__.py +19 -0
- llama_stack/providers/remote/files/openai/config.py +28 -0
- llama_stack/providers/remote/files/openai/files.py +253 -0
- llama_stack/providers/remote/files/s3/files.py +52 -30
- llama_stack/providers/remote/inference/anthropic/anthropic.py +2 -1
- llama_stack/providers/remote/inference/anthropic/config.py +1 -1
- llama_stack/providers/remote/inference/azure/azure.py +1 -3
- llama_stack/providers/remote/inference/azure/config.py +8 -7
- llama_stack/providers/remote/inference/bedrock/__init__.py +1 -1
- llama_stack/providers/remote/inference/bedrock/bedrock.py +82 -105
- llama_stack/providers/remote/inference/bedrock/config.py +24 -3
- llama_stack/providers/remote/inference/cerebras/cerebras.py +5 -5
- llama_stack/providers/remote/inference/cerebras/config.py +12 -5
- llama_stack/providers/remote/inference/databricks/config.py +13 -6
- llama_stack/providers/remote/inference/databricks/databricks.py +16 -6
- llama_stack/providers/remote/inference/fireworks/config.py +5 -5
- llama_stack/providers/remote/inference/fireworks/fireworks.py +1 -1
- llama_stack/providers/remote/inference/gemini/config.py +1 -1
- llama_stack/providers/remote/inference/gemini/gemini.py +13 -14
- llama_stack/providers/remote/inference/groq/config.py +5 -5
- llama_stack/providers/remote/inference/groq/groq.py +1 -1
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +5 -5
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +8 -6
- llama_stack/providers/remote/inference/nvidia/__init__.py +1 -1
- llama_stack/providers/remote/inference/nvidia/config.py +21 -11
- llama_stack/providers/remote/inference/nvidia/nvidia.py +115 -3
- llama_stack/providers/remote/inference/nvidia/utils.py +1 -1
- llama_stack/providers/remote/inference/oci/__init__.py +17 -0
- llama_stack/providers/remote/inference/oci/auth.py +79 -0
- llama_stack/providers/remote/inference/oci/config.py +75 -0
- llama_stack/providers/remote/inference/oci/oci.py +162 -0
- llama_stack/providers/remote/inference/ollama/config.py +7 -5
- llama_stack/providers/remote/inference/ollama/ollama.py +17 -8
- llama_stack/providers/remote/inference/openai/config.py +4 -4
- llama_stack/providers/remote/inference/openai/openai.py +1 -1
- llama_stack/providers/remote/inference/passthrough/__init__.py +2 -2
- llama_stack/providers/remote/inference/passthrough/config.py +5 -10
- llama_stack/providers/remote/inference/passthrough/passthrough.py +97 -75
- llama_stack/providers/remote/inference/runpod/config.py +12 -5
- llama_stack/providers/remote/inference/runpod/runpod.py +2 -20
- llama_stack/providers/remote/inference/sambanova/config.py +5 -5
- llama_stack/providers/remote/inference/sambanova/sambanova.py +1 -1
- llama_stack/providers/remote/inference/tgi/config.py +7 -6
- llama_stack/providers/remote/inference/tgi/tgi.py +19 -11
- llama_stack/providers/remote/inference/together/config.py +5 -5
- llama_stack/providers/remote/inference/together/together.py +15 -12
- llama_stack/providers/remote/inference/vertexai/config.py +1 -1
- llama_stack/providers/remote/inference/vllm/config.py +5 -5
- llama_stack/providers/remote/inference/vllm/vllm.py +13 -14
- llama_stack/providers/remote/inference/watsonx/config.py +4 -4
- llama_stack/providers/remote/inference/watsonx/watsonx.py +21 -94
- llama_stack/providers/remote/post_training/nvidia/post_training.py +4 -4
- llama_stack/providers/remote/post_training/nvidia/utils.py +1 -1
- llama_stack/providers/remote/safety/bedrock/bedrock.py +6 -6
- llama_stack/providers/remote/safety/bedrock/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/config.py +1 -1
- llama_stack/providers/remote/safety/nvidia/nvidia.py +11 -5
- llama_stack/providers/remote/safety/sambanova/config.py +1 -1
- llama_stack/providers/remote/safety/sambanova/sambanova.py +6 -6
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +12 -7
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +8 -2
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +57 -15
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +11 -6
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +11 -6
- llama_stack/providers/remote/vector_io/chroma/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/chroma/chroma.py +131 -23
- llama_stack/providers/remote/vector_io/chroma/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/config.py +1 -1
- llama_stack/providers/remote/vector_io/milvus/milvus.py +37 -28
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/config.py +1 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +37 -25
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/config.py +1 -1
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +147 -30
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/config.py +1 -1
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +31 -26
- llama_stack/providers/utils/common/data_schema_validator.py +1 -5
- llama_stack/providers/utils/files/form_data.py +1 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -1
- llama_stack/providers/utils/inference/inference_store.py +7 -8
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +79 -79
- llama_stack/providers/utils/inference/model_registry.py +1 -3
- llama_stack/providers/utils/inference/openai_compat.py +44 -1171
- llama_stack/providers/utils/inference/openai_mixin.py +68 -42
- llama_stack/providers/utils/inference/prompt_adapter.py +50 -265
- llama_stack/providers/utils/inference/stream_utils.py +23 -0
- llama_stack/providers/utils/memory/__init__.py +2 -0
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +181 -84
- llama_stack/providers/utils/memory/vector_store.py +39 -38
- llama_stack/providers/utils/pagination.py +1 -1
- llama_stack/providers/utils/responses/responses_store.py +15 -25
- llama_stack/providers/utils/scoring/aggregation_utils.py +1 -2
- llama_stack/providers/utils/scoring/base_scoring_fn.py +1 -2
- llama_stack/providers/utils/tools/mcp.py +93 -11
- llama_stack/providers/utils/vector_io/__init__.py +16 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +36 -0
- llama_stack/telemetry/constants.py +27 -0
- llama_stack/telemetry/helpers.py +43 -0
- llama_stack/testing/api_recorder.py +25 -16
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/METADATA +57 -55
- llama_stack-0.4.1.dist-info/RECORD +588 -0
- llama_stack-0.4.1.dist-info/top_level.txt +2 -0
- llama_stack_api/__init__.py +945 -0
- llama_stack_api/admin/__init__.py +45 -0
- llama_stack_api/admin/api.py +72 -0
- llama_stack_api/admin/fastapi_routes.py +117 -0
- llama_stack_api/admin/models.py +113 -0
- llama_stack_api/agents.py +173 -0
- llama_stack_api/batches/__init__.py +40 -0
- llama_stack_api/batches/api.py +53 -0
- llama_stack_api/batches/fastapi_routes.py +113 -0
- llama_stack_api/batches/models.py +78 -0
- llama_stack_api/benchmarks/__init__.py +43 -0
- llama_stack_api/benchmarks/api.py +39 -0
- llama_stack_api/benchmarks/fastapi_routes.py +109 -0
- llama_stack_api/benchmarks/models.py +109 -0
- {llama_stack/apis → llama_stack_api}/common/content_types.py +1 -43
- {llama_stack/apis → llama_stack_api}/common/errors.py +0 -8
- {llama_stack/apis → llama_stack_api}/common/job_types.py +1 -1
- llama_stack_api/common/responses.py +77 -0
- {llama_stack/apis → llama_stack_api}/common/training_types.py +1 -1
- {llama_stack/apis → llama_stack_api}/common/type_system.py +2 -14
- llama_stack_api/connectors.py +146 -0
- {llama_stack/apis/conversations → llama_stack_api}/conversations.py +23 -39
- {llama_stack/apis/datasetio → llama_stack_api}/datasetio.py +4 -8
- llama_stack_api/datasets/__init__.py +61 -0
- llama_stack_api/datasets/api.py +35 -0
- llama_stack_api/datasets/fastapi_routes.py +104 -0
- llama_stack_api/datasets/models.py +152 -0
- {llama_stack/providers → llama_stack_api}/datatypes.py +166 -10
- {llama_stack/apis/eval → llama_stack_api}/eval.py +8 -40
- llama_stack_api/file_processors/__init__.py +27 -0
- llama_stack_api/file_processors/api.py +64 -0
- llama_stack_api/file_processors/fastapi_routes.py +78 -0
- llama_stack_api/file_processors/models.py +42 -0
- llama_stack_api/files/__init__.py +35 -0
- llama_stack_api/files/api.py +51 -0
- llama_stack_api/files/fastapi_routes.py +124 -0
- llama_stack_api/files/models.py +107 -0
- {llama_stack/apis/inference → llama_stack_api}/inference.py +90 -194
- llama_stack_api/inspect_api/__init__.py +37 -0
- llama_stack_api/inspect_api/api.py +25 -0
- llama_stack_api/inspect_api/fastapi_routes.py +76 -0
- llama_stack_api/inspect_api/models.py +28 -0
- {llama_stack/apis/agents → llama_stack_api/internal}/__init__.py +3 -1
- llama_stack/providers/utils/kvstore/api.py → llama_stack_api/internal/kvstore.py +5 -0
- llama_stack_api/internal/sqlstore.py +79 -0
- {llama_stack/apis/models → llama_stack_api}/models.py +11 -9
- {llama_stack/apis/agents → llama_stack_api}/openai_responses.py +184 -27
- {llama_stack/apis/post_training → llama_stack_api}/post_training.py +7 -11
- {llama_stack/apis/prompts → llama_stack_api}/prompts.py +3 -4
- llama_stack_api/providers/__init__.py +33 -0
- llama_stack_api/providers/api.py +16 -0
- llama_stack_api/providers/fastapi_routes.py +57 -0
- llama_stack_api/providers/models.py +24 -0
- {llama_stack/apis/tools → llama_stack_api}/rag_tool.py +2 -52
- {llama_stack/apis → llama_stack_api}/resource.py +1 -1
- llama_stack_api/router_utils.py +160 -0
- {llama_stack/apis/safety → llama_stack_api}/safety.py +6 -9
- {llama_stack → llama_stack_api}/schema_utils.py +94 -4
- {llama_stack/apis/scoring → llama_stack_api}/scoring.py +3 -3
- {llama_stack/apis/scoring_functions → llama_stack_api}/scoring_functions.py +9 -6
- {llama_stack/apis/shields → llama_stack_api}/shields.py +6 -7
- {llama_stack/apis/tools → llama_stack_api}/tools.py +26 -21
- {llama_stack/apis/vector_io → llama_stack_api}/vector_io.py +133 -152
- {llama_stack/apis/vector_stores → llama_stack_api}/vector_stores.py +1 -1
- llama_stack/apis/agents/agents.py +0 -894
- llama_stack/apis/batches/__init__.py +0 -9
- llama_stack/apis/batches/batches.py +0 -100
- llama_stack/apis/benchmarks/__init__.py +0 -7
- llama_stack/apis/benchmarks/benchmarks.py +0 -108
- llama_stack/apis/common/responses.py +0 -36
- llama_stack/apis/conversations/__init__.py +0 -31
- llama_stack/apis/datasets/datasets.py +0 -251
- llama_stack/apis/datatypes.py +0 -160
- llama_stack/apis/eval/__init__.py +0 -7
- llama_stack/apis/files/__init__.py +0 -7
- llama_stack/apis/files/files.py +0 -199
- llama_stack/apis/inference/__init__.py +0 -7
- llama_stack/apis/inference/event_logger.py +0 -43
- llama_stack/apis/inspect/__init__.py +0 -7
- llama_stack/apis/inspect/inspect.py +0 -94
- llama_stack/apis/models/__init__.py +0 -7
- llama_stack/apis/post_training/__init__.py +0 -7
- llama_stack/apis/prompts/__init__.py +0 -9
- llama_stack/apis/providers/__init__.py +0 -7
- llama_stack/apis/providers/providers.py +0 -69
- llama_stack/apis/safety/__init__.py +0 -7
- llama_stack/apis/scoring/__init__.py +0 -7
- llama_stack/apis/scoring_functions/__init__.py +0 -7
- llama_stack/apis/shields/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/__init__.py +0 -7
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +0 -77
- llama_stack/apis/telemetry/__init__.py +0 -7
- llama_stack/apis/telemetry/telemetry.py +0 -423
- llama_stack/apis/tools/__init__.py +0 -8
- llama_stack/apis/vector_io/__init__.py +0 -7
- llama_stack/apis/vector_stores/__init__.py +0 -7
- llama_stack/core/server/tracing.py +0 -80
- llama_stack/core/ui/app.py +0 -55
- llama_stack/core/ui/modules/__init__.py +0 -5
- llama_stack/core/ui/modules/api.py +0 -32
- llama_stack/core/ui/modules/utils.py +0 -42
- llama_stack/core/ui/page/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/__init__.py +0 -5
- llama_stack/core/ui/page/distribution/datasets.py +0 -18
- llama_stack/core/ui/page/distribution/eval_tasks.py +0 -20
- llama_stack/core/ui/page/distribution/models.py +0 -18
- llama_stack/core/ui/page/distribution/providers.py +0 -27
- llama_stack/core/ui/page/distribution/resources.py +0 -48
- llama_stack/core/ui/page/distribution/scoring_functions.py +0 -18
- llama_stack/core/ui/page/distribution/shields.py +0 -19
- llama_stack/core/ui/page/evaluations/__init__.py +0 -5
- llama_stack/core/ui/page/evaluations/app_eval.py +0 -143
- llama_stack/core/ui/page/evaluations/native_eval.py +0 -253
- llama_stack/core/ui/page/playground/__init__.py +0 -5
- llama_stack/core/ui/page/playground/chat.py +0 -130
- llama_stack/core/ui/page/playground/tools.py +0 -352
- llama_stack/distributions/dell/build.yaml +0 -33
- llama_stack/distributions/meta-reference-gpu/build.yaml +0 -32
- llama_stack/distributions/nvidia/build.yaml +0 -29
- llama_stack/distributions/open-benchmark/build.yaml +0 -36
- llama_stack/distributions/postgres-demo/__init__.py +0 -7
- llama_stack/distributions/postgres-demo/build.yaml +0 -23
- llama_stack/distributions/postgres-demo/postgres_demo.py +0 -125
- llama_stack/distributions/starter/build.yaml +0 -61
- llama_stack/distributions/starter-gpu/build.yaml +0 -61
- llama_stack/distributions/watsonx/build.yaml +0 -33
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +0 -1024
- llama_stack/providers/inline/agents/meta_reference/persistence.py +0 -228
- llama_stack/providers/inline/telemetry/__init__.py +0 -5
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +0 -21
- llama_stack/providers/inline/telemetry/meta_reference/config.py +0 -47
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +0 -252
- llama_stack/providers/remote/inference/bedrock/models.py +0 -29
- llama_stack/providers/utils/kvstore/sqlite/config.py +0 -20
- llama_stack/providers/utils/sqlstore/__init__.py +0 -5
- llama_stack/providers/utils/sqlstore/api.py +0 -128
- llama_stack/providers/utils/telemetry/__init__.py +0 -5
- llama_stack/providers/utils/telemetry/trace_protocol.py +0 -142
- llama_stack/providers/utils/telemetry/tracing.py +0 -384
- llama_stack/strong_typing/__init__.py +0 -19
- llama_stack/strong_typing/auxiliary.py +0 -228
- llama_stack/strong_typing/classdef.py +0 -440
- llama_stack/strong_typing/core.py +0 -46
- llama_stack/strong_typing/deserializer.py +0 -877
- llama_stack/strong_typing/docstring.py +0 -409
- llama_stack/strong_typing/exception.py +0 -23
- llama_stack/strong_typing/inspection.py +0 -1085
- llama_stack/strong_typing/mapping.py +0 -40
- llama_stack/strong_typing/name.py +0 -182
- llama_stack/strong_typing/schema.py +0 -792
- llama_stack/strong_typing/serialization.py +0 -97
- llama_stack/strong_typing/serializer.py +0 -500
- llama_stack/strong_typing/slots.py +0 -27
- llama_stack/strong_typing/topological.py +0 -89
- llama_stack/ui/node_modules/flatted/python/flatted.py +0 -149
- llama_stack-0.3.5.dist-info/RECORD +0 -625
- llama_stack-0.3.5.dist-info/top_level.txt +0 -1
- /llama_stack/{providers/utils → core/storage}/kvstore/config.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/mongodb/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/postgres/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/redis/__init__.py +0 -0
- /llama_stack/{providers/utils → core/storage}/kvstore/sqlite/__init__.py +0 -0
- /llama_stack/{apis → providers/inline/file_processor}/__init__.py +0 -0
- /llama_stack/{apis/common → telemetry}/__init__.py +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.5.dist-info → llama_stack-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {llama_stack/core/ui → llama_stack_api/common}/__init__.py +0 -0
- {llama_stack/strong_typing → llama_stack_api}/py.typed +0 -0
- {llama_stack/apis → llama_stack_api}/version.py +0 -0
@@ -17,41 +17,43 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
+    config:
+      api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
+      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -73,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
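
Note: the `${env.VAR:=default}` and `${env.VAR:+value}` forms above follow shell-style parameter expansion: `:=` resolves to the variable's value or the given default, while `:+` resolves to the given value only when the variable is set (so `${env.CEREBRAS_API_KEY:+cerebras}` disables that provider entirely when the key is absent). A minimal sketch of that substitution logic; the helper name and regex are illustrative assumptions, not llama-stack's actual resolver:

import os
import re

# Hypothetical re-implementation of the ${env.VAR:=default} / ${env.VAR:+alt}
# syntax used in the config files above.
_ENV_PATTERN = re.compile(r"\$\{env\.(?P<name>\w+):(?P<op>[=+])(?P<arg>[^}]*)\}")

def substitute_env(text: str) -> str:
    def repl(m: re.Match) -> str:
        value = os.environ.get(m.group("name"))
        if m.group("op") == "=":
            # := use the variable if set, otherwise the default
            return value if value is not None else m.group("arg")
        # :+ emit the alternative only when the variable is set
        return m.group("arg") if value else ""
    return _ENV_PATTERN.sub(repl, text)

# With OLLAMA_URL unset:
# substitute_env("${env.OLLAMA_URL:+ollama}") -> ""
# substitute_env("${env.OLLAMA_URL:=http://localhost:11434/v1}") -> "http://localhost:11434/v1"
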
@@ -259,6 +261,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields:
@@ -279,10 +284,56 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  file_search_params:
+    header_template: 'knowledge_search tool found {num_chunks} chunks:
+
+      BEGIN of knowledge_search tool results.
+
+      '
+    footer_template: 'END of knowledge_search tool results.
+
+      '
+  context_prompt_params:
+    chunk_annotation_template: 'Result {index}
+
+      Content: {chunk.content}
+
+      Metadata: {metadata}
+
+      '
+    context_template: 'The above results were retrieved to help answer the user''s
+      query: "{query}". Use them as supporting information only in answering this
+      query. {annotation_instruction}
+
+      '
+  annotation_prompt_params:
+    enable_annotations: true
+    annotation_instruction_template: Cite sources immediately at the end of sentences
+      before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'.
+      Do not add extra punctuation. Use only the file IDs provided, do not invent
+      new ones.
+    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
+
+      {chunk_text}
+
+      '
+  file_ingestion_params:
+    default_chunk_size_tokens: 512
+    default_chunk_overlap_tokens: 128
+  chunk_retrieval_params:
+    chunk_multiplier: 5
+    max_tokens_in_context: 4000
+    default_reranker_strategy: rrf
+    rrf_impact_factor: 60.0
+    weighted_search_alpha: 0.5
+  file_batch_params:
+    max_concurrent_files_per_batch: 3
+    file_batch_chunk_size: 10
+    cleanup_interval_seconds: 86400
+safety:
+  default_shield_id: llama-guard
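
Note: the templates added above are plain placeholder strings; fields like {num_chunks}, {index}, and {chunk.content} suggest rendering with Python's str.format, which also resolves attribute access on objects. An illustrative sketch only; the Chunk stand-in and the rendering loop are assumptions, not the provider's actual code:

from dataclasses import dataclass

@dataclass
class Chunk:  # hypothetical stand-in for the retrieved-chunk object
    content: str

header_template = "knowledge_search tool found {num_chunks} chunks:\n\nBEGIN of knowledge_search tool results.\n\n"
chunk_annotation_template = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n\n"

chunks = [Chunk("Llama Stack moved its public APIs into llama_stack_api.")]
out = header_template.format(num_chunks=len(chunks))
for i, chunk in enumerate(chunks, start=1):
    # str.format resolves {chunk.content} via attribute lookup on the passed object
    out += chunk_annotation_template.format(index=i, chunk=chunk, metadata={"source": "notes.md"})
out += "END of knowledge_search tool results.\n"
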
@@ -12,20 +12,16 @@ import rich
 import yaml
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.datasets import DatasetPurpose
-from llama_stack.apis.models import ModelType
 from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
     Api,
     BenchmarkInput,
-    BuildConfig,
     BuildProvider,
     DatasetInput,
-    DistributionSpec,
     ModelInput,
     Provider,
+    SafetyConfig,
     ShieldInput,
-    TelemetryConfig,
     ToolGroupInput,
     VectorStoresConfig,
 )
@@ -36,13 +32,11 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageBackendType,
 )
+from llama_stack.core.storage.kvstore.config import SqliteKVStoreConfig
+from llama_stack.core.storage.sqlstore.sqlstore import SqliteSqlStoreConfig
 from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
-from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
-from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
+from llama_stack_api import DatasetPurpose, ModelType
 
 
 def filter_empty_values(obj: Any) -> Any:
@@ -188,7 +182,7 @@ class RunConfigSettings(BaseModel):
     default_datasets: list[DatasetInput] | None = None
     default_benchmarks: list[BenchmarkInput] | None = None
     vector_stores_config: VectorStoresConfig | None = None
-
+    safety_config: SafetyConfig | None = None
     storage_backends: dict[str, Any] | None = None
     storage_stores: dict[str, Any] | None = None
 
@@ -257,6 +251,10 @@ class RunConfigSettings(BaseModel):
                 backend="sql_default",
                 table_name="openai_conversations",
             ).model_dump(exclude_none=True),
+            "prompts": KVStoreReference(
+                backend="kv_default",
+                namespace="prompts",
+            ).model_dump(exclude_none=True),
         }
 
         storage_config = dict(
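
Note: the KVStoreReference(...).model_dump(exclude_none=True) calls above are what produce the YAML prompts: block shown earlier. A rough equivalence sketch; the field names come from the diff, while the import path is an assumption:

from llama_stack.core.storage.datatypes import KVStoreReference  # assumed location

ref = KVStoreReference(backend="kv_default", namespace="prompts")
print(ref.model_dump(exclude_none=True))  # e.g. {'backend': 'kv_default', 'namespace': 'prompts'}
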
@@ -284,12 +282,14 @@ class RunConfigSettings(BaseModel):
             "server": {
                 "port": 8321,
             },
-            "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
         }
 
         if self.vector_stores_config:
             config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)
 
+        if self.safety_config:
+            config["safety"] = self.safety_config.model_dump(exclude_none=True)
+
         return config
 
 
@@ -314,55 +314,6 @@ class DistributionTemplate(BaseModel):
 
     available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None
 
-    # we may want to specify additional pip packages without necessarily indicating a
-    # specific "default" inference store (which is what typically used to dictate additional
-    # pip packages)
-    additional_pip_packages: list[str] | None = None
-
-    def build_config(self) -> BuildConfig:
-        additional_pip_packages: list[str] = []
-        for run_config in self.run_configs.values():
-            run_config_ = run_config.run_config(self.name, self.providers, self.container_image)
-
-            # TODO: This is a hack to get the dependencies for internal APIs into build
-            # We should have a better way to do this by formalizing the concept of "internal" APIs
-            # and providers, with a way to specify dependencies for them.
-
-            storage_cfg = run_config_.get("storage", {})
-            for backend_cfg in storage_cfg.get("backends", {}).values():
-                store_type = backend_cfg.get("type")
-                if not store_type:
-                    continue
-                if str(store_type).startswith("kv_"):
-                    additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
-                elif str(store_type).startswith("sql_"):
-                    additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))
-
-        if self.additional_pip_packages:
-            additional_pip_packages.extend(self.additional_pip_packages)
-
-        # Create minimal providers for build config (without runtime configs)
-        build_providers = {}
-        for api, providers in self.providers.items():
-            build_providers[api] = []
-            for provider in providers:
-                # Create a minimal build provider object with only essential build information
-                build_provider = BuildProvider(
-                    provider_type=provider.provider_type,
-                    module=provider.module,
-                )
-                build_providers[api].append(build_provider)
-
-        return BuildConfig(
-            distribution_spec=DistributionSpec(
-                description=self.description,
-                container_image=self.container_image,
-                providers=build_providers,
-            ),
-            image_type=LlamaStackImageType.VENV.value, # default to venv
-            additional_pip_packages=sorted(set(additional_pip_packages)),
-        )
-
     def generate_markdown_docs(self) -> str:
         providers_table = "| API | Provider(s) |\n"
         providers_table += "|-----|-------------|\n"
@@ -415,6 +366,7 @@ class DistributionTemplate(BaseModel):
                 providers_table=providers_table,
                 run_config_env_vars=self.run_config_env_vars,
                 default_models=default_models,
+                run_configs=list(self.run_configs.keys()),
             )
         return ""
 
@@ -433,14 +385,6 @@ class DistributionTemplate(BaseModel):
         for output_dir in [yaml_output_dir, doc_output_dir]:
             output_dir.mkdir(parents=True, exist_ok=True)
 
-        build_config = self.build_config()
-        with open(yaml_output_dir / "build.yaml", "w") as f:
-            yaml.safe_dump(
-                filter_empty_values(build_config.model_dump(exclude_none=True)),
-                f,
-                sort_keys=False,
-            )
-
         for yaml_pth, settings in self.run_configs.items():
             run_config = settings.run_config(self.name, self.providers, self.container_image)
             with open(yaml_output_dir / yaml_pth, "w") as f:
@@ -15,7 +15,7 @@ providers:
   - provider_id: watsonx
     provider_type: remote::watsonx
     config:
-
+      base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:=}
       project_id: ${env.WATSONX_PROJECT_ID:=}
   vector_io:
@@ -115,6 +115,9 @@ storage:
     conversations:
       table_name: openai_conversations
      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
@@ -129,5 +132,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -69,7 +69,7 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
         template_path=None,
         providers=providers,
         run_configs={
-            "run.yaml": RunConfigSettings(
+            "config.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
                     "files": [files_provider],
llama_stack/log.py CHANGED
@@ -9,15 +9,23 @@ import os
 import re
 from logging.config import dictConfig  # allow-direct-logging
 
+from pydantic import BaseModel, Field
 from rich.console import Console
 from rich.errors import MarkupError
 from rich.logging import RichHandler
 
-from llama_stack.core.datatypes import LoggingConfig
-
 # Default log level
 DEFAULT_LOG_LEVEL = logging.INFO
 
+
+class LoggingConfig(BaseModel):
+    category_levels: dict[str, str] = Field(
+        default_factory=dict,
+        description="""
+ Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
+    )
+
+
 # Predefined categories
 CATEGORIES = [
     "core",
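
Note: with LoggingConfig now defined in llama_stack.log itself rather than imported from core.datatypes, it can be constructed directly and handed to the parse_yaml_config helper shown further down. A small usage sketch, assuming both names remain importable from llama_stack.log:

from llama_stack.log import LoggingConfig, parse_yaml_config

cfg = LoggingConfig(category_levels={"core": "DEBUG", "server": "INFO"})
levels = parse_yaml_config(cfg)  # e.g. {"core": 10, "server": 20, ...} (stdlib logging ints)
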
@@ -29,7 +37,6 @@ CATEGORIES = [
     "eval",
     "tools",
     "client",
-    "telemetry",
     "openai",
     "openai_responses",
     "openai_conversations",
@@ -37,6 +44,7 @@ CATEGORIES = [
     "providers",
     "models",
     "files",
+    "file_processors",
     "vector_io",
     "tool_runtime",
     "cli",
@@ -84,10 +92,10 @@ def config_to_category_levels(category: str, level: str):
 
 def parse_yaml_config(yaml_config: LoggingConfig) -> dict[str, int]:
     """
-    Helper function to parse a yaml logging configuration found in the run.yaml
+    Helper function to parse a yaml logging configuration found in the config.yaml
 
     Parameters:
-        yaml_config (Logging): the logger config object found in the run.yaml
+        yaml_config (Logging): the logger config object found in the config.yaml
 
     Returns:
         Dict[str, int]: A dictionary mapping categories to their log levels.
@@ -137,7 +145,8 @@ class CustomRichHandler(RichHandler):
         # Set a reasonable default width for console output, especially when redirected to files
         console_width = int(os.environ.get("LLAMA_STACK_LOG_WIDTH", "120"))
         # Don't force terminal codes to avoid ANSI escape codes in log files
-
+        # Ensure logs go to stderr, not stdout
+        kwargs["console"] = Console(width=console_width, stderr=True)
         super().__init__(*args, **kwargs)
 
     def emit(self, record):
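
Note: routing the Rich console to stderr keeps log lines out of stdout, which matters whenever a CLI's stdout is piped or parsed. A quick standalone check of the rich API used above:

from rich.console import Console

Console(stderr=True).print("goes to stderr")  # e.g. `... 2>/dev/null` now silences logs
Console().print("goes to stdout")             # the default console writes to stdout
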
@@ -177,6 +186,7 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
         log_file (str | None): Path to a log file to additionally pipe the logs into.
             If None, reads from LLAMA_STACK_LOG_FILE environment variable.
     """
+    global _category_levels
     # Read from environment variables if not explicitly provided
     if category_levels is None:
         category_levels = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
@@ -184,6 +194,9 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
         if env_config:
             category_levels.update(parse_environment_config(env_config))
 
+    # Update the module-level _category_levels so that already-created loggers pick up the new levels
+    _category_levels.update(category_levels)
+
     if log_file is None:
         log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
     log_format = "%(asctime)s %(name)s:%(lineno)d %(category)s: %(message)s"
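
Note: the env_config parsed above comes from the LLAMA_STACK_LOGGING variable, which (per the llama-stack logging docs) takes semicolon-separated category=level pairs. A hedged sketch of that flow, assuming llama_stack is installed:

import os

# Assumed format: semicolon-separated category=level pairs
os.environ["LLAMA_STACK_LOGGING"] = "core=debug;server=warning"

from llama_stack.log import setup_logging

setup_logging()  # merges env overrides into the module-level _category_levels (see diff above)
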
@@ -268,14 +281,18 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
     }
     dictConfig(logging_config)
 
-    #
-    # already-configured loggers (e.g., uvicorn) and our own llama_stack loggers
+    # Update log levels for all loggers that were created before setup_logging was called
     for name, logger in logging.root.manager.loggerDict.items():
         if isinstance(logger, logging.Logger):
-            # Skip infrastructure loggers (uvicorn, fastapi)
-            if name.startswith(("uvicorn", "fastapi"
+            # Skip infrastructure loggers (uvicorn, fastapi) to preserve their configured levels
+            if name.startswith(("uvicorn", "fastapi")):
                 continue
-
+            # Update llama_stack loggers if root level was explicitly set (e.g., via all=CRITICAL)
+            if name.startswith("llama_stack") and "root" in category_levels:
+                logger.setLevel(root_level)
+            # Update third-party library loggers
+            elif not name.startswith("llama_stack"):
+                logger.setLevel(root_level)
 
 
 def get_logger(
@@ -38,18 +38,18 @@ def maybe_reshard_state_dict(
     mmap: bool = True,
 ) -> dict[str, torch.Tensor]:
     if str(map_location) == "cpu":
-        torch.
+        torch.set_default_dtype(torch.bfloat16)
     else:
-        torch.
+        torch.set_default_dtype(torch.bfloat16)
 
-    ckpt_paths = np.array(sorted(ckpt_paths))
+    ckpt_paths_array = np.array(sorted(ckpt_paths))
 
     new_mp_size, new_mp_rank = get_model_parallel_world_size(), get_model_parallel_rank()
-    old_mp_size = len(ckpt_paths)
+    old_mp_size = len(ckpt_paths_array)
     old_mp_ranks = map_mp_rank(old_mp_size, new_mp_size, new_mp_rank)
 
-    print(f"Loading checkpoint shards:\n{str(ckpt_paths[old_mp_ranks])}")
-    paths = ckpt_paths[old_mp_ranks]
+    print(f"Loading checkpoint shards:\n{str(ckpt_paths_array[old_mp_ranks])}")  # type: ignore
+    paths = ckpt_paths_array[old_mp_ranks]  # type: ignore
     state_dicts = [torch.load(str(p), map_location=map_location, mmap=mmap) for p in paths]
 
     if new_mp_size == old_mp_size:
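
Note: the checkpoint paths are kept in a NumPy array because of fancy indexing: ckpt_paths_array[old_mp_ranks] selects several shards at once, which a plain Python list cannot do with a list index. In miniature:

import numpy as np

paths = np.array(sorted(["shard-01.pth", "shard-00.pth", "shard-02.pth"]))
ranks = [0, 2]       # shards this model-parallel rank should load
print(paths[ranks])  # ['shard-00.pth' 'shard-02.pth']
# sorted([...])[ranks] on the plain list would raise TypeError
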
@@ -79,6 +79,8 @@ def add_hadamard_transform_for_spinquant(model: torch.nn.Module, prefix: str = ""):
     for module_name, module in model.named_children():
         child_full_name = prefix + "." + module_name
         if re.search(pattern_last_linear_ffn, child_full_name):
+            # Module matching this pattern should be nn.Linear with in_features
+            assert isinstance(module, nn.Linear), f"Expected nn.Linear, got {type(module)}"
             new_module = nn.Sequential(HadamardModule(group_size=module.in_features), module)
             del module
             setattr(model, module_name, new_module)
@@ -26,8 +26,10 @@ from fairscale.nn.model_parallel.initialize import (
 )
 from termcolor import cprint
 
+from llama_stack.models.llama.datatypes import ToolPromptFormat
+
 from ..checkpoint import maybe_reshard_state_dict
-from ..datatypes import GenerationResult, QuantizationMode, RawContent, RawMessage, ToolPromptFormat
+from ..datatypes import GenerationResult, QuantizationMode, RawContent, RawMessage
 from .args import ModelArgs
 from .chat_format import ChatFormat, LLMInput
 from .model import Transformer
@@ -15,13 +15,10 @@ from pathlib import Path
 
 from termcolor import colored
 
+from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall, ToolDefinition, ToolPromptFormat
+
 from ..datatypes import (
-    BuiltinTool,
     RawMessage,
-    StopReason,
-    ToolCall,
-    ToolDefinition,
-    ToolPromptFormat,
 )
 from . import template_data
 from .chat_format import ChatFormat
@@ -141,15 +141,15 @@ def build_encoder_attention_mask(
     """
     Build vision encoder attention mask that omits padding tokens.
     """
-    masks = []
+    masks_list: list[torch.Tensor] = []
     for arx in ar:
         mask_i = torch.ones((num_chunks, x.shape[2], 1), dtype=x.dtype)
         mask_i[: arx[0] * arx[1], :ntok] = 0
         mask_i = mask_i.view(num_chunks * x.shape[2], -1)
         mask_i = mask_i @ mask_i.T * get_negative_inf_value(x.dtype)
         mask_i = mask_i.unsqueeze(0)
-        masks.append(mask_i)
-    masks = torch.stack(masks).to(x.device).expand(-1, n_heads, -1, -1)
+        masks_list.append(mask_i)
+    masks = torch.stack(masks_list).to(x.device).expand(-1, n_heads, -1, -1)
     return masks
 
 
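
Note: the mask construction above works because mask_i is a 0/1 column vector (0 for real positions, 1 for padding): the outer product mask_i @ mask_i.T is 1 exactly where both query and key positions are padding, and multiplying by a large negative value turns those entries into additive -inf attention biases. A miniature version:

import torch

v = torch.tensor([[0.0], [1.0], [1.0]])  # 0 = real position, 1 = padding
bias = v @ v.T * torch.finfo(torch.float32).min
# bias[i, j] is hugely negative only where positions i and j are both padding; 0 elsewhere
print(bias)
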
@@ -95,7 +95,7 @@ class VariableSizeImageTransform:
                 factors_set.add(n // i)
         return factors_set
 
-    def find_supported_resolutions(self, max_num_chunks: int, patch_size: int) ->
+    def find_supported_resolutions(self, max_num_chunks: int, patch_size: int) -> list[tuple[int, int]]:
         """
         Computes all of the allowed resoltuions for a fixed number of chunks
         and patch_size. Useful for when dividing an image into chunks.
@@ -198,10 +198,10 @@ class VariableSizeImageTransform:
 
     def resize_without_distortion(
         self,
-        image:
+        image: Image.Image,
         target_size: tuple[int, int],
         max_upscaling_size: int | None,
-    ) ->
+    ) -> Image.Image:
         """
         Used to resize an image to target_resolution, without distortion.
 
@@ -380,12 +380,12 @@ class VariableSizeImageTransform:
         assert isinstance(image, Image.Image), type(image)
         w, h = image.size
 
-        possible_resolutions = self.find_supported_resolutions(max_num_chunks=max_num_chunks, patch_size=self.size)
-        possible_resolutions = torch.tensor(possible_resolutions)
+        possible_resolutions_list = self.find_supported_resolutions(max_num_chunks=max_num_chunks, patch_size=self.size)
+        possible_resolutions_tensor = torch.tensor(possible_resolutions_list)
 
         best_resolution = self.get_best_fit(
             image_size=(w, h),
-            possible_resolutions=possible_resolutions,
+            possible_resolutions=possible_resolutions_tensor,
             resize_to_max_canvas=resize_to_max_canvas,
         )
 
@@ -8,8 +8,9 @@ import json
 import re
 
 from llama_stack.log import get_logger
+from llama_stack.models.llama.datatypes import BuiltinTool, ToolCall, ToolPromptFormat
 
-from ..datatypes import BuiltinTool, RecursiveType, ToolCall, ToolPromptFormat
+from ..datatypes import RecursiveType
 
 logger = get_logger(name=__name__, category="models::llama")
 
@@ -13,7 +13,7 @@
 
 import textwrap
 
-from llama_stack.
+from llama_stack.models.llama.datatypes import ToolDefinition
 from llama_stack.models.llama.llama3.prompt_templates.base import (
     PromptTemplate,
     PromptTemplateGeneratorBase,
@@ -15,7 +15,6 @@ async def get_provider_impl(
     config: MetaReferenceAgentsImplConfig,
     deps: dict[Api, Any],
     policy: list[AccessRule],
-    telemetry_enabled: bool = False,
 ):
     from .agents import MetaReferenceAgentsImpl
 
@@ -23,12 +22,13 @@ async def get_provider_impl(
         config,
         deps[Api.inference],
         deps[Api.vector_io],
-        deps[Api.safety],
+        deps.get(Api.safety),
         deps[Api.tool_runtime],
         deps[Api.tool_groups],
         deps[Api.conversations],
+        deps[Api.prompts],
+        deps[Api.files],
         policy,
-        telemetry_enabled,
     )
     await impl.initialize()
     return impl