PyPI - llama-stack - Versions diffs - 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (738) hide show

llama_stack/distributions/watsonx/watsonx.py ADDED Viewed

@@ -0,0 +1,95 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput
+from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
+from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
+def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
+    """
+    Build the distribution template for the watsonx distribution.
+    Parameters:
+        name (str): template name; also used in the local files storage path
+            (~/.llama/distributions/<name>).
+    Returns:
+        DistributionTemplate: template with the build providers, a single "run.yaml"
+            run configuration and the documented environment variables.
+    """
+    # Provider types to build for each API, listed in the order they are registered.
+    provider_types_by_api = {
+        "inference": ["remote::watsonx", "inline::sentence-transformers"],
+        "vector_io": ["inline::faiss"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::rag-runtime",
+            "remote::model-context-protocol",
+        ],
+        "files": ["inline::localfs"],
+    }
+    providers = {
+        api: [BuildProvider(provider_type=provider_type) for provider_type in provider_types]
+        for api, provider_types in provider_types_by_api.items()
+    }
+    # Run-time overrides: watsonx for inference, local filesystem for files.
+    watsonx_inference = Provider(
+        provider_id="watsonx",
+        provider_type="remote::watsonx",
+        config=WatsonXConfig.sample_run_config(),
+    )
+    tool_groups = [
+        ToolGroupInput(toolgroup_id="builtin::websearch", provider_id="tavily-search"),
+        ToolGroupInput(toolgroup_id="builtin::rag", provider_id="rag-runtime"),
+    ]
+    local_files = Provider(
+        provider_id="meta-reference-files",
+        provider_type="inline::localfs",
+        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )
+    run_settings = RunConfigSettings(
+        provider_overrides={
+            "inference": [watsonx_inference],
+            "files": [local_files],
+        },
+        default_models=[],
+        default_tool_groups=tool_groups,
+    )
+    # Each entry maps an environment variable to (default value, description).
+    env_vars = {
+        "LLAMASTACK_PORT": ("5001", "Port for the Llama Stack distribution server"),
+        "WATSONX_API_KEY": ("", "watsonx API Key"),
+        "WATSONX_PROJECT_ID": ("", "watsonx Project ID"),
+    }
+    return DistributionTemplate(
+        name=name,
+        distro_type="remote_hosted",
+        description="Use watsonx for running LLM inference",
+        container_image=None,
+        template_path=None,
+        providers=providers,
+        run_configs={"run.yaml": run_settings},
+        run_config_env_vars=env_vars,
+    )

llama_stack/env.py ADDED Viewed

@@ -0,0 +1,24 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import os
+class MissingCredentialError(Exception):
+    """Raised by get_env_or_fail when a required environment variable is unset or empty."""
+def get_env_or_fail(key: str) -> str:
+    """
+    Return the value of an environment variable, failing loudly when it is missing.
+    Parameters:
+        key (str): name of the environment variable to read.
+    Returns:
+        str: the non-empty value of the variable.
+    Raises:
+        MissingCredentialError: if the variable is unset or set to an empty string.
+    """
+    found = os.getenv(key)
+    if found:
+        return found
+    # Unset and empty are treated the same; tell the user how to provide the value.
+    help_text = (
+        f"\nMissing {key} in environment. Please set it using one of these methods:"
+        f"\n1. Export in shell: export {key}=your-key"
+        f"\n2. Create .env file in project root with: {key}=your-key"
+        f"\n3. Pass directly to pytest: pytest --env {key}=your-key"
+    )
+    raise MissingCredentialError(help_text)

llama_stack/log.py ADDED Viewed

@@ -0,0 +1,314 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import logging  # allow-direct-logging
+import os
+import re
+from logging.config import dictConfig  # allow-direct-logging
+from rich.console import Console
+from rich.errors import MarkupError
+from rich.logging import RichHandler
+from llama_stack.core.datatypes import LoggingConfig
+# Default log level: applied to every category unless a configuration overrides it
+DEFAULT_LOG_LEVEL = logging.INFO
+# Predefined categories. setup_logging() configures one logger entry per name listed here,
+# and get_logger() raises ValueError for a category whose name (or the part before "::")
+# is not in this list, except for the UNCATEGORIZED placeholder below.
+CATEGORIES = [
+    "core",
+    "server",
+    "router",
+    "inference",
+    "agents",
+    "safety",
+    "eval",
+    "tools",
+    "client",
+    "telemetry",
+    "openai",
+    "openai_responses",
+    "openai_conversations",
+    "testing",
+    "providers",
+    "models",
+    "files",
+    "vector_io",
+    "tool_runtime",
+    "cli",
+    "post_training",
+    "scoring",
+    "tests",
+]
+# Category name used when a logger or log record does not specify one
+UNCATEGORIZED = "uncategorized"
+# Initialize category levels with default level. This module-level map is what get_logger()
+# consults, and it is updated in place when get_logger() is given a yaml config.
+_category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
+def config_to_category_levels(category: str, level: str):
+    """
+    Translate one (category, level) pair into a mapping usable by the logger configuration.
+    Shared by the environment-variable parser and the yaml parser.
+    Parameters:
+        category (str): logging category to apply the level to; "all" targets every
+            predefined category plus the root logger
+        level (str): logging level name to be used in the category (case-insensitive)
+    Returns:
+        Dict[str, int]: A dictionary mapping categories to their log levels. It is empty
+            when the level name or the category is not recognised (a warning is logged).
+    """
+    numeric_level = logging._nameToLevel.get(str(level).upper())
+    if numeric_level is None:
+        logging.warning(f"Unknown log level '{level}' for category '{category}'. Falling back to default 'INFO'.")
+        return {}
+    if category == "all":
+        # Every predefined category gets the level, and so does the root logger.
+        resolved: dict[str, int] = dict.fromkeys(CATEGORIES, numeric_level)
+        resolved["root"] = numeric_level
+        return resolved
+    if category in CATEGORIES:
+        return {category: numeric_level}
+    logging.warning(f"Unknown logging category: {category}. No changes made.")
+    return {}
+def parse_yaml_config(yaml_config: LoggingConfig) -> dict[str, int]:
+    """
+    Convert the logging section of a run.yaml into per-category numeric log levels.
+    Parameters:
+        yaml_config (Logging): the logger config object found in the run.yaml; its
+            category_levels mapping is read entry by entry
+    Returns:
+        Dict[str, int]: A dictionary mapping categories to their log levels.
+    """
+    merged: dict[str, int] = {}
+    for category_name, level_name in yaml_config.category_levels.items():
+        # Later entries win when two entries resolve to the same category.
+        merged |= config_to_category_levels(category=category_name, level=level_name)
+    return merged
+def parse_environment_config(env_config: str) -> dict[str, int]:
+    """
+    Parse a LLAMA_STACK_LOGGING value of the form "category=level,category=level".
+    Parameters:
+        env_config (str): The value of the LLAMA_STACK_LOGGING environment variable.
+    Returns:
+        Dict[str, int]: A dictionary mapping categories to their log levels. Entries
+            without an "=" are skipped with a warning; blank entries are ignored.
+    """
+    levels: dict[str, int] = {}
+    for entry in env_config.split(","):
+        if entry.strip() == "":
+            continue
+        try:
+            raw_category, raw_level = entry.split("=", 1)
+            levels.update(
+                config_to_category_levels(
+                    category=raw_category.strip().lower(),
+                    # Upper-case so the name matches the keys of logging._nameToLevel
+                    level=raw_level.strip().upper(),
+                )
+            )
+        except ValueError:
+            logging.warning(f"Invalid logging configuration: '{entry}'. Expected format: 'category=level'.")
+    return levels
+# Matches an opening or closing bracketed tag made only of letters, digits, space, "_", "#", "=" and ","
+_RICH_MARKUP_TAG = re.compile(r"\[/?[a-zA-Z0-9 _#=,]+\]")
+def strip_rich_markup(text):
+    """
+    Remove Rich markup tags like [dim], [bold magenta], etc.
+    Any bracketed run matching the tag pattern is removed, whether or not Rich
+    would recognise it as a style.
+    """
+    return _RICH_MARKUP_TAG.sub("", text)
+class CustomRichHandler(RichHandler):
+    """RichHandler with a fixed-width console that retries without markup when markup is invalid."""
+    def __init__(self, *args, **kwargs):
+        # Width comes from LLAMA_STACK_LOG_WIDTH (default 120) so that output stays
+        # readable when it is redirected to a file.
+        width_setting = os.environ.get("LLAMA_STACK_LOG_WIDTH", "120")
+        # The console is always replaced; terminal codes are not forced, to avoid
+        # ANSI escape codes in log files.
+        kwargs["console"] = Console(width=int(width_setting))
+        super().__init__(*args, **kwargs)
+    def emit(self, record):
+        """Emit the record; on MarkupError, emit it again with markup temporarily disabled."""
+        try:
+            super().emit(record)
+        except MarkupError:
+            saved_markup = self.markup
+            self.markup = False
+            try:
+                super().emit(record)
+            finally:
+                # Restore the previous setting even if the retry fails.
+                self.markup = saved_markup
+class CustomFileHandler(logging.FileHandler):
+    """File handler that writes records as plain text with Rich markup tags removed."""
+    def __init__(self, filename, mode="a", encoding=None, delay=False):
+        super().__init__(filename, mode, encoding, delay)
+        # Default formatter to match console output
+        self.default_formatter = logging.Formatter("%(asctime)s %(name)s:%(lineno)d %(category)s: %(message)s")
+        self.setFormatter(self.default_formatter)
+    def emit(self, record):
+        """
+        Write the record to the file after stripping Rich markup from its message.
+        The markup is stripped on a shallow copy of the record: the same LogRecord
+        object is handed to every handler attached to a logger, so rewriting
+        record.msg in place would remove the markup for handlers that run later.
+        """
+        import copy  # local import keeps this block independent of the module import list
+        if hasattr(record, "msg"):
+            record = copy.copy(record)
+            record.msg = strip_rich_markup(str(record.msg))
+        super().emit(record)
+def setup_logging(category_levels: dict[str, int] | None = None, log_file: str | None = None) -> None:
+    """
+    Configure logging based on the provided category log levels and an optional log file.
+    If category_levels or log_file are not provided, they will be read from environment variables.
+    Both the console handler and the optional file handler apply the category filter, so the
+    %(category)s field used by their formatters is populated regardless of handler order.
+    Parameters:
+        category_levels (Dict[str, int] | None): A dictionary mapping categories to their log levels.
+            If None, reads from LLAMA_STACK_LOGGING environment variable and uses defaults.
+        log_file (str | None): Path to a log file to additionally pipe the logs into.
+            If None, reads from LLAMA_STACK_LOG_FILE environment variable.
+    """
+    # Read from environment variables if not explicitly provided
+    if category_levels is None:
+        category_levels = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
+        env_config = os.environ.get("LLAMA_STACK_LOGGING", "")
+        if env_config:
+            category_levels.update(parse_environment_config(env_config))
+    if log_file is None:
+        log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
+    log_format = "%(asctime)s %(name)s:%(lineno)d %(category)s: %(message)s"
+    class CategoryFilter(logging.Filter):
+        """Ensure category is always present in log records."""
+        def filter(self, record):
+            if not hasattr(record, "category"):
+                record.category = UNCATEGORIZED  # Default to 'uncategorized' if no category found
+            return True
+    # Determine the root logger's level (default to WARNING if not specified)
+    root_level = category_levels.get("root", logging.WARNING)
+    handlers = {
+        "console": {
+            "()": CustomRichHandler,  # Use custom console handler
+            "formatter": "rich",
+            "rich_tracebacks": True,
+            "show_time": False,
+            "show_path": False,
+            "markup": True,
+            "filters": ["category_filter"],
+        }
+    }
+    # Add a file handler if log_file is set
+    if log_file:
+        handlers["file"] = {
+            "()": CustomFileHandler,
+            "filename": log_file,
+            "mode": "a",
+            "encoding": "utf-8",
+            # The file formatter also uses %(category)s, so it must not depend on the
+            # console handler's filter having already added the attribute to the record.
+            "filters": ["category_filter"],
+        }
+    handler_names = list(handlers)
+    def logger_entry(level: int) -> dict:
+        # Non-propagating logger attached to every configured handler.
+        return {"handlers": list(handler_names), "level": level, "propagate": False}
+    loggers = {category: logger_entry(category_levels.get(category, DEFAULT_LOG_LEVEL)) for category in CATEGORIES}
+    # Explicitly configure uvicorn loggers to preserve their INFO level
+    for uvicorn_name in ("uvicorn", "uvicorn.error", "uvicorn.access"):
+        loggers[uvicorn_name] = logger_entry(logging.INFO)
+    logging_config = {
+        "version": 1,
+        "disable_existing_loggers": False,
+        "formatters": {
+            "rich": {
+                "()": logging.Formatter,
+                "format": log_format,
+            }
+        },
+        "handlers": handlers,
+        "filters": {
+            "category_filter": {
+                "()": CategoryFilter,
+            }
+        },
+        "loggers": loggers,
+        "root": {
+            "handlers": list(handler_names),
+            "level": root_level,  # Set root logger's level dynamically
+        },
+    }
+    dictConfig(logging_config)
+    # Ensure third-party libraries follow the root log level, but preserve
+    # already-configured loggers (e.g., uvicorn) and our own llama_stack loggers
+    # NOTE(review): loggers named after CATEGORIES do not match the skipped prefixes, so
+    # this loop also resets the levels dictConfig just gave them - confirm this is intended.
+    for name, logger in logging.root.manager.loggerDict.items():
+        if isinstance(logger, logging.Logger):
+            # Skip infrastructure loggers (uvicorn, fastapi) and our own loggers
+            if name.startswith(("uvicorn", "fastapi", "llama_stack")):
+                continue
+            logger.setLevel(root_level)
+def get_logger(
+    name: str, category: str = "uncategorized", config: LoggingConfig | None = None
+) -> logging.LoggerAdapter:
+    """
+    Returns a logger with the specified name and category.
+    If no category is provided, defaults to 'uncategorized'.
+    The level is looked up by the full category first, then by the part of the category
+    before "::"; the 'uncategorized' category falls back to the root/default level.
+    Parameters:
+        name (str): The name of the logger (e.g., module or filename).
+        category (str): The category of the logger (default 'uncategorized').
+        config (Logging): optional yaml config to override the existing logger configuration;
+            its levels are merged into the module-level category map
+    Returns:
+        logging.LoggerAdapter: Configured logger with category support.
+    Raises:
+        ValueError: if the category is neither known nor 'uncategorized'.
+    """
+    if config:
+        _category_levels.update(parse_yaml_config(config))
+    logger = logging.getLogger(name)
+    root_category = category.split("::")[0]
+    if category in _category_levels:
+        log_level = _category_levels[category]
+    elif root_category in _category_levels:
+        # Sub-categories such as "parent::child" inherit the level of "parent".
+        log_level = _category_levels[root_category]
+    elif category != UNCATEGORIZED:
+        raise ValueError(
+            f"Unknown logging category: {category}. To resolve, choose a valid category from the CATEGORIES list "
+            f"or add it to the CATEGORIES list. Available categories: {CATEGORIES}"
+        )
+    else:
+        log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
+    logger.setLevel(log_level)
+    # The adapter attaches the category to every record emitted through it.
+    return logging.LoggerAdapter(logger, {"category": category})

llama_stack/models/llama/checkpoint.py ADDED Viewed

@@ -0,0 +1,164 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import concurrent.futures
+import re
+from pathlib import Path
+from typing import Any
+import numpy as np
+import torch
+from fairscale.nn.model_parallel.initialize import get_model_parallel_rank, get_model_parallel_world_size
+def map_mp_rank(old_mp_size: int, new_mp_size: int, new_mp_rank: int) -> list[int]:
+    """
+    Map a new MP rank to a list of old MP ranks given a change in MP size.
+    Parameters:
+        old_mp_size (int): model-parallel size the checkpoint was saved with.
+        new_mp_size (int): model-parallel size being loaded into.
+        new_mp_rank (int): rank within the new model-parallel group.
+    Returns:
+        list[int]: a single old rank when the new size is a multiple of the old one
+            (the shard gets split), otherwise the consecutive old ranks to merge.
+    Raises:
+        ValueError: if neither size is a multiple of the other.
+    """
+    if new_mp_size % old_mp_size == 0:
+        # Several new ranks share one old shard, which is later split into smaller ones.
+        return [new_mp_rank * old_mp_size // new_mp_size]
+    if old_mp_size % new_mp_size != 0:
+        raise ValueError(
+            f"Either old MP size or new MP size should be a multiple of the other: "
+            f"{old_mp_size} % {new_mp_size} != 0 and {new_mp_size} % {old_mp_size} != 0"
+        )
+    # Each new rank merges a contiguous run of old shards.
+    shards_per_rank = old_mp_size // new_mp_size
+    first_shard = new_mp_rank * shards_per_rank
+    return list(range(first_shard, first_shard + shards_per_rank))
+def maybe_reshard_state_dict(
+    ckpt_paths: list[Path],
+    n_kv_heads: int,
+    moe_num_experts: int | None = None,
+    map_location: str | torch.device = "cpu",
+    mmap: bool = True,
+) -> dict[str, torch.Tensor]:
+    """
+    Load the checkpoint shard(s) needed by the current model-parallel rank and reshard
+    them when the number of checkpoint files differs from the model-parallel world size.
+    Parameters:
+        ckpt_paths (list[Path]): checkpoint shard files; they are sorted, and their count
+            is taken as the old model-parallel size.
+        n_kv_heads (int): used to compute the repeat factor passed to reshard_mp as
+            max(new_mp_size // n_kv_heads, 1).
+        moe_num_experts (int | None): when not None, each loaded state dict is passed
+            through convert_moe_weights before resharding.
+        map_location (str | torch.device): forwarded to torch.load; also selects the
+            default tensor type set below.
+        mmap (bool): forwarded to torch.load.
+    Returns:
+        dict[str, torch.Tensor]: the loaded state dict when the sizes match, otherwise
+            the result of reshard_mp.
+    """
+    # Side effect: changes torch's default tensor type to BFloat16 (CPU or CUDA variant).
+    if str(map_location) == "cpu":
+        torch.set_default_tensor_type(torch.BFloat16Tensor)
+    else:
+        torch.set_default_tensor_type(torch.cuda.BFloat16Tensor)
+    # Wrapped in a numpy array so it can be indexed with the list of old ranks below.
+    ckpt_paths = np.array(sorted(ckpt_paths))
+    new_mp_size, new_mp_rank = get_model_parallel_world_size(), get_model_parallel_rank()
+    old_mp_size = len(ckpt_paths)
+    old_mp_ranks = map_mp_rank(old_mp_size, new_mp_size, new_mp_rank)
+    print(f"Loading checkpoint shards:\n{str(ckpt_paths[old_mp_ranks])}")  # type: ignore
+    paths = ckpt_paths[old_mp_ranks]  # type: ignore
+    state_dicts = [torch.load(str(p), map_location=map_location, mmap=mmap) for p in paths]
+    # Same size: map_mp_rank returned exactly one shard, which is used as-is
+    # (note that the MoE conversion below is not applied on this path).
+    if new_mp_size == old_mp_size:
+        return state_dicts[0]  # type: ignore
+    if moe_num_experts is not None:
+        state_dicts = [convert_moe_weights(d, moe_num_experts) for d in state_dicts]
+    print(f"Resharding {len(state_dicts)} state dicts from MP size {old_mp_size} to MP size {new_mp_size}")
+    # When splitting, size/rank select this rank's chunk of the single loaded shard;
+    # when merging, size is 1 and rank is 0.
+    return reshard_mp(
+        state_dicts,
+        size=max(new_mp_size // old_mp_size, 1),
+        rank=new_mp_rank % max(new_mp_size // old_mp_size, 1),
+        repeat_qk_qv=max(new_mp_size // n_kv_heads, 1),
+    )
+# The four sets below hold regular-expression fragments, not literal key names: they are
+# joined with "|" and compiled by reshard_mp / convert_moe_weights, then matched with
+# re.search, so an unescaped "." matches any character.
+# Keys matching these patterns are concatenated/chunked along the last dimension by reshard_mp.
+_WEIGHT_ROW_KEY = {
+    "feed_forward.w2",
+    "feed_forward.mlp.fc2",
+    "attention.wo",
+    "feed_forward.mlp.fc2_weight",
+    "feed_forward.w_out_shared_DF.weight",
+    "attn.wo.weight",
+    "mlp.c_proj.weight",
+}
+# MoE expert weights handled like the row keys above; also unflattened by convert_moe_weights.
+_MOE_WEIGHT_ROW_KEY = {"feed_forward.experts.(moe_w_in_eD_F|moe_w_swiglu_eD_F)"}
+# Keys matching these patterns go through the column branch of reshard_mp, which picks
+# the concat/chunk dimension based on further substring checks on the key.
+_WEIGHT_COLUMN_KEY = {
+    "output",
+    "feed_forward.(w1|w3)",
+    "feed_forward.mlp.(fc1|fc3)",
+    "feed_forward.mlp.fc1_weight",
+    "attention.(wk|wq|wv|wqkv).weight",
+    "feed_forward.(w_in_shared_FD|w_swiglu_FD)",
+    "attn.(wk|wq|wv).weight",
+    "attn.(wk|wq|wv).bias",
+    "mlp.c_fc.weight",
+    "mlp.c_fc.bias",
+    "conv1._linear.weight",
+    "tok_embeddings.weight",
+    "vision_projection.weight",
+}
+# MoE expert weights handled like the column keys above; also unflattened by convert_moe_weights.
+_MOE_WEIGHT_COLUMN_KEY = {"feed_forward.experts.moe_w_out_eF_D"}
+def reshard_mp(
+    state_dicts: list[dict[str, torch.Tensor]],
+    size: int,
+    rank: int,
+    repeat_qk_qv: int = 1,
+) -> dict[str, torch.Tensor]:
+    """
+    Reshard a list of state dicts into a single state dict given a change in MP size.
+    If the list has more than one state dict, we concatenate the values of the same
+    key across all state dicts. Otherwise, we just slice it for the current MP rank.
+    Parameters:
+        state_dicts (list[dict[str, torch.Tensor]]): shards to merge, or a single shard to split.
+        size (int): number of chunks a single shard is split into.
+        rank (int): index of the chunk to keep when splitting.
+        repeat_qk_qv (int): how many times wk/wv weights are repeated along dim 0
+            before being concatenated or chunked.
+    Returns:
+        dict[str, torch.Tensor]: one tensor per key of the first state dict.
+    """
+    def concat_or_chunk(tensors: list[torch.Tensor], dim: int) -> torch.Tensor:
+        # Several tensors: merge along dim. One tensor: keep this rank's chunk, as a copy.
+        if len(tensors) > 1:
+            return torch.cat(tensors, dim=dim)
+        return tensors[0].chunk(size, dim=dim)[rank].clone()
+    def process_key(key: str) -> torch.Tensor:
+        # Row patterns are checked first, so a key matching both sets is treated as a row key.
+        if row_regex.search(key):
+            return concat_or_chunk([s[key] for s in state_dicts], dim=-1)
+        elif column_regex.search(key):
+            if "w13" in key or "fc1_weight" in key:
+                # View dim 0 as two halves, concat/chunk each half along its own first
+                # dimension, then flatten the halves back together.
+                dims = state_dicts[0][key].size()
+                values = [s[key].view(2, dims[0] // 2, *dims[1:]) for s in state_dicts]
+                return concat_or_chunk(values, dim=1).flatten(0, 1)
+            elif "qkv" in key:
+                # q_dim is read from size(1) of the tensor stored under the same key with
+                # "qkv" replaced by "o"; the rest of dim 0 is split evenly into two kv parts.
+                q_dim = state_dicts[0][key.replace("qkv", "o")].size(1)
+                kv_dim = (state_dicts[0][key].size(0) - q_dim) // 2
+                values = [s[key].split((q_dim, kv_dim, kv_dim)) for s in state_dicts]
+                # zip(*values) groups the q parts, the k parts and the v parts across shards.
+                return torch.cat([concat_or_chunk(x, dim=0) for x in zip(*values, strict=False)])  # type: ignore
+            elif "wk.weight" in key or "wv.weight" in key:
+                # Support MP > #kv_head
+                return concat_or_chunk([s[key].repeat(repeat_qk_qv, 1) for s in state_dicts], dim=0)
+            elif key == "output.bias" or key == "fc.weight":
+                return concat_or_chunk([s[key] for s in state_dicts], dim=0)
+            elif "w_" in key:
+                return concat_or_chunk([s[key] for s in state_dicts], dim=-2)
+            else:
+                return concat_or_chunk([s[key] for s in state_dicts], dim=0)
+        else:
+            # Keys matching neither pattern set are copied from the first state dict.
+            return state_dicts[0][key].clone()
+    row_keys = _WEIGHT_ROW_KEY | _MOE_WEIGHT_ROW_KEY
+    column_keys = _WEIGHT_COLUMN_KEY | _MOE_WEIGHT_COLUMN_KEY
+    column_regex = re.compile("|".join(column_keys))
+    row_regex = re.compile("|".join(row_keys))
+    output: dict[str, torch.Tensor] = {}
+    # Keys are processed on a thread pool; results are collected as they complete, so
+    # the key order of the returned dict follows completion order.
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        # Note: only processes keys in the first state dict.
+        # Assumes keys are the same across all state dicts.
+        mappings = {executor.submit(process_key, key): key for key in state_dicts[0]}
+        for future in concurrent.futures.as_completed(mappings):
+            output[mappings[future]] = future.result()
+    return output
+def convert_moe_weights(state_dict: dict[str, Any], num_experts: int) -> dict[str, Any]:
+    """
+    Unflatten routed-expert weights so that their first dimension is split per expert.
+    Every key matching the MoE row/column patterns is replaced by its tensor with dim 0
+    unflattened to (num_experts, -1) and then squeezed on dim 0.
+    Parameters:
+        state_dict (dict[str, Any]): state dict to convert; it is modified in place.
+        num_experts (int): number of experts used for the unflatten.
+    Returns:
+        dict[str, Any]: the same state_dict object that was passed in.
+    """
+    routed_pattern = re.compile("|".join(_MOE_WEIGHT_ROW_KEY | _MOE_WEIGHT_COLUMN_KEY))
+    # Collect the matching keys up front so the dict can be mutated while walking them.
+    routed_names = [name for name in state_dict if routed_pattern.search(name)]
+    for name in routed_names:
+        flat_weight = state_dict.pop(name)
+        state_dict[name] = flat_weight.unflatten(0, (num_experts, -1)).squeeze(dim=0)
+    return state_dict

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl