llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/__init__.py +5 -0
- llama_stack/apis/agents/__init__.py +1 -1
- llama_stack/apis/agents/agents.py +700 -281
- llama_stack/apis/agents/openai_responses.py +1311 -0
- llama_stack/{providers/adapters/memory/sample/config.py → apis/batches/__init__.py} +2 -5
- llama_stack/apis/batches/batches.py +100 -0
- llama_stack/apis/benchmarks/__init__.py +7 -0
- llama_stack/apis/benchmarks/benchmarks.py +108 -0
- llama_stack/apis/common/content_types.py +143 -0
- llama_stack/apis/common/errors.py +103 -0
- llama_stack/apis/common/job_types.py +38 -0
- llama_stack/apis/common/responses.py +36 -0
- llama_stack/apis/common/training_types.py +36 -5
- llama_stack/apis/common/type_system.py +158 -0
- llama_stack/apis/conversations/__init__.py +31 -0
- llama_stack/apis/conversations/conversations.py +286 -0
- llama_stack/apis/datasetio/__init__.py +7 -0
- llama_stack/apis/datasetio/datasetio.py +59 -0
- llama_stack/apis/datasets/__init__.py +7 -0
- llama_stack/apis/datasets/datasets.py +251 -0
- llama_stack/apis/datatypes.py +160 -0
- llama_stack/apis/eval/__init__.py +7 -0
- llama_stack/apis/eval/eval.py +169 -0
- llama_stack/apis/files/__init__.py +7 -0
- llama_stack/apis/files/files.py +199 -0
- llama_stack/apis/inference/__init__.py +1 -1
- llama_stack/apis/inference/inference.py +1169 -113
- llama_stack/apis/inspect/__init__.py +1 -1
- llama_stack/apis/inspect/inspect.py +69 -16
- llama_stack/apis/models/__init__.py +1 -1
- llama_stack/apis/models/models.py +148 -21
- llama_stack/apis/post_training/__init__.py +1 -1
- llama_stack/apis/post_training/post_training.py +265 -120
- llama_stack/{providers/adapters/agents/sample/config.py → apis/prompts/__init__.py} +2 -5
- llama_stack/apis/prompts/prompts.py +204 -0
- llama_stack/apis/providers/__init__.py +7 -0
- llama_stack/apis/providers/providers.py +69 -0
- llama_stack/apis/resource.py +37 -0
- llama_stack/apis/safety/__init__.py +1 -1
- llama_stack/apis/safety/safety.py +95 -12
- llama_stack/apis/scoring/__init__.py +7 -0
- llama_stack/apis/scoring/scoring.py +93 -0
- llama_stack/apis/scoring_functions/__init__.py +7 -0
- llama_stack/apis/scoring_functions/scoring_functions.py +208 -0
- llama_stack/apis/shields/__init__.py +1 -1
- llama_stack/apis/shields/shields.py +76 -33
- llama_stack/apis/synthetic_data_generation/__init__.py +1 -1
- llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +40 -17
- llama_stack/apis/telemetry/__init__.py +1 -1
- llama_stack/apis/telemetry/telemetry.py +322 -31
- llama_stack/apis/{dataset → tools}/__init__.py +2 -1
- llama_stack/apis/tools/rag_tool.py +218 -0
- llama_stack/apis/tools/tools.py +221 -0
- llama_stack/apis/vector_io/__init__.py +7 -0
- llama_stack/apis/vector_io/vector_io.py +960 -0
- llama_stack/apis/vector_stores/__init__.py +7 -0
- llama_stack/apis/vector_stores/vector_stores.py +51 -0
- llama_stack/apis/version.py +9 -0
- llama_stack/cli/llama.py +13 -5
- llama_stack/cli/stack/_list_deps.py +182 -0
- llama_stack/cli/stack/list_apis.py +1 -1
- llama_stack/cli/stack/list_deps.py +55 -0
- llama_stack/cli/stack/list_providers.py +24 -10
- llama_stack/cli/stack/list_stacks.py +56 -0
- llama_stack/cli/stack/remove.py +115 -0
- llama_stack/cli/stack/run.py +169 -56
- llama_stack/cli/stack/stack.py +18 -4
- llama_stack/cli/stack/utils.py +151 -0
- llama_stack/cli/table.py +23 -61
- llama_stack/cli/utils.py +29 -0
- llama_stack/core/access_control/access_control.py +131 -0
- llama_stack/core/access_control/conditions.py +129 -0
- llama_stack/core/access_control/datatypes.py +107 -0
- llama_stack/core/build.py +164 -0
- llama_stack/core/client.py +205 -0
- llama_stack/core/common.sh +37 -0
- llama_stack/{distribution → core}/configure.py +74 -55
- llama_stack/core/conversations/conversations.py +309 -0
- llama_stack/core/datatypes.py +625 -0
- llama_stack/core/distribution.py +276 -0
- llama_stack/core/external.py +54 -0
- llama_stack/core/id_generation.py +42 -0
- llama_stack/core/inspect.py +86 -0
- llama_stack/core/library_client.py +539 -0
- llama_stack/core/prompts/prompts.py +234 -0
- llama_stack/core/providers.py +137 -0
- llama_stack/core/request_headers.py +115 -0
- llama_stack/core/resolver.py +506 -0
- llama_stack/core/routers/__init__.py +101 -0
- llama_stack/core/routers/datasets.py +73 -0
- llama_stack/core/routers/eval_scoring.py +155 -0
- llama_stack/core/routers/inference.py +645 -0
- llama_stack/core/routers/safety.py +85 -0
- llama_stack/core/routers/tool_runtime.py +91 -0
- llama_stack/core/routers/vector_io.py +442 -0
- llama_stack/core/routing_tables/benchmarks.py +62 -0
- llama_stack/core/routing_tables/common.py +254 -0
- llama_stack/core/routing_tables/datasets.py +91 -0
- llama_stack/core/routing_tables/models.py +163 -0
- llama_stack/core/routing_tables/scoring_functions.py +66 -0
- llama_stack/core/routing_tables/shields.py +61 -0
- llama_stack/core/routing_tables/toolgroups.py +129 -0
- llama_stack/core/routing_tables/vector_stores.py +292 -0
- llama_stack/core/server/auth.py +187 -0
- llama_stack/core/server/auth_providers.py +494 -0
- llama_stack/core/server/quota.py +110 -0
- llama_stack/core/server/routes.py +141 -0
- llama_stack/core/server/server.py +542 -0
- llama_stack/core/server/tracing.py +80 -0
- llama_stack/core/stack.py +546 -0
- llama_stack/core/start_stack.sh +117 -0
- llama_stack/core/storage/datatypes.py +283 -0
- llama_stack/{cli/model → core/store}/__init__.py +1 -1
- llama_stack/core/store/registry.py +199 -0
- llama_stack/core/testing_context.py +49 -0
- llama_stack/core/ui/app.py +55 -0
- llama_stack/core/ui/modules/api.py +32 -0
- llama_stack/core/ui/modules/utils.py +42 -0
- llama_stack/core/ui/page/distribution/datasets.py +18 -0
- llama_stack/core/ui/page/distribution/eval_tasks.py +20 -0
- llama_stack/core/ui/page/distribution/models.py +18 -0
- llama_stack/core/ui/page/distribution/providers.py +27 -0
- llama_stack/core/ui/page/distribution/resources.py +48 -0
- llama_stack/core/ui/page/distribution/scoring_functions.py +18 -0
- llama_stack/core/ui/page/distribution/shields.py +19 -0
- llama_stack/core/ui/page/evaluations/app_eval.py +143 -0
- llama_stack/core/ui/page/evaluations/native_eval.py +253 -0
- llama_stack/core/ui/page/playground/chat.py +130 -0
- llama_stack/core/ui/page/playground/tools.py +352 -0
- llama_stack/core/utils/config.py +30 -0
- llama_stack/{distribution → core}/utils/config_dirs.py +3 -6
- llama_stack/core/utils/config_resolution.py +125 -0
- llama_stack/core/utils/context.py +84 -0
- llama_stack/core/utils/exec.py +96 -0
- llama_stack/{providers/impls/meta_reference/codeshield/config.py → core/utils/image_types.py} +4 -3
- llama_stack/{distribution → core}/utils/model_utils.py +2 -2
- llama_stack/{distribution → core}/utils/prompt_for_config.py +30 -63
- llama_stack/{apis/batch_inference → distributions/dell}/__init__.py +1 -1
- llama_stack/distributions/dell/build.yaml +33 -0
- llama_stack/distributions/dell/dell.py +158 -0
- llama_stack/distributions/dell/run-with-safety.yaml +141 -0
- llama_stack/distributions/dell/run.yaml +132 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +7 -0
- llama_stack/distributions/meta-reference-gpu/build.yaml +32 -0
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +163 -0
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +154 -0
- llama_stack/distributions/meta-reference-gpu/run.yaml +139 -0
- llama_stack/{apis/evals → distributions/nvidia}/__init__.py +1 -1
- llama_stack/distributions/nvidia/build.yaml +29 -0
- llama_stack/distributions/nvidia/nvidia.py +154 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +137 -0
- llama_stack/distributions/nvidia/run.yaml +116 -0
- llama_stack/distributions/open-benchmark/__init__.py +7 -0
- llama_stack/distributions/open-benchmark/build.yaml +36 -0
- llama_stack/distributions/open-benchmark/open_benchmark.py +303 -0
- llama_stack/distributions/open-benchmark/run.yaml +252 -0
- llama_stack/distributions/postgres-demo/__init__.py +7 -0
- llama_stack/distributions/postgres-demo/build.yaml +23 -0
- llama_stack/distributions/postgres-demo/postgres_demo.py +125 -0
- llama_stack/distributions/postgres-demo/run.yaml +115 -0
- llama_stack/{apis/memory → distributions/starter}/__init__.py +1 -1
- llama_stack/distributions/starter/build.yaml +61 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/run.yaml +276 -0
- llama_stack/distributions/starter/starter.py +345 -0
- llama_stack/distributions/starter-gpu/__init__.py +7 -0
- llama_stack/distributions/starter-gpu/build.yaml +61 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/distributions/starter-gpu/run.yaml +279 -0
- llama_stack/distributions/starter-gpu/starter_gpu.py +20 -0
- llama_stack/distributions/template.py +456 -0
- llama_stack/distributions/watsonx/__init__.py +7 -0
- llama_stack/distributions/watsonx/build.yaml +33 -0
- llama_stack/distributions/watsonx/run.yaml +133 -0
- llama_stack/distributions/watsonx/watsonx.py +95 -0
- llama_stack/env.py +24 -0
- llama_stack/log.py +314 -0
- llama_stack/models/llama/checkpoint.py +164 -0
- llama_stack/models/llama/datatypes.py +164 -0
- llama_stack/models/llama/hadamard_utils.py +86 -0
- llama_stack/models/llama/llama3/args.py +74 -0
- llama_stack/models/llama/llama3/chat_format.py +286 -0
- llama_stack/models/llama/llama3/generation.py +376 -0
- llama_stack/models/llama/llama3/interface.py +255 -0
- llama_stack/models/llama/llama3/model.py +304 -0
- llama_stack/models/llama/llama3/multimodal/__init__.py +12 -0
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +180 -0
- llama_stack/models/llama/llama3/multimodal/image_transform.py +409 -0
- llama_stack/models/llama/llama3/multimodal/model.py +1430 -0
- llama_stack/models/llama/llama3/multimodal/utils.py +26 -0
- llama_stack/models/llama/llama3/prompt_templates/__init__.py +22 -0
- llama_stack/models/llama/llama3/prompt_templates/base.py +39 -0
- llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +319 -0
- llama_stack/models/llama/llama3/prompt_templates/tool_response.py +62 -0
- llama_stack/models/llama/llama3/quantization/loader.py +316 -0
- llama_stack/models/llama/llama3/template_data.py +116 -0
- llama_stack/models/llama/llama3/tokenizer.model +128000 -0
- llama_stack/models/llama/llama3/tokenizer.py +198 -0
- llama_stack/models/llama/llama3/tool_utils.py +266 -0
- llama_stack/models/llama/llama3_1/__init__.py +12 -0
- llama_stack/models/llama/llama3_1/prompt_format.md +358 -0
- llama_stack/models/llama/llama3_1/prompts.py +258 -0
- llama_stack/models/llama/llama3_2/prompts_text.py +229 -0
- llama_stack/models/llama/llama3_2/prompts_vision.py +126 -0
- llama_stack/models/llama/llama3_2/text_prompt_format.md +286 -0
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +141 -0
- llama_stack/models/llama/llama3_3/prompts.py +259 -0
- llama_stack/models/llama/llama4/args.py +107 -0
- llama_stack/models/llama/llama4/chat_format.py +317 -0
- llama_stack/models/llama/llama4/datatypes.py +56 -0
- llama_stack/models/llama/llama4/ffn.py +58 -0
- llama_stack/models/llama/llama4/generation.py +313 -0
- llama_stack/models/llama/llama4/model.py +437 -0
- llama_stack/models/llama/llama4/moe.py +214 -0
- llama_stack/models/llama/llama4/preprocess.py +435 -0
- llama_stack/models/llama/llama4/prompt_format.md +304 -0
- llama_stack/models/llama/llama4/prompt_templates/system_prompts.py +136 -0
- llama_stack/models/llama/llama4/prompts.py +279 -0
- llama_stack/models/llama/llama4/quantization/__init__.py +5 -0
- llama_stack/models/llama/llama4/quantization/loader.py +226 -0
- llama_stack/models/llama/llama4/tokenizer.model +200000 -0
- llama_stack/models/llama/llama4/tokenizer.py +263 -0
- llama_stack/models/llama/llama4/vision/__init__.py +5 -0
- llama_stack/models/llama/llama4/vision/embedding.py +210 -0
- llama_stack/models/llama/llama4/vision/encoder.py +412 -0
- llama_stack/models/llama/prompt_format.py +191 -0
- llama_stack/models/llama/quantize_impls.py +316 -0
- llama_stack/models/llama/sku_list.py +1029 -0
- llama_stack/models/llama/sku_types.py +233 -0
- llama_stack/models/llama/tokenizer_utils.py +40 -0
- llama_stack/providers/datatypes.py +136 -107
- llama_stack/providers/inline/__init__.py +5 -0
- llama_stack/providers/inline/agents/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py +12 -5
- llama_stack/providers/inline/agents/meta_reference/agent_instance.py +1024 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +383 -0
- llama_stack/providers/inline/agents/meta_reference/config.py +37 -0
- llama_stack/providers/inline/agents/meta_reference/persistence.py +228 -0
- llama_stack/providers/inline/agents/meta_reference/responses/__init__.py +5 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +423 -0
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +1226 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +449 -0
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +194 -0
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +365 -0
- llama_stack/providers/inline/agents/meta_reference/safety.py +52 -0
- llama_stack/providers/inline/batches/__init__.py +5 -0
- llama_stack/providers/inline/batches/reference/__init__.py +36 -0
- llama_stack/providers/inline/batches/reference/batches.py +679 -0
- llama_stack/providers/inline/batches/reference/config.py +40 -0
- llama_stack/providers/inline/datasetio/__init__.py +5 -0
- llama_stack/providers/inline/datasetio/localfs/__init__.py +20 -0
- llama_stack/providers/inline/datasetio/localfs/config.py +23 -0
- llama_stack/providers/inline/datasetio/localfs/datasetio.py +113 -0
- llama_stack/providers/inline/eval/__init__.py +5 -0
- llama_stack/providers/inline/eval/meta_reference/__init__.py +28 -0
- llama_stack/providers/inline/eval/meta_reference/config.py +23 -0
- llama_stack/providers/inline/eval/meta_reference/eval.py +259 -0
- llama_stack/providers/inline/files/localfs/__init__.py +20 -0
- llama_stack/providers/inline/files/localfs/config.py +31 -0
- llama_stack/providers/inline/files/localfs/files.py +219 -0
- llama_stack/providers/inline/inference/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/__init__.py +4 -4
- llama_stack/providers/inline/inference/meta_reference/common.py +24 -0
- llama_stack/providers/inline/inference/meta_reference/config.py +68 -0
- llama_stack/providers/inline/inference/meta_reference/generators.py +211 -0
- llama_stack/providers/inline/inference/meta_reference/inference.py +158 -0
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +96 -0
- llama_stack/providers/{impls/meta_reference/inference → inline/inference/meta_reference}/parallel_utils.py +56 -73
- llama_stack/providers/inline/inference/sentence_transformers/__init__.py +22 -0
- llama_stack/providers/{impls/meta_reference/agents → inline/inference/sentence_transformers}/config.py +6 -4
- llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +83 -0
- llama_stack/providers/inline/post_training/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/common/utils.py +35 -0
- llama_stack/providers/inline/post_training/common/validator.py +36 -0
- llama_stack/providers/inline/post_training/huggingface/__init__.py +27 -0
- llama_stack/providers/inline/post_training/huggingface/config.py +83 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +208 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +519 -0
- llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +485 -0
- llama_stack/providers/inline/post_training/huggingface/utils.py +269 -0
- llama_stack/providers/inline/post_training/torchtune/__init__.py +27 -0
- llama_stack/providers/inline/post_training/torchtune/common/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +240 -0
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +99 -0
- llama_stack/providers/inline/post_training/torchtune/config.py +20 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +57 -0
- llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +78 -0
- llama_stack/providers/inline/post_training/torchtune/post_training.py +178 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py +5 -0
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +588 -0
- llama_stack/providers/inline/safety/__init__.py +5 -0
- llama_stack/providers/{impls/meta_reference/codeshield → inline/safety/code_scanner}/__init__.py +4 -2
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +128 -0
- llama_stack/providers/{impls/meta_reference/memory → inline/safety/code_scanner}/config.py +5 -3
- llama_stack/providers/inline/safety/llama_guard/__init__.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/config.py +19 -0
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +489 -0
- llama_stack/providers/{adapters/memory/sample → inline/safety/prompt_guard}/__init__.py +4 -4
- llama_stack/providers/inline/safety/prompt_guard/config.py +32 -0
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +131 -0
- llama_stack/providers/inline/scoring/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/__init__.py +25 -0
- llama_stack/providers/{adapters/memory/weaviate → inline/scoring/basic}/config.py +5 -7
- llama_stack/providers/inline/scoring/basic/scoring.py +126 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +240 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +41 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py +23 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py +27 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py +71 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py +21 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py +80 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py +66 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py +58 -0
- llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py +38 -0
- llama_stack/providers/inline/scoring/basic/utils/__init__.py +5 -0
- llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py +3319 -0
- llama_stack/providers/inline/scoring/basic/utils/math_utils.py +330 -0
- llama_stack/providers/inline/scoring/braintrust/__init__.py +27 -0
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +230 -0
- llama_stack/providers/inline/scoring/braintrust/config.py +21 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py +23 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py +24 -0
- llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py +24 -0
- llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +21 -0
- llama_stack/providers/inline/scoring/llm_as_judge/config.py +14 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +113 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py +5 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +96 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +20 -0
- llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +81 -0
- llama_stack/providers/inline/telemetry/__init__.py +5 -0
- llama_stack/providers/inline/telemetry/meta_reference/__init__.py +21 -0
- llama_stack/providers/inline/telemetry/meta_reference/config.py +47 -0
- llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +252 -0
- llama_stack/providers/inline/tool_runtime/__init__.py +5 -0
- llama_stack/providers/inline/tool_runtime/rag/__init__.py +19 -0
- llama_stack/providers/{impls/meta_reference/telemetry → inline/tool_runtime/rag}/config.py +5 -3
- llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +77 -0
- llama_stack/providers/inline/tool_runtime/rag/memory.py +332 -0
- llama_stack/providers/inline/vector_io/__init__.py +5 -0
- llama_stack/providers/inline/vector_io/chroma/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/chroma/config.py +30 -0
- llama_stack/providers/inline/vector_io/faiss/__init__.py +21 -0
- llama_stack/providers/inline/vector_io/faiss/config.py +26 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +293 -0
- llama_stack/providers/inline/vector_io/milvus/__init__.py +19 -0
- llama_stack/providers/inline/vector_io/milvus/config.py +29 -0
- llama_stack/providers/inline/vector_io/qdrant/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/qdrant/config.py +29 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +20 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/config.py +26 -0
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +483 -0
- llama_stack/providers/registry/agents.py +16 -18
- llama_stack/providers/registry/batches.py +26 -0
- llama_stack/providers/registry/datasetio.py +49 -0
- llama_stack/providers/registry/eval.py +46 -0
- llama_stack/providers/registry/files.py +31 -0
- llama_stack/providers/registry/inference.py +273 -118
- llama_stack/providers/registry/post_training.py +69 -0
- llama_stack/providers/registry/safety.py +46 -41
- llama_stack/providers/registry/scoring.py +51 -0
- llama_stack/providers/registry/tool_runtime.py +87 -0
- llama_stack/providers/registry/vector_io.py +828 -0
- llama_stack/providers/remote/__init__.py +5 -0
- llama_stack/providers/remote/agents/__init__.py +5 -0
- llama_stack/providers/remote/datasetio/__init__.py +5 -0
- llama_stack/providers/{adapters/memory/chroma → remote/datasetio/huggingface}/__init__.py +7 -4
- llama_stack/providers/remote/datasetio/huggingface/config.py +23 -0
- llama_stack/providers/remote/datasetio/huggingface/huggingface.py +99 -0
- llama_stack/providers/remote/datasetio/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/datasetio/nvidia/config.py +61 -0
- llama_stack/providers/remote/datasetio/nvidia/datasetio.py +116 -0
- llama_stack/providers/remote/eval/__init__.py +5 -0
- llama_stack/providers/remote/eval/nvidia/__init__.py +31 -0
- llama_stack/providers/remote/eval/nvidia/config.py +29 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +162 -0
- llama_stack/providers/remote/files/s3/__init__.py +19 -0
- llama_stack/providers/remote/files/s3/config.py +42 -0
- llama_stack/providers/remote/files/s3/files.py +313 -0
- llama_stack/providers/remote/inference/__init__.py +5 -0
- llama_stack/providers/{adapters/safety/sample → remote/inference/anthropic}/__init__.py +4 -6
- llama_stack/providers/remote/inference/anthropic/anthropic.py +36 -0
- llama_stack/providers/remote/inference/anthropic/config.py +28 -0
- llama_stack/providers/{impls/meta_reference/telemetry → remote/inference/azure}/__init__.py +4 -4
- llama_stack/providers/remote/inference/azure/azure.py +25 -0
- llama_stack/providers/remote/inference/azure/config.py +61 -0
- llama_stack/providers/{adapters → remote}/inference/bedrock/__init__.py +18 -17
- llama_stack/providers/remote/inference/bedrock/bedrock.py +142 -0
- llama_stack/providers/{adapters/inference/sample → remote/inference/bedrock}/config.py +3 -4
- llama_stack/providers/remote/inference/bedrock/models.py +29 -0
- llama_stack/providers/remote/inference/cerebras/__init__.py +19 -0
- llama_stack/providers/remote/inference/cerebras/cerebras.py +28 -0
- llama_stack/providers/remote/inference/cerebras/config.py +30 -0
- llama_stack/providers/{adapters → remote}/inference/databricks/__init__.py +4 -5
- llama_stack/providers/remote/inference/databricks/config.py +37 -0
- llama_stack/providers/remote/inference/databricks/databricks.py +44 -0
- llama_stack/providers/{adapters → remote}/inference/fireworks/__init__.py +8 -4
- llama_stack/providers/remote/inference/fireworks/config.py +27 -0
- llama_stack/providers/remote/inference/fireworks/fireworks.py +27 -0
- llama_stack/providers/{adapters/memory/pgvector → remote/inference/gemini}/__init__.py +4 -4
- llama_stack/providers/remote/inference/gemini/config.py +28 -0
- llama_stack/providers/remote/inference/gemini/gemini.py +82 -0
- llama_stack/providers/remote/inference/groq/__init__.py +15 -0
- llama_stack/providers/remote/inference/groq/config.py +34 -0
- llama_stack/providers/remote/inference/groq/groq.py +18 -0
- llama_stack/providers/remote/inference/llama_openai_compat/__init__.py +15 -0
- llama_stack/providers/remote/inference/llama_openai_compat/config.py +34 -0
- llama_stack/providers/remote/inference/llama_openai_compat/llama.py +46 -0
- llama_stack/providers/remote/inference/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/inference/nvidia/config.py +64 -0
- llama_stack/providers/remote/inference/nvidia/nvidia.py +61 -0
- llama_stack/providers/{adapters/safety/sample/config.py → remote/inference/nvidia/utils.py} +3 -4
- llama_stack/providers/{impls/vllm → remote/inference/ollama}/__init__.py +4 -6
- llama_stack/providers/remote/inference/ollama/config.py +25 -0
- llama_stack/providers/remote/inference/ollama/ollama.py +102 -0
- llama_stack/providers/{adapters/telemetry/opentelemetry → remote/inference/openai}/__init__.py +4 -4
- llama_stack/providers/remote/inference/openai/config.py +39 -0
- llama_stack/providers/remote/inference/openai/openai.py +38 -0
- llama_stack/providers/remote/inference/passthrough/__init__.py +23 -0
- llama_stack/providers/remote/inference/passthrough/config.py +34 -0
- llama_stack/providers/remote/inference/passthrough/passthrough.py +122 -0
- llama_stack/providers/remote/inference/runpod/__init__.py +16 -0
- llama_stack/providers/remote/inference/runpod/config.py +32 -0
- llama_stack/providers/remote/inference/runpod/runpod.py +42 -0
- llama_stack/providers/remote/inference/sambanova/__init__.py +16 -0
- llama_stack/providers/remote/inference/sambanova/config.py +34 -0
- llama_stack/providers/remote/inference/sambanova/sambanova.py +28 -0
- llama_stack/providers/{adapters → remote}/inference/tgi/__init__.py +3 -4
- llama_stack/providers/remote/inference/tgi/config.py +76 -0
- llama_stack/providers/remote/inference/tgi/tgi.py +85 -0
- llama_stack/providers/{adapters → remote}/inference/together/__init__.py +8 -4
- llama_stack/providers/remote/inference/together/config.py +27 -0
- llama_stack/providers/remote/inference/together/together.py +102 -0
- llama_stack/providers/remote/inference/vertexai/__init__.py +15 -0
- llama_stack/providers/remote/inference/vertexai/config.py +48 -0
- llama_stack/providers/remote/inference/vertexai/vertexai.py +54 -0
- llama_stack/providers/remote/inference/vllm/__init__.py +22 -0
- llama_stack/providers/remote/inference/vllm/config.py +59 -0
- llama_stack/providers/remote/inference/vllm/vllm.py +111 -0
- llama_stack/providers/remote/inference/watsonx/__init__.py +15 -0
- llama_stack/providers/remote/inference/watsonx/config.py +45 -0
- llama_stack/providers/remote/inference/watsonx/watsonx.py +336 -0
- llama_stack/providers/remote/post_training/__init__.py +5 -0
- llama_stack/providers/remote/post_training/nvidia/__init__.py +23 -0
- llama_stack/providers/remote/post_training/nvidia/config.py +113 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +27 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +430 -0
- llama_stack/providers/remote/post_training/nvidia/utils.py +63 -0
- llama_stack/providers/remote/safety/__init__.py +5 -0
- llama_stack/providers/remote/safety/bedrock/bedrock.py +111 -0
- llama_stack/providers/remote/safety/bedrock/config.py +14 -0
- llama_stack/providers/{adapters/inference/sample → remote/safety/nvidia}/__init__.py +5 -4
- llama_stack/providers/remote/safety/nvidia/config.py +40 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +161 -0
- llama_stack/providers/{adapters/agents/sample → remote/safety/sambanova}/__init__.py +5 -4
- llama_stack/providers/remote/safety/sambanova/config.py +37 -0
- llama_stack/providers/remote/safety/sambanova/sambanova.py +98 -0
- llama_stack/providers/remote/tool_runtime/__init__.py +5 -0
- llama_stack/providers/remote/tool_runtime/bing_search/__init__.py +21 -0
- llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +112 -0
- llama_stack/providers/remote/tool_runtime/bing_search/config.py +22 -0
- llama_stack/providers/remote/tool_runtime/brave_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +148 -0
- llama_stack/providers/remote/tool_runtime/brave_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py +15 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py +20 -0
- llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +73 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py +20 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/config.py +27 -0
- llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +84 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py +22 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +21 -0
- llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +140 -0
- llama_stack/providers/remote/vector_io/__init__.py +5 -0
- llama_stack/providers/remote/vector_io/chroma/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +215 -0
- llama_stack/providers/remote/vector_io/chroma/config.py +28 -0
- llama_stack/providers/remote/vector_io/milvus/__init__.py +18 -0
- llama_stack/providers/remote/vector_io/milvus/config.py +35 -0
- llama_stack/providers/remote/vector_io/milvus/milvus.py +375 -0
- llama_stack/providers/remote/vector_io/pgvector/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +47 -0
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +460 -0
- llama_stack/providers/remote/vector_io/qdrant/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/qdrant/config.py +37 -0
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +265 -0
- llama_stack/providers/remote/vector_io/weaviate/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/weaviate/config.py +32 -0
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +393 -0
- llama_stack/providers/utils/bedrock/__init__.py +5 -0
- llama_stack/providers/utils/bedrock/client.py +74 -0
- llama_stack/providers/utils/bedrock/config.py +64 -0
- llama_stack/providers/utils/bedrock/refreshable_boto_session.py +112 -0
- llama_stack/providers/utils/common/__init__.py +5 -0
- llama_stack/providers/utils/common/data_schema_validator.py +103 -0
- llama_stack/providers/utils/datasetio/__init__.py +5 -0
- llama_stack/providers/utils/datasetio/url_utils.py +47 -0
- llama_stack/providers/utils/files/__init__.py +5 -0
- llama_stack/providers/utils/files/form_data.py +69 -0
- llama_stack/providers/utils/inference/__init__.py +8 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +101 -0
- llama_stack/providers/utils/inference/inference_store.py +264 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +336 -0
- llama_stack/providers/utils/inference/model_registry.py +173 -23
- llama_stack/providers/utils/inference/openai_compat.py +1261 -49
- llama_stack/providers/utils/inference/openai_mixin.py +506 -0
- llama_stack/providers/utils/inference/prompt_adapter.py +365 -67
- llama_stack/providers/utils/kvstore/api.py +6 -6
- llama_stack/providers/utils/kvstore/config.py +28 -48
- llama_stack/providers/utils/kvstore/kvstore.py +61 -15
- llama_stack/providers/utils/kvstore/mongodb/__init__.py +9 -0
- llama_stack/providers/utils/kvstore/mongodb/mongodb.py +82 -0
- llama_stack/providers/utils/kvstore/postgres/__init__.py +7 -0
- llama_stack/providers/utils/kvstore/postgres/postgres.py +114 -0
- llama_stack/providers/utils/kvstore/redis/redis.py +33 -9
- llama_stack/providers/utils/kvstore/sqlite/config.py +2 -1
- llama_stack/providers/utils/kvstore/sqlite/sqlite.py +123 -22
- llama_stack/providers/utils/memory/file_utils.py +1 -1
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +1304 -0
- llama_stack/providers/utils/memory/vector_store.py +220 -82
- llama_stack/providers/utils/pagination.py +43 -0
- llama_stack/providers/utils/responses/__init__.py +5 -0
- llama_stack/providers/utils/responses/responses_store.py +292 -0
- llama_stack/providers/utils/scheduler.py +270 -0
- llama_stack/providers/utils/scoring/__init__.py +5 -0
- llama_stack/providers/utils/scoring/aggregation_utils.py +75 -0
- llama_stack/providers/utils/scoring/base_scoring_fn.py +114 -0
- llama_stack/providers/utils/scoring/basic_scoring_utils.py +26 -0
- llama_stack/providers/utils/sqlstore/__init__.py +5 -0
- llama_stack/providers/utils/sqlstore/api.py +128 -0
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +319 -0
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +343 -0
- llama_stack/providers/utils/sqlstore/sqlstore.py +70 -0
- llama_stack/providers/utils/telemetry/trace_protocol.py +142 -0
- llama_stack/providers/utils/telemetry/tracing.py +192 -53
- llama_stack/providers/utils/tools/__init__.py +5 -0
- llama_stack/providers/utils/tools/mcp.py +148 -0
- llama_stack/providers/utils/tools/ttl_dict.py +70 -0
- llama_stack/providers/utils/vector_io/__init__.py +5 -0
- llama_stack/providers/utils/vector_io/vector_utils.py +156 -0
- llama_stack/schema_utils.py +118 -0
- llama_stack/strong_typing/__init__.py +19 -0
- llama_stack/strong_typing/auxiliary.py +228 -0
- llama_stack/strong_typing/classdef.py +440 -0
- llama_stack/strong_typing/core.py +46 -0
- llama_stack/strong_typing/deserializer.py +877 -0
- llama_stack/strong_typing/docstring.py +409 -0
- llama_stack/strong_typing/exception.py +23 -0
- llama_stack/strong_typing/inspection.py +1085 -0
- llama_stack/strong_typing/mapping.py +40 -0
- llama_stack/strong_typing/name.py +182 -0
- llama_stack/strong_typing/py.typed +0 -0
- llama_stack/strong_typing/schema.py +792 -0
- llama_stack/strong_typing/serialization.py +97 -0
- llama_stack/strong_typing/serializer.py +500 -0
- llama_stack/strong_typing/slots.py +27 -0
- llama_stack/strong_typing/topological.py +89 -0
- llama_stack/testing/__init__.py +5 -0
- llama_stack/testing/api_recorder.py +956 -0
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- llama_stack-0.3.4.dist-info/METADATA +261 -0
- llama_stack-0.3.4.dist-info/RECORD +625 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL +1 -1
- llama_stack/apis/agents/client.py +0 -292
- llama_stack/apis/agents/event_logger.py +0 -184
- llama_stack/apis/batch_inference/batch_inference.py +0 -72
- llama_stack/apis/common/deployment_types.py +0 -31
- llama_stack/apis/dataset/dataset.py +0 -63
- llama_stack/apis/evals/evals.py +0 -122
- llama_stack/apis/inference/client.py +0 -197
- llama_stack/apis/inspect/client.py +0 -82
- llama_stack/apis/memory/client.py +0 -155
- llama_stack/apis/memory/memory.py +0 -65
- llama_stack/apis/memory_banks/__init__.py +0 -7
- llama_stack/apis/memory_banks/client.py +0 -101
- llama_stack/apis/memory_banks/memory_banks.py +0 -78
- llama_stack/apis/models/client.py +0 -83
- llama_stack/apis/reward_scoring/__init__.py +0 -7
- llama_stack/apis/reward_scoring/reward_scoring.py +0 -55
- llama_stack/apis/safety/client.py +0 -105
- llama_stack/apis/shields/client.py +0 -79
- llama_stack/cli/download.py +0 -340
- llama_stack/cli/model/describe.py +0 -82
- llama_stack/cli/model/download.py +0 -24
- llama_stack/cli/model/list.py +0 -62
- llama_stack/cli/model/model.py +0 -34
- llama_stack/cli/model/prompt_format.py +0 -112
- llama_stack/cli/model/safety_models.py +0 -52
- llama_stack/cli/stack/build.py +0 -299
- llama_stack/cli/stack/configure.py +0 -178
- llama_stack/distribution/build.py +0 -123
- llama_stack/distribution/build_conda_env.sh +0 -136
- llama_stack/distribution/build_container.sh +0 -142
- llama_stack/distribution/common.sh +0 -40
- llama_stack/distribution/configure_container.sh +0 -47
- llama_stack/distribution/datatypes.py +0 -139
- llama_stack/distribution/distribution.py +0 -58
- llama_stack/distribution/inspect.py +0 -67
- llama_stack/distribution/request_headers.py +0 -57
- llama_stack/distribution/resolver.py +0 -323
- llama_stack/distribution/routers/__init__.py +0 -48
- llama_stack/distribution/routers/routers.py +0 -158
- llama_stack/distribution/routers/routing_tables.py +0 -173
- llama_stack/distribution/server/endpoints.py +0 -48
- llama_stack/distribution/server/server.py +0 -343
- llama_stack/distribution/start_conda_env.sh +0 -42
- llama_stack/distribution/start_container.sh +0 -64
- llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml +0 -10
- llama_stack/distribution/templates/local-build.yaml +0 -10
- llama_stack/distribution/templates/local-databricks-build.yaml +0 -10
- llama_stack/distribution/templates/local-fireworks-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-endpoint-build.yaml +0 -10
- llama_stack/distribution/templates/local-hf-serverless-build.yaml +0 -10
- llama_stack/distribution/templates/local-ollama-build.yaml +0 -10
- llama_stack/distribution/templates/local-tgi-build.yaml +0 -10
- llama_stack/distribution/templates/local-together-build.yaml +0 -10
- llama_stack/distribution/templates/local-vllm-build.yaml +0 -10
- llama_stack/distribution/utils/exec.py +0 -105
- llama_stack/providers/adapters/agents/sample/sample.py +0 -18
- llama_stack/providers/adapters/inference/bedrock/bedrock.py +0 -451
- llama_stack/providers/adapters/inference/bedrock/config.py +0 -55
- llama_stack/providers/adapters/inference/databricks/config.py +0 -21
- llama_stack/providers/adapters/inference/databricks/databricks.py +0 -125
- llama_stack/providers/adapters/inference/fireworks/config.py +0 -20
- llama_stack/providers/adapters/inference/fireworks/fireworks.py +0 -130
- llama_stack/providers/adapters/inference/ollama/__init__.py +0 -19
- llama_stack/providers/adapters/inference/ollama/ollama.py +0 -175
- llama_stack/providers/adapters/inference/sample/sample.py +0 -23
- llama_stack/providers/adapters/inference/tgi/config.py +0 -43
- llama_stack/providers/adapters/inference/tgi/tgi.py +0 -200
- llama_stack/providers/adapters/inference/together/config.py +0 -22
- llama_stack/providers/adapters/inference/together/together.py +0 -143
- llama_stack/providers/adapters/memory/chroma/chroma.py +0 -157
- llama_stack/providers/adapters/memory/pgvector/config.py +0 -17
- llama_stack/providers/adapters/memory/pgvector/pgvector.py +0 -211
- llama_stack/providers/adapters/memory/sample/sample.py +0 -23
- llama_stack/providers/adapters/memory/weaviate/__init__.py +0 -15
- llama_stack/providers/adapters/memory/weaviate/weaviate.py +0 -190
- llama_stack/providers/adapters/safety/bedrock/bedrock.py +0 -113
- llama_stack/providers/adapters/safety/bedrock/config.py +0 -16
- llama_stack/providers/adapters/safety/sample/sample.py +0 -23
- llama_stack/providers/adapters/safety/together/__init__.py +0 -18
- llama_stack/providers/adapters/safety/together/config.py +0 -26
- llama_stack/providers/adapters/safety/together/together.py +0 -101
- llama_stack/providers/adapters/telemetry/opentelemetry/config.py +0 -12
- llama_stack/providers/adapters/telemetry/opentelemetry/opentelemetry.py +0 -201
- llama_stack/providers/adapters/telemetry/sample/__init__.py +0 -17
- llama_stack/providers/adapters/telemetry/sample/config.py +0 -12
- llama_stack/providers/adapters/telemetry/sample/sample.py +0 -18
- llama_stack/providers/impls/meta_reference/agents/agent_instance.py +0 -844
- llama_stack/providers/impls/meta_reference/agents/agents.py +0 -161
- llama_stack/providers/impls/meta_reference/agents/persistence.py +0 -84
- llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py +0 -74
- llama_stack/providers/impls/meta_reference/agents/safety.py +0 -57
- llama_stack/providers/impls/meta_reference/agents/tests/code_execution.py +0 -93
- llama_stack/providers/impls/meta_reference/agents/tests/test_chat_agent.py +0 -305
- llama_stack/providers/impls/meta_reference/agents/tools/base.py +0 -20
- llama_stack/providers/impls/meta_reference/agents/tools/builtin.py +0 -375
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_env_prefix.py +0 -133
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/code_execution.py +0 -256
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/matplotlib_custom_backend.py +0 -87
- llama_stack/providers/impls/meta_reference/agents/tools/ipython_tool/utils.py +0 -21
- llama_stack/providers/impls/meta_reference/agents/tools/safety.py +0 -43
- llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py +0 -58
- llama_stack/providers/impls/meta_reference/inference/config.py +0 -45
- llama_stack/providers/impls/meta_reference/inference/generation.py +0 -376
- llama_stack/providers/impls/meta_reference/inference/inference.py +0 -280
- llama_stack/providers/impls/meta_reference/inference/model_parallel.py +0 -99
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py +0 -184
- llama_stack/providers/impls/meta_reference/inference/quantization/fp8_txest_disabled.py +0 -76
- llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +0 -97
- llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py +0 -161
- llama_stack/providers/impls/meta_reference/memory/__init__.py +0 -19
- llama_stack/providers/impls/meta_reference/memory/faiss.py +0 -113
- llama_stack/providers/impls/meta_reference/safety/__init__.py +0 -17
- llama_stack/providers/impls/meta_reference/safety/base.py +0 -57
- llama_stack/providers/impls/meta_reference/safety/config.py +0 -48
- llama_stack/providers/impls/meta_reference/safety/llama_guard.py +0 -268
- llama_stack/providers/impls/meta_reference/safety/prompt_guard.py +0 -145
- llama_stack/providers/impls/meta_reference/safety/safety.py +0 -112
- llama_stack/providers/impls/meta_reference/telemetry/console.py +0 -89
- llama_stack/providers/impls/vllm/config.py +0 -35
- llama_stack/providers/impls/vllm/vllm.py +0 -241
- llama_stack/providers/registry/memory.py +0 -78
- llama_stack/providers/registry/telemetry.py +0 -44
- llama_stack/providers/tests/agents/test_agents.py +0 -210
- llama_stack/providers/tests/inference/test_inference.py +0 -257
- llama_stack/providers/tests/inference/test_prompt_adapter.py +0 -126
- llama_stack/providers/tests/memory/test_memory.py +0 -136
- llama_stack/providers/tests/resolver.py +0 -100
- llama_stack/providers/tests/safety/test_safety.py +0 -77
- llama_stack-0.0.42.dist-info/METADATA +0 -137
- llama_stack-0.0.42.dist-info/RECORD +0 -256
- /llama_stack/{distribution → core}/__init__.py +0 -0
- /llama_stack/{distribution/server → core/access_control}/__init__.py +0 -0
- /llama_stack/{distribution/utils → core/conversations}/__init__.py +0 -0
- /llama_stack/{providers/adapters → core/prompts}/__init__.py +0 -0
- /llama_stack/{providers/adapters/agents → core/routing_tables}/__init__.py +0 -0
- /llama_stack/{providers/adapters/inference → core/server}/__init__.py +0 -0
- /llama_stack/{providers/adapters/memory → core/storage}/__init__.py +0 -0
- /llama_stack/{providers/adapters/safety → core/ui}/__init__.py +0 -0
- /llama_stack/{providers/adapters/telemetry → core/ui/modules}/__init__.py +0 -0
- /llama_stack/{providers/impls → core/ui/page}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference → core/ui/page/distribution}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/rag → core/ui/page/evaluations}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tests → core/ui/page/playground}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools → core/utils}/__init__.py +0 -0
- /llama_stack/{distribution → core}/utils/dynamic.py +0 -0
- /llama_stack/{distribution → core}/utils/serialize.py +0 -0
- /llama_stack/{providers/impls/meta_reference/agents/tools/ipython_tool → distributions}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization → models}/__init__.py +0 -0
- /llama_stack/{providers/impls/meta_reference/inference/quantization/scripts → models/llama}/__init__.py +0 -0
- /llama_stack/{providers/tests → models/llama/llama3}/__init__.py +0 -0
- /llama_stack/{providers/tests/agents → models/llama/llama3/quantization}/__init__.py +0 -0
- /llama_stack/{providers/tests/inference → models/llama/llama3_2}/__init__.py +0 -0
- /llama_stack/{providers/tests/memory → models/llama/llama3_3}/__init__.py +0 -0
- /llama_stack/{providers/tests/safety → models/llama/llama4}/__init__.py +0 -0
- /llama_stack/{scripts → models/llama/llama4/prompt_templates}/__init__.py +0 -0
- /llama_stack/providers/{adapters → remote}/safety/bedrock/__init__.py +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info/licenses}/LICENSE +0 -0
- {llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CheckpointQuantizationFormat(Enum):
    """Quantization formats a model checkpoint can be distributed in."""

    # default format
    bf16 = "bf16"

    # used for enabling fp8_rowwise inference, some weights are bf16
    fp8_mixed = "fp8-mixed"

    int8 = "int8"

    int4 = "int4"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ModelFamily(Enum):
    """Top-level Llama model families that group the CoreModelId SKUs."""

    llama2 = "llama2"
    llama3 = "llama3"
    llama3_1 = "llama3_1"
    llama3_2 = "llama3_2"
    llama3_3 = "llama3_3"
    llama4 = "llama4"
    # all Llama Guard (safety) models, regardless of base generation
    safety = "safety"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CoreModelId(Enum):
    """Each of these models is a unique "SKU". These root models can be served in various garbs (especially by quantizing them)"""

    # Llama 2 family
    llama2_7b = "Llama-2-7b"
    llama2_13b = "Llama-2-13b"
    llama2_70b = "Llama-2-70b"
    llama2_7b_chat = "Llama-2-7b-chat"
    llama2_13b_chat = "Llama-2-13b-chat"
    llama2_70b_chat = "Llama-2-70b-chat"

    # Llama 3 family
    llama3_8b = "Llama-3-8B"
    llama3_70b = "Llama-3-70B"
    llama3_8b_instruct = "Llama-3-8B-Instruct"
    llama3_70b_instruct = "Llama-3-70B-Instruct"

    # Llama 3.1 family
    llama3_1_8b = "Llama3.1-8B"
    llama3_1_70b = "Llama3.1-70B"
    llama3_1_405b = "Llama3.1-405B"
    llama3_1_8b_instruct = "Llama3.1-8B-Instruct"
    llama3_1_70b_instruct = "Llama3.1-70B-Instruct"
    llama3_1_405b_instruct = "Llama3.1-405B-Instruct"

    # Llama 3.2 family (includes the multimodal "Vision" variants)
    llama3_2_1b = "Llama3.2-1B"
    llama3_2_3b = "Llama3.2-3B"
    llama3_2_1b_instruct = "Llama3.2-1B-Instruct"
    llama3_2_3b_instruct = "Llama3.2-3B-Instruct"
    llama3_2_11b_vision = "Llama3.2-11B-Vision"
    llama3_2_90b_vision = "Llama3.2-90B-Vision"
    llama3_2_11b_vision_instruct = "Llama3.2-11B-Vision-Instruct"
    llama3_2_90b_vision_instruct = "Llama3.2-90B-Vision-Instruct"

    # Llama 3.3 family
    llama3_3_70b_instruct = "Llama3.3-70B-Instruct"

    # Llama 4 family
    llama4_scout_17b_16e = "Llama-4-Scout-17B-16E"
    llama4_scout_17b_16e_instruct = "Llama-4-Scout-17B-16E-Instruct"
    llama4_maverick_17b_128e = "Llama-4-Maverick-17B-128E"
    llama4_maverick_17b_128e_instruct = "Llama-4-Maverick-17B-128E-Instruct"

    # Safety models
    llama_guard_3_8b = "Llama-Guard-3-8B"
    llama_guard_2_8b = "Llama-Guard-2-8B"
    llama_guard_3_11b_vision = "Llama-Guard-3-11B-Vision"
    llama_guard_3_1b = "Llama-Guard-3-1B"
    llama_guard_4_12b = "Llama-Guard-4-12B"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def is_multimodal(model_id) -> bool:
    """Return True if the given CoreModelId is a vision (multimodal) SKU.

    Only the Llama 3.2 Vision variants listed below are multimodal; every
    other SKU returns False.
    """
    # `if cond: return True else: return False` collapsed to a direct
    # membership test — same result, less noise.
    return model_id in [
        CoreModelId.llama3_2_11b_vision,
        CoreModelId.llama3_2_90b_vision,
        CoreModelId.llama3_2_11b_vision_instruct,
        CoreModelId.llama3_2_90b_vision_instruct,
    ]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def model_family(model_id) -> ModelFamily:
    """Return the ModelFamily a CoreModelId belongs to.

    Raises:
        ValueError: if `model_id` does not map to any known family.
    """
    # One lookup table instead of a long if/elif chain; membership is
    # checked family by family, exactly as the chained version did.
    members_by_family = {
        ModelFamily.llama2: [
            CoreModelId.llama2_7b,
            CoreModelId.llama2_13b,
            CoreModelId.llama2_70b,
            CoreModelId.llama2_7b_chat,
            CoreModelId.llama2_13b_chat,
            CoreModelId.llama2_70b_chat,
        ],
        ModelFamily.llama3: [
            CoreModelId.llama3_8b,
            CoreModelId.llama3_70b,
            CoreModelId.llama3_8b_instruct,
            CoreModelId.llama3_70b_instruct,
        ],
        ModelFamily.llama3_1: [
            CoreModelId.llama3_1_8b,
            CoreModelId.llama3_1_70b,
            CoreModelId.llama3_1_405b,
            CoreModelId.llama3_1_8b_instruct,
            CoreModelId.llama3_1_70b_instruct,
            CoreModelId.llama3_1_405b_instruct,
        ],
        ModelFamily.llama3_2: [
            CoreModelId.llama3_2_1b,
            CoreModelId.llama3_2_3b,
            CoreModelId.llama3_2_1b_instruct,
            CoreModelId.llama3_2_3b_instruct,
            CoreModelId.llama3_2_11b_vision,
            CoreModelId.llama3_2_90b_vision,
            CoreModelId.llama3_2_11b_vision_instruct,
            CoreModelId.llama3_2_90b_vision_instruct,
        ],
        ModelFamily.llama3_3: [
            CoreModelId.llama3_3_70b_instruct,
        ],
        ModelFamily.llama4: [
            CoreModelId.llama4_scout_17b_16e,
            CoreModelId.llama4_scout_17b_16e_instruct,
            CoreModelId.llama4_maverick_17b_128e,
            CoreModelId.llama4_maverick_17b_128e_instruct,
        ],
        ModelFamily.safety: [
            CoreModelId.llama_guard_3_8b,
            CoreModelId.llama_guard_2_8b,
            CoreModelId.llama_guard_3_11b_vision,
            CoreModelId.llama_guard_3_1b,
            CoreModelId.llama_guard_4_12b,
        ],
    }
    for family, members in members_by_family.items():
        if model_id in members:
            return family
    raise ValueError(f"Unknown model family for {model_id}")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class Model(BaseModel):
    """Describes one Llama model SKU and its checkpoint metadata.

    A SKU is uniquely identified by the (core_model_id, variant) pair; see
    `descriptor()`.
    """

    core_model_id: CoreModelId
    description: str
    # Hugging Face repo id for this checkpoint, if published there
    huggingface_repo: str | None = None
    # free-form architecture hyperparameters for this checkpoint
    arch_args: dict[str, Any]
    # distinguishes multiple checkpoints of the same core model; "" = default
    variant: str = ""

    quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16
    # presumably the number of .pth shard files in the checkpoint — TODO confirm
    pth_file_count: int
    metadata: dict[str, Any] = Field(default_factory=dict)

    # silence pydantic until we remove the `model_` fields
    model_config = ConfigDict(protected_namespaces=())

    @property
    def model_family(self) -> ModelFamily:
        """The ModelFamily this SKU belongs to."""
        return model_family(self.core_model_id)

    # The SKU is uniquely identified by (model_id, variant) combo
    def descriptor(self, shorten_default_variant: bool = True) -> str:
        # NOTE(review): `shorten_default_variant` is currently unused; kept
        # only for backward compatibility with existing callers.
        if not self.variant:
            return self.core_model_id.value
        return f"{self.core_model_id.value}:{self.variant}"

    @property
    def is_instruct_model(self) -> bool:
        # BUGFIX: compare case-insensitively. SKU values spell it "Instruct"
        # (e.g. "Llama3.1-8B-Instruct"), so the previous case-sensitive
        # `"instruct" in value` check never matched any instruct model.
        return "instruct" in self.core_model_id.value.lower()

    # Featured models are shown in the non-exhaustive model list
    @property
    def is_featured(self) -> bool:
        return self.model_family in [
            ModelFamily.llama3_1,
            ModelFamily.llama3_2,
            ModelFamily.llama3_3,
            ModelFamily.llama4,
            ModelFamily.safety,
        ]

    @property
    def max_seq_length(self) -> int:
        """Maximum sequence length (context window) for this SKU.

        Raises:
            ValueError: if no context length is known for the model id.
        """
        if self.model_family == ModelFamily.llama2:
            return 4096
        elif self.core_model_id == CoreModelId.llama_guard_2_8b:
            # Llama Guard 2 is Llama-3 based but keeps the 4k window
            return 4096
        elif self.model_family == ModelFamily.llama3:
            return 8192
        elif self.model_family in [ModelFamily.llama3_1, ModelFamily.llama3_3]:
            return 131072
        elif self.model_family == ModelFamily.llama3_2:
            # int4-quantized 3.2 checkpoints are limited to an 8k window
            if self.quantization_format == CheckpointQuantizationFormat.int4:
                return 8192
            return 131072
        elif self.model_family == ModelFamily.llama4:
            if self.core_model_id in {
                CoreModelId.llama4_scout_17b_16e,
                CoreModelId.llama4_maverick_17b_128e,
            }:
                return 262144
            if self.core_model_id == CoreModelId.llama4_scout_17b_16e_instruct:
                return 10485760
            if self.core_model_id == CoreModelId.llama4_maverick_17b_128e_instruct:
                return 1048576

            raise AssertionError(f"Unexpected core model id: {self.core_model_id}")
        elif self.core_model_id in [
            CoreModelId.llama_guard_3_8b,
            CoreModelId.llama_guard_3_11b_vision,
            CoreModelId.llama_guard_3_1b,
        ]:
            return 131072
        elif self.core_model_id == CoreModelId.llama_guard_4_12b:
            return 8192
        else:
            raise ValueError(f"Unknown max_seq_len for {self.core_model_id}")
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
|
+
# the root directory of this source tree.
|
|
6
|
+
|
|
7
|
+
import base64
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from llama_stack.log import get_logger
|
|
11
|
+
|
|
12
|
+
logger = get_logger(__name__, "models")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def load_bpe_file(model_path: Path) -> dict[bytes, int]:
    """
    Load BPE file directly and return mergeable ranks.

    Each non-empty line is expected to contain a base64-encoded token and an
    integer rank separated by whitespace; lines that fail to parse are logged
    and skipped instead of aborting the load.

    Args:
        model_path (Path): Path to the BPE model file.

    Returns:
        dict[bytes, int]: Dictionary mapping byte sequences to their ranks.
    """
    ranks: dict[bytes, int] = {}

    with open(model_path, encoding="utf-8") as fh:
        raw = fh.read()

    for line in raw.splitlines():
        # blank lines carry no data
        if not line.strip():
            continue
        try:
            encoded_token, rank_text = line.split()
            ranks[base64.b64decode(encoded_token)] = int(rank_text)
        except Exception as e:
            # best-effort: report the bad line and keep going
            logger.warning(f"Failed to parse line '{line}': {e}")
            continue

    return ranks
|
|
@@ -4,51 +4,96 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from enum import
|
|
8
|
-
from typing import Any,
|
|
7
|
+
from enum import StrEnum
|
|
8
|
+
from typing import Any, Protocol
|
|
9
|
+
from urllib.parse import urlparse
|
|
9
10
|
|
|
10
|
-
from llama_models.schema_utils import json_schema_type
|
|
11
11
|
from pydantic import BaseModel, Field
|
|
12
12
|
|
|
13
|
-
from llama_stack.apis.
|
|
13
|
+
from llama_stack.apis.benchmarks import Benchmark
|
|
14
|
+
from llama_stack.apis.datasets import Dataset
|
|
15
|
+
from llama_stack.apis.datatypes import Api
|
|
16
|
+
from llama_stack.apis.models import Model
|
|
17
|
+
from llama_stack.apis.scoring_functions import ScoringFn
|
|
18
|
+
from llama_stack.apis.shields import Shield
|
|
19
|
+
from llama_stack.apis.tools import ToolGroup
|
|
20
|
+
from llama_stack.apis.vector_stores import VectorStore
|
|
21
|
+
from llama_stack.schema_utils import json_schema_type
|
|
14
22
|
|
|
15
|
-
from llama_stack.apis.models import ModelDef
|
|
16
|
-
from llama_stack.apis.shields import ShieldDef
|
|
17
23
|
|
|
24
|
+
class ModelsProtocolPrivate(Protocol):
    """
    Protocol for model management.

    This allows users to register their preferred model identifiers.

    Model registration requires -
     - a provider, used to route the registration request
     - a model identifier, user's intended name for the model during inference
     - a provider model identifier, a model identifier supported by the provider

    Providers will only accept registration for provider model ids they support.

    Example,
      register: provider x my-model-id x provider-model-id
      -> Error if provider does not support provider-model-id
      -> Error if my-model-id is already registered
      -> Success if provider supports provider-model-id
      inference: my-model-id x ...
      -> Provider uses provider-model-id for inference
    """

    # this should be called `on_model_register` or something like that.
    # the provider should _not_ be able to change the object in this
    # callback
    async def register_model(self, model: Model) -> Model: ...

    async def unregister_model(self, model_id: str) -> None: ...

    # the Stack router will query each provider for their list of models
    # if a `refresh_interval_seconds` is provided, this method will be called
    # periodically to refresh the list of models
    #
    # NOTE: each model returned will be registered with the model registry. this means
    # a callback to the `register_model()` method will be made. this is duplicative and
    # may be removed in the future.
    async def list_models(self) -> list[Model] | None: ...

    async def should_refresh_models(self) -> bool: ...
|
|
40
63
|
|
|
41
64
|
|
|
42
65
|
class ShieldsProtocolPrivate(Protocol):
    """Provider-private protocol for registering/unregistering shields."""

    async def register_shield(self, shield: Shield) -> None: ...

    async def unregister_shield(self, identifier: str) -> None: ...
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class VectorStoresProtocolPrivate(Protocol):
    """Provider-private protocol for registering/unregistering vector stores."""

    async def register_vector_store(self, vector_store: VectorStore) -> None: ...

    async def unregister_vector_store(self, vector_store_id: str) -> None: ...
|
|
75
|
+
|
|
44
76
|
|
|
45
|
-
|
|
77
|
+
class DatasetsProtocolPrivate(Protocol):
    """Provider-private protocol for registering/unregistering datasets."""

    async def register_dataset(self, dataset: Dataset) -> None: ...

    async def unregister_dataset(self, dataset_id: str) -> None: ...
|
|
47
81
|
|
|
48
|
-
class MemoryBanksProtocolPrivate(Protocol):
|
|
49
|
-
async def list_memory_banks(self) -> List[MemoryBankDef]: ...
|
|
50
82
|
|
|
51
|
-
|
|
83
|
+
class ScoringFunctionsProtocolPrivate(Protocol):
    """Provider-private protocol for listing and registering scoring functions."""

    async def list_scoring_functions(self) -> list[ScoringFn]: ...

    async def register_scoring_function(self, scoring_fn: ScoringFn) -> None: ...
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class BenchmarksProtocolPrivate(Protocol):
    """Provider-private protocol for registering benchmarks."""

    async def register_benchmark(self, benchmark: Benchmark) -> None: ...
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ToolGroupsProtocolPrivate(Protocol):
    """Provider-private protocol for registering/unregistering tool groups."""

    async def register_toolgroup(self, toolgroup: ToolGroup) -> None: ...

    async def unregister_toolgroup(self, toolgroup_id: str) -> None: ...
|
|
52
97
|
|
|
53
98
|
|
|
54
99
|
@json_schema_type
|
|
@@ -59,130 +104,114 @@ class ProviderSpec(BaseModel):
|
|
|
59
104
|
...,
|
|
60
105
|
description="Fully-qualified classname of the config for this provider",
|
|
61
106
|
)
|
|
62
|
-
api_dependencies:
|
|
107
|
+
api_dependencies: list[Api] = Field(
|
|
63
108
|
default_factory=list,
|
|
64
109
|
description="Higher-level API surfaces may depend on other providers to provide their functionality",
|
|
65
110
|
)
|
|
111
|
+
optional_api_dependencies: list[Api] = Field(
|
|
112
|
+
default_factory=list,
|
|
113
|
+
)
|
|
114
|
+
deprecation_warning: str | None = Field(
|
|
115
|
+
default=None,
|
|
116
|
+
description="If this provider is deprecated, specify the warning message here",
|
|
117
|
+
)
|
|
118
|
+
deprecation_error: str | None = Field(
|
|
119
|
+
default=None,
|
|
120
|
+
description="If this provider is deprecated and does NOT work, specify the error message here",
|
|
121
|
+
)
|
|
66
122
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
class RoutingTable(Protocol):
|
|
72
|
-
def get_provider_impl(self, routing_key: str) -> Any: ...
|
|
123
|
+
module: str | None = Field(
|
|
124
|
+
default=None,
|
|
125
|
+
description="""
|
|
126
|
+
Fully-qualified name of the module to import. The module is expected to have:
|
|
73
127
|
|
|
128
|
+
- `get_adapter_impl(config, deps)`: returns the adapter implementation
|
|
74
129
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
adapter_type: str = Field(
|
|
78
|
-
...,
|
|
79
|
-
description="Unique identifier for this adapter",
|
|
130
|
+
Example: `module: ramalama_stack`
|
|
131
|
+
""",
|
|
80
132
|
)
|
|
81
|
-
module: str = Field(
|
|
82
|
-
...,
|
|
83
|
-
description="""
|
|
84
|
-
Fully-qualified name of the module to import. The module is expected to have:
|
|
85
133
|
|
|
86
|
-
|
|
87
|
-
""",
|
|
88
|
-
)
|
|
89
|
-
pip_packages: List[str] = Field(
|
|
134
|
+
pip_packages: list[str] = Field(
|
|
90
135
|
default_factory=list,
|
|
91
136
|
description="The pip dependencies needed for this implementation",
|
|
92
137
|
)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
description="Fully-qualified classname of the config for this provider",
|
|
96
|
-
)
|
|
97
|
-
provider_data_validator: Optional[str] = Field(
|
|
138
|
+
|
|
139
|
+
provider_data_validator: str | None = Field(
|
|
98
140
|
default=None,
|
|
99
141
|
)
|
|
100
142
|
|
|
143
|
+
is_external: bool = Field(default=False, description="Notes whether this provider is an external provider.")
|
|
144
|
+
|
|
145
|
+
# used internally by the resolver; this is a hack for now
|
|
146
|
+
deps__: list[str] = Field(default_factory=list)
|
|
147
|
+
|
|
148
|
+
@property
|
|
149
|
+
def is_sample(self) -> bool:
|
|
150
|
+
return self.provider_type in ("sample", "remote::sample")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class RoutingTable(Protocol):
|
|
154
|
+
async def get_provider_impl(self, routing_key: str) -> Any: ...
|
|
155
|
+
|
|
101
156
|
|
|
102
157
|
@json_schema_type
|
|
103
158
|
class InlineProviderSpec(ProviderSpec):
|
|
104
|
-
|
|
105
|
-
default_factory=list,
|
|
106
|
-
description="The pip dependencies needed for this implementation",
|
|
107
|
-
)
|
|
108
|
-
docker_image: Optional[str] = Field(
|
|
159
|
+
container_image: str | None = Field(
|
|
109
160
|
default=None,
|
|
110
161
|
description="""
|
|
111
|
-
The
|
|
112
|
-
If a provider depends on other providers, the dependencies MUST NOT specify a
|
|
162
|
+
The container image to use for this implementation. If one is provided, pip_packages will be ignored.
|
|
163
|
+
If a provider depends on other providers, the dependencies MUST NOT specify a container image.
|
|
113
164
|
""",
|
|
114
165
|
)
|
|
115
|
-
|
|
116
|
-
|
|
166
|
+
description: str | None = Field(
|
|
167
|
+
default=None,
|
|
117
168
|
description="""
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
- `get_provider_impl(config, deps)`: returns the local implementation
|
|
169
|
+
A description of the provider. This is used to display in the documentation.
|
|
121
170
|
""",
|
|
122
171
|
)
|
|
123
|
-
provider_data_validator: Optional[str] = Field(
|
|
124
|
-
default=None,
|
|
125
|
-
)
|
|
126
172
|
|
|
127
173
|
|
|
128
174
|
class RemoteProviderConfig(BaseModel):
|
|
129
175
|
host: str = "localhost"
|
|
130
|
-
port: int
|
|
176
|
+
port: int | None = None
|
|
177
|
+
protocol: str = "http"
|
|
131
178
|
|
|
132
179
|
@property
|
|
133
180
|
def url(self) -> str:
|
|
134
|
-
|
|
181
|
+
if self.port is None:
|
|
182
|
+
return f"{self.protocol}://{self.host}"
|
|
183
|
+
return f"{self.protocol}://{self.host}:{self.port}"
|
|
184
|
+
|
|
185
|
+
@classmethod
|
|
186
|
+
def from_url(cls, url: str) -> "RemoteProviderConfig":
|
|
187
|
+
parsed = urlparse(url)
|
|
188
|
+
attrs = {k: v for k, v in parsed._asdict().items() if v is not None}
|
|
189
|
+
return cls(**attrs)
|
|
135
190
|
|
|
136
191
|
|
|
137
192
|
@json_schema_type
|
|
138
193
|
class RemoteProviderSpec(ProviderSpec):
|
|
139
|
-
|
|
194
|
+
adapter_type: str = Field(
|
|
195
|
+
...,
|
|
196
|
+
description="Unique identifier for this adapter",
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
description: str | None = Field(
|
|
140
200
|
default=None,
|
|
141
201
|
description="""
|
|
142
|
-
|
|
143
|
-
API responses, specify the adapter here. If not specified, it indicates the remote
|
|
144
|
-
as being "Llama Stack compatible"
|
|
202
|
+
A description of the provider. This is used to display in the documentation.
|
|
145
203
|
""",
|
|
146
204
|
)
|
|
147
205
|
|
|
148
206
|
@property
|
|
149
|
-
def
|
|
207
|
+
def container_image(self) -> str | None:
|
|
150
208
|
return None
|
|
151
209
|
|
|
152
|
-
@property
|
|
153
|
-
def module(self) -> str:
|
|
154
|
-
if self.adapter:
|
|
155
|
-
return self.adapter.module
|
|
156
|
-
return f"llama_stack.apis.{self.api.value}.client"
|
|
157
210
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
return []
|
|
211
|
+
class HealthStatus(StrEnum):
|
|
212
|
+
OK = "OK"
|
|
213
|
+
ERROR = "Error"
|
|
214
|
+
NOT_IMPLEMENTED = "Not Implemented"
|
|
163
215
|
|
|
164
|
-
@property
|
|
165
|
-
def provider_data_validator(self) -> Optional[str]:
|
|
166
|
-
if self.adapter:
|
|
167
|
-
return self.adapter.provider_data_validator
|
|
168
|
-
return None
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
def is_passthrough(spec: ProviderSpec) -> bool:
|
|
172
|
-
return isinstance(spec, RemoteProviderSpec) and spec.adapter is None
|
|
173
216
|
|
|
174
|
-
|
|
175
|
-
# Can avoid this by using Pydantic computed_field
|
|
176
|
-
def remote_provider_spec(
|
|
177
|
-
api: Api, adapter: Optional[AdapterSpec] = None
|
|
178
|
-
) -> RemoteProviderSpec:
|
|
179
|
-
config_class = (
|
|
180
|
-
adapter.config_class
|
|
181
|
-
if adapter and adapter.config_class
|
|
182
|
-
else "llama_stack.distribution.datatypes.RemoteProviderConfig"
|
|
183
|
-
)
|
|
184
|
-
provider_type = f"remote::{adapter.adapter_type}" if adapter else "remote"
|
|
185
|
-
|
|
186
|
-
return RemoteProviderSpec(
|
|
187
|
-
api=api, provider_type=provider_type, config_class=config_class, adapter=adapter
|
|
188
|
-
)
|
|
217
|
+
HealthResponse = dict[str, Any]
|
llama_stack/providers/{impls/meta_reference/agents → inline/agents/meta_reference}/__init__.py
RENAMED
|
@@ -4,24 +4,31 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
|
-
from llama_stack.
|
|
9
|
+
from llama_stack.core.datatypes import AccessRule, Api
|
|
10
10
|
|
|
11
11
|
from .config import MetaReferenceAgentsImplConfig
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
async def get_provider_impl(
|
|
15
|
-
config: MetaReferenceAgentsImplConfig,
|
|
15
|
+
config: MetaReferenceAgentsImplConfig,
|
|
16
|
+
deps: dict[Api, Any],
|
|
17
|
+
policy: list[AccessRule],
|
|
18
|
+
telemetry_enabled: bool = False,
|
|
16
19
|
):
|
|
17
20
|
from .agents import MetaReferenceAgentsImpl
|
|
18
21
|
|
|
19
22
|
impl = MetaReferenceAgentsImpl(
|
|
20
23
|
config,
|
|
21
24
|
deps[Api.inference],
|
|
22
|
-
deps[Api.
|
|
25
|
+
deps[Api.vector_io],
|
|
23
26
|
deps[Api.safety],
|
|
24
|
-
deps[Api.
|
|
27
|
+
deps[Api.tool_runtime],
|
|
28
|
+
deps[Api.tool_groups],
|
|
29
|
+
deps[Api.conversations],
|
|
30
|
+
policy,
|
|
31
|
+
telemetry_enabled,
|
|
25
32
|
)
|
|
26
33
|
await impl.initialize()
|
|
27
34
|
return impl
|