llama-stack 0.2.21__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. llama_stack/apis/benchmarks/benchmarks.py +8 -0
  2. llama_stack/apis/scoring_functions/scoring_functions.py +8 -0
  3. llama_stack/cli/stack/_build.py +7 -0
  4. llama_stack/cli/verify_download.py +7 -10
  5. llama_stack/core/datatypes.py +10 -3
  6. llama_stack/core/library_client.py +0 -2
  7. llama_stack/core/routers/__init__.py +4 -1
  8. llama_stack/core/routers/inference.py +12 -7
  9. llama_stack/core/routing_tables/benchmarks.py +4 -0
  10. llama_stack/core/routing_tables/common.py +4 -0
  11. llama_stack/core/routing_tables/scoring_functions.py +4 -0
  12. llama_stack/distributions/ci-tests/build.yaml +1 -0
  13. llama_stack/distributions/ci-tests/run.yaml +7 -0
  14. llama_stack/distributions/starter/build.yaml +1 -0
  15. llama_stack/distributions/starter/run.yaml +7 -0
  16. llama_stack/distributions/starter/starter.py +18 -0
  17. llama_stack/distributions/starter-gpu/build.yaml +1 -0
  18. llama_stack/distributions/starter-gpu/run.yaml +7 -0
  19. llama_stack/distributions/watsonx/run.yaml +9 -0
  20. llama_stack/distributions/watsonx/watsonx.py +10 -2
  21. llama_stack/providers/inline/eval/meta_reference/eval.py +7 -0
  22. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +3 -0
  23. llama_stack/providers/inline/tool_runtime/rag/context_retriever.py +6 -6
  24. llama_stack/providers/inline/tool_runtime/rag/memory.py +101 -46
  25. llama_stack/providers/registry/batches.py +1 -1
  26. llama_stack/providers/registry/inference.py +22 -11
  27. llama_stack/providers/registry/scoring.py +1 -1
  28. llama_stack/providers/remote/eval/nvidia/eval.py +11 -2
  29. llama_stack/providers/remote/inference/azure/__init__.py +15 -0
  30. llama_stack/providers/remote/inference/azure/azure.py +64 -0
  31. llama_stack/providers/remote/inference/azure/config.py +63 -0
  32. llama_stack/providers/remote/inference/azure/models.py +28 -0
  33. llama_stack/providers/remote/inference/bedrock/bedrock.py +49 -2
  34. llama_stack/providers/remote/inference/tgi/tgi.py +43 -15
  35. llama_stack/providers/remote/inference/together/models.py +70 -44
  36. llama_stack/providers/remote/inference/together/together.py +79 -130
  37. llama_stack/providers/remote/inference/vertexai/vertexai.py +29 -4
  38. llama_stack/providers/remote/inference/vllm/vllm.py +11 -186
  39. llama_stack/providers/remote/inference/watsonx/config.py +2 -2
  40. llama_stack/providers/remote/inference/watsonx/watsonx.py +18 -2
  41. llama_stack/providers/utils/inference/inference_store.py +129 -19
  42. llama_stack/providers/utils/inference/openai_mixin.py +53 -8
  43. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +14 -0
  44. llama_stack/providers/utils/telemetry/tracing.py +24 -10
  45. llama_stack/providers/utils/vector_io/vector_utils.py +2 -4
  46. llama_stack/testing/inference_recorder.py +43 -32
  47. {llama_stack-0.2.21.dist-info → llama_stack-0.2.22.dist-info}/METADATA +5 -5
  48. {llama_stack-0.2.21.dist-info → llama_stack-0.2.22.dist-info}/RECORD +52 -48
  49. {llama_stack-0.2.21.dist-info → llama_stack-0.2.22.dist-info}/WHEEL +0 -0
  50. {llama_stack-0.2.21.dist-info → llama_stack-0.2.22.dist-info}/entry_points.txt +0 -0
  51. {llama_stack-0.2.21.dist-info → llama_stack-0.2.22.dist-info}/licenses/LICENSE +0 -0
  52. {llama_stack-0.2.21.dist-info → llama_stack-0.2.22.dist-info}/top_level.txt +0 -0
@@ -93,3 +93,11 @@ class Benchmarks(Protocol):
         :param metadata: The metadata to use for the benchmark.
         """
         ...
+
+    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE")
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        """Unregister a benchmark.
+
+        :param benchmark_id: The ID of the benchmark to unregister.
+        """
+        ...
@@ -197,3 +197,11 @@ class ScoringFunctions(Protocol):
         :param params: The parameters for the scoring function for benchmark eval, these can be overridden for app eval.
         """
         ...
+
+    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
+    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
+        """Unregister a scoring function.
+
+        :param scoring_fn_id: The ID of the scoring function to unregister.
+        """
+        ...
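The two new DELETE routes above can be exercised over plain HTTP once a server is running. A minimal sketch, assuming a local Llama Stack server on the default port with the usual /v1 prefix and the httpx client; the benchmark and scoring-function IDs are placeholders, none of this is part of the diff itself:

    import httpx

    BASE_URL = "http://localhost:8321/v1"  # assumption: local server, default port, /v1 route prefix

    # Remove a previously registered benchmark and scoring function via the new routes.
    httpx.delete(f"{BASE_URL}/eval/benchmarks/my-benchmark").raise_for_status()
    httpx.delete(f"{BASE_URL}/scoring-functions/llm-as-judge::my-fn").raise_for_status()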
@@ -45,6 +45,7 @@ from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.exec import formulate_run_args, run_command
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
+from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

 DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"

@@ -294,6 +295,12 @@ def _generate_run_config(
         if build_config.external_providers_dir
         else EXTERNAL_PROVIDERS_DIR,
     )
+    if not run_config.inference_store:
+        run_config.inference_store = SqliteSqlStoreConfig(
+            **SqliteSqlStoreConfig.sample_run_config(
+                __distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
+            )
+        )
     # build providers dict
     provider_registry = get_provider_registry(build_config)
     for api in apis:
@@ -48,15 +48,12 @@ def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None:
     parser.set_defaults(func=partial(run_verify_cmd, parser=parser))


-def calculate_md5(filepath: Path, chunk_size: int = 8192) -> str:
-    # NOTE: MD5 is used here only for download integrity verification,
-    # not for security purposes
-    # TODO: switch to SHA256
-    md5_hash = hashlib.md5(usedforsecurity=False)
+def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str:
+    sha256_hash = hashlib.sha256()
     with open(filepath, "rb") as f:
         for chunk in iter(lambda: f.read(chunk_size), b""):
-            md5_hash.update(chunk)
-    return md5_hash.hexdigest()
+            sha256_hash.update(chunk)
+    return sha256_hash.hexdigest()


 def load_checksums(checklist_path: Path) -> dict[str, str]:
@@ -64,10 +61,10 @@ def load_checksums(checklist_path: Path) -> dict[str, str]:
     with open(checklist_path) as f:
         for line in f:
             if line.strip():
-                md5sum, filepath = line.strip().split(" ", 1)
+                sha256sum, filepath = line.strip().split(" ", 1)
                 # Remove leading './' if present
                 filepath = filepath.lstrip("./")
-                checksums[filepath] = md5sum
+                checksums[filepath] = sha256sum
     return checksums


@@ -88,7 +85,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
         matches = False

         if exists:
-            actual_hash = calculate_md5(full_path)
+            actual_hash = calculate_sha256(full_path)
             matches = actual_hash == expected_hash

         results.append(
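The checklist format consumed by load_checksums is unchanged by this diff; only the hash algorithm moves from MD5 to SHA-256. A minimal standalone sketch of the end-to-end check these functions now perform, assuming a checklist whose lines look like "<sha256> ./path/to/file"; the model directory below is a placeholder:

    import hashlib
    from pathlib import Path

    def sha256_of(path: Path, chunk_size: int = 8192) -> str:
        # Same streaming pattern as calculate_sha256 above.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
        return h.hexdigest()

    model_dir = Path("~/.llama/checkpoints/my-model").expanduser()  # placeholder path
    for line in (model_dir / "checklist.chk").read_text().splitlines():
        if not line.strip():
            continue
        expected, rel_path = line.strip().split(" ", 1)
        rel_path = rel_path.lstrip("./")
        ok = sha256_of(model_dir / rel_path) == expected
        print(f"{'OK ' if ok else 'BAD'} {rel_path}")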
@@ -431,6 +431,12 @@ class ServerConfig(BaseModel):
     )


+class InferenceStoreConfig(BaseModel):
+    sql_store_config: SqlStoreConfig
+    max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
+    num_writers: int = Field(default=4, description="Number of concurrent background writers")
+
+
 class StackRunConfig(BaseModel):
     version: int = LLAMA_STACK_RUN_CONFIG_VERSION

@@ -464,11 +470,12 @@ Configuration for the persistence store used by the distribution registry. If no
 a default SQLite store will be used.""",
     )

-    inference_store: SqlStoreConfig | None = Field(
+    inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
         default=None,
         description="""
-Configuration for the persistence store used by the inference API. If not specified,
-a default SQLite store will be used.""",
+Configuration for the persistence store used by the inference API. Can be either a
+InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
+If not specified, a default SQLite store will be used.""",
     )

     # registry of "resources" in the distribution
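A minimal sketch of how the new union might be populated programmatically. The class names and import paths are taken from this diff; the SQLite path and the db_path field name (which follows the sqlite store snippets elsewhere in this diff) are assumptions:

    from llama_stack.core.datatypes import InferenceStoreConfig
    from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

    # New style: wrap the SQL store config and tune the async write queue.
    inference_store = InferenceStoreConfig(
        sql_store_config=SqliteSqlStoreConfig(db_path="/tmp/inference_store.db"),  # placeholder path
        max_write_queue_size=10000,  # defaults shown in the Field definitions above
        num_writers=4,
    )

    # Old style (still accepted, now documented as deprecated): a bare SqlStoreConfig.
    legacy_store = SqliteSqlStoreConfig(db_path="/tmp/inference_store.db")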
@@ -10,7 +10,6 @@ import json
 import logging  # allow-direct-logging
 import os
 import sys
-from concurrent.futures import ThreadPoolExecutor
 from enum import Enum
 from io import BytesIO
 from pathlib import Path
@@ -148,7 +147,6 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
         self.async_client = AsyncLlamaStackAsLibraryClient(
             config_path_or_distro_name, custom_provider_registry, provider_data, skip_logger_removal
         )
-        self.pool_executor = ThreadPoolExecutor(max_workers=4)
         self.provider_data = provider_data

         self.loop = asyncio.new_event_loop()
@@ -78,7 +78,10 @@ async def get_auto_router_impl(

     # TODO: move pass configs to routers instead
     if api == Api.inference and run_config.inference_store:
-        inference_store = InferenceStore(run_config.inference_store, policy)
+        inference_store = InferenceStore(
+            config=run_config.inference_store,
+            policy=policy,
+        )
         await inference_store.initialize()
         api_to_dep_impl["store"] = inference_store

@@ -63,7 +63,7 @@ from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
-from llama_stack.providers.utils.telemetry.tracing import get_current_span
+from llama_stack.providers.utils.telemetry.tracing import enqueue_event, get_current_span

 logger = get_logger(name=__name__, category="core::routers")

@@ -90,6 +90,11 @@ class InferenceRouter(Inference):

     async def shutdown(self) -> None:
         logger.debug("InferenceRouter.shutdown")
+        if self.store:
+            try:
+                await self.store.shutdown()
+            except Exception as e:
+                logger.warning(f"Error during InferenceStore shutdown: {e}")

     async def register_model(
         self,
@@ -160,7 +165,7 @@ class InferenceRouter(Inference):
         metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
         if self.telemetry:
             for metric in metrics:
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)
         return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]

     async def _count_tokens(
@@ -431,7 +436,7 @@ class InferenceRouter(Inference):
             model=model_obj,
         )
         for metric in metrics:
-            await self.telemetry.log_event(metric)
+            enqueue_event(metric)

         # these metrics will show up in the client response.
         response.metrics = (
@@ -537,7 +542,7 @@ class InferenceRouter(Inference):
             model=model_obj,
         )
         for metric in metrics:
-            await self.telemetry.log_event(metric)
+            enqueue_event(metric)
         # these metrics will show up in the client response.
         response.metrics = (
             metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
@@ -664,7 +669,7 @@ class InferenceRouter(Inference):
                 "completion_tokens",
                 "total_tokens",
             ]:  # Only log completion and total tokens
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)

         # Return metrics in response
         async_metrics = [
@@ -710,7 +715,7 @@ class InferenceRouter(Inference):
         )
         for metric in completion_metrics:
             if metric.metric in ["completion_tokens", "total_tokens"]:  # Only log completion and total tokens
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)

         # Return metrics in response
         return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics]
@@ -806,7 +811,7 @@ class InferenceRouter(Inference):
                 model=model,
             )
             for metric in metrics:
-                await self.telemetry.log_event(metric)
+                enqueue_event(metric)

             yield chunk
         finally:
@@ -56,3 +56,7 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
             provider_resource_id=provider_benchmark_id,
         )
         await self.register_object(benchmark)
+
+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        existing_benchmark = await self.get_benchmark(benchmark_id)
+        await self.unregister_object(existing_benchmark)
@@ -64,6 +64,10 @@ async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
         return await p.unregister_shield(obj.identifier)
     elif api == Api.datasetio:
         return await p.unregister_dataset(obj.identifier)
+    elif api == Api.eval:
+        return await p.unregister_benchmark(obj.identifier)
+    elif api == Api.scoring:
+        return await p.unregister_scoring_function(obj.identifier)
     elif api == Api.tool_runtime:
         return await p.unregister_toolgroup(obj.identifier)
     else:
@@ -60,3 +60,7 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions):
         )
         scoring_fn.provider_id = provider_id
         await self.register_object(scoring_fn)
+
+    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
+        existing_scoring_fn = await self.get_scoring_function(scoring_fn_id)
+        await self.unregister_object(existing_scoring_fn)
@@ -17,6 +17,7 @@ distribution_spec:
     - provider_type: remote::vertexai
     - provider_type: remote::groq
    - provider_type: remote::sambanova
+    - provider_type: remote::azure
     - provider_type: inline::sentence-transformers
     vector_io:
     - provider_type: inline::faiss
@@ -81,6 +81,13 @@ providers:
     config:
       url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
   vector_io:
@@ -18,6 +18,7 @@ distribution_spec:
     - provider_type: remote::vertexai
     - provider_type: remote::groq
     - provider_type: remote::sambanova
+    - provider_type: remote::azure
     - provider_type: inline::sentence-transformers
     vector_io:
     - provider_type: inline::faiss
@@ -81,6 +81,13 @@ providers:
     config:
       url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
   vector_io:
@@ -59,6 +59,7 @@ ENABLED_INFERENCE_PROVIDERS = [
     "cerebras",
     "nvidia",
     "bedrock",
+    "azure",
 ]

 INFERENCE_PROVIDER_IDS = {
@@ -68,6 +69,7 @@ INFERENCE_PROVIDER_IDS = {
     "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
     "nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
     "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
+    "azure": "${env.AZURE_API_KEY:+azure}",
 }


@@ -277,5 +279,21 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
                 "http://localhost:11434",
                 "Ollama URL",
             ),
+            "AZURE_API_KEY": (
+                "",
+                "Azure API Key",
+            ),
+            "AZURE_API_BASE": (
+                "",
+                "Azure API Base",
+            ),
+            "AZURE_API_VERSION": (
+                "",
+                "Azure API Version",
+            ),
+            "AZURE_API_TYPE": (
+                "azure",
+                "Azure API Type",
+            ),
         },
     )
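The `${env.AZURE_API_KEY:+azure}` provider ID used above means the Azure provider is only instantiated when that variable is set. A minimal sketch of enabling it before launching the starter distribution; the endpoint and version values are placeholders, not taken from this diff:

    import os

    os.environ["AZURE_API_KEY"] = "<your-azure-openai-key>"               # required to enable the provider
    os.environ["AZURE_API_BASE"] = "https://<resource>.openai.azure.com"  # placeholder endpoint
    os.environ["AZURE_API_VERSION"] = "<api-version>"                     # placeholder value
    os.environ["AZURE_API_TYPE"] = "azure"                                # default registered above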
@@ -18,6 +18,7 @@ distribution_spec:
     - provider_type: remote::vertexai
     - provider_type: remote::groq
     - provider_type: remote::sambanova
+    - provider_type: remote::azure
     - provider_type: inline::sentence-transformers
     vector_io:
     - provider_type: inline::faiss
@@ -81,6 +81,13 @@ providers:
     config:
       url: https://api.sambanova.ai/v1
      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
   vector_io:
@@ -10,6 +10,7 @@ apis:
 - telemetry
 - tool_runtime
 - vector_io
+- files
 providers:
   inference:
   - provider_id: watsonx
@@ -94,6 +95,14 @@ providers:
     provider_type: inline::rag-runtime
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db
@@ -9,6 +9,7 @@ from pathlib import Path
 from llama_stack.apis.models import ModelType
 from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput
 from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -16,7 +17,7 @@ from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
 from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES


-def get_distribution_template() -> DistributionTemplate:
+def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
     providers = {
         "inference": [
             BuildProvider(provider_type="remote::watsonx"),
@@ -42,6 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
             BuildProvider(provider_type="inline::rag-runtime"),
             BuildProvider(provider_type="remote::model-context-protocol"),
         ],
+        "files": [BuildProvider(provider_type="inline::localfs")],
     }

     inference_provider = Provider(
@@ -79,9 +81,14 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )

+    files_provider = Provider(
+        provider_id="meta-reference-files",
+        provider_type="inline::localfs",
+        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )
     default_models, _ = get_model_registry(available_models)
     return DistributionTemplate(
-        name="watsonx",
+        name=name,
         distro_type="remote_hosted",
         description="Use watsonx for running LLM inference",
         container_image=None,
@@ -92,6 +99,7 @@ def get_distribution_template() -> DistributionTemplate:
         "run.yaml": RunConfigSettings(
             provider_overrides={
                 "inference": [inference_provider, embedding_provider],
+                "files": [files_provider],
             },
             default_models=default_models + [embedding_model],
             default_tool_groups=default_tool_groups,
@@ -75,6 +75,13 @@ class MetaReferenceEvalImpl(
         )
         self.benchmarks[task_def.identifier] = task_def

+    async def unregister_benchmark(self, benchmark_id: str) -> None:
+        if benchmark_id in self.benchmarks:
+            del self.benchmarks[benchmark_id]
+
+        key = f"{EVAL_TASKS_PREFIX}{benchmark_id}"
+        await self.kvstore.delete(key)
+
     async def run_eval(
         self,
         benchmark_id: str,
@@ -63,6 +63,9 @@ class LlmAsJudgeScoringImpl(
     async def register_scoring_function(self, function_def: ScoringFn) -> None:
         self.llm_as_judge_fn.register_scoring_fn_def(function_def)

+    async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
+        self.llm_as_judge_fn.unregister_scoring_fn_def(scoring_fn_id)
+
     async def score_batch(
         self,
         dataset_id: str,
@@ -8,7 +8,7 @@
 from jinja2 import Template

 from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.inference import UserMessage
+from llama_stack.apis.inference import OpenAIUserMessageParam
 from llama_stack.apis.tools.rag_tool import (
     DefaultRAGQueryGeneratorConfig,
     LLMRAGQueryGeneratorConfig,
@@ -61,16 +61,16 @@ async def llm_rag_query_generator(
     messages = [interleaved_content_as_str(content)]

     template = Template(config.template)
-    content = template.render({"messages": messages})
+    rendered_content: str = template.render({"messages": messages})

     model = config.model
-    message = UserMessage(content=content)
-    response = await inference_api.chat_completion(
-        model_id=model,
+    message = OpenAIUserMessageParam(content=rendered_content)
+    response = await inference_api.openai_chat_completion(
+        model=model,
         messages=[message],
         stream=False,
     )

-    query = response.completion_message.content
+    query = response.choices[0].message.content

     return query