PyPI - llama-stack - Versions diffs - 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (738) hide show

{llama_stack-0.0.42.dist-info → llama_stack-0.3.4.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.1.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

llama_stack/apis/agents/client.py DELETED Viewed

@@ -1,292 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-import asyncio
-import json
-import os
-from typing import AsyncGenerator, Optional
-import fire
-import httpx
-from dotenv import load_dotenv
-from pydantic import BaseModel
-from termcolor import cprint
-from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_stack.distribution.datatypes import RemoteProviderConfig
-from .agents import *  # noqa: F403
-from .event_logger import EventLogger
-# Module-level side effect: runs at import time, before the demo functions below read API keys with os.getenv.
-load_dotenv()
-async def get_client_impl(config: RemoteProviderConfig, _deps):
-    """Build an ``AgentsClient`` pointed at ``config.url``; ``_deps`` is accepted but unused."""
-    base_url = config.url
-    return AgentsClient(base_url)
-def encodable_dict(d: BaseModel):
-    """Return ``d`` as plain JSON-compatible data by round-tripping its ``.json()`` output through ``json.loads``."""
-    serialized = d.json()
-    return json.loads(serialized)
-class AgentsClient(Agents):
-    """HTTP client for a remote Agents API, built on ``httpx``; every endpoint path is appended to ``base_url``."""
-    def __init__(self, base_url: str):
-        self.base_url = base_url
-    async def _post_json(self, path: str, payload: dict) -> dict:
-        """POST ``payload`` as JSON to ``base_url + path`` and return the decoded JSON response body."""
-        async with httpx.AsyncClient() as client:
-            response = await client.post(
-                f"{self.base_url}{path}",
-                json=payload,
-                headers={"Content-Type": "application/json"},
-            )
-            # Surface 4xx/5xx responses as an exception instead of parsing an error body.
-            response.raise_for_status()
-            return response.json()
-    async def create_agent(self, agent_config: AgentConfig) -> AgentCreateResponse:
-        """Create an agent via ``POST /agents/create`` and return the parsed response."""
-        body = await self._post_json(
-            "/agents/create",
-            {"agent_config": encodable_dict(agent_config)},
-        )
-        return AgentCreateResponse(**body)
-    async def create_agent_session(
-        self,
-        agent_id: str,
-        session_name: str,
-    ) -> AgentSessionCreateResponse:
-        """Create a session for ``agent_id`` via ``POST /agents/session/create`` and return the parsed response."""
-        body = await self._post_json(
-            "/agents/session/create",
-            {"agent_id": agent_id, "session_name": session_name},
-        )
-        return AgentSessionCreateResponse(**body)
-    def create_agent_turn(
-        self,
-        request: AgentTurnCreateRequest,
-    ) -> AsyncGenerator:
-        """Dispatch a turn request on ``request.stream``.
-        Streaming requests get the async generator from ``_stream_agent_turn``. Non-streaming requests get the un-awaited
-        coroutine from ``_nonstream_agent_turn``, which raises ``NotImplementedError`` once awaited.
-        """
-        if request.stream:
-            return self._stream_agent_turn(request)
-        return self._nonstream_agent_turn(request)
-    async def _stream_agent_turn(
-        self, request: AgentTurnCreateRequest
-    ) -> AsyncGenerator:
-        """POST the turn to ``/agents/turn/create`` and yield one ``AgentTurnResponseStreamChunk`` per ``data:`` line.
-        Lines without the ``data:`` prefix are ignored. Payloads containing an ``"error"`` key are printed in red and
-        skipped; payloads that fail to parse or validate are printed and skipped (best effort, the stream continues).
-        """
-        prefix = "data:"
-        async with httpx.AsyncClient() as client:
-            async with client.stream(
-                "POST",
-                f"{self.base_url}/agents/turn/create",
-                json=encodable_dict(request),
-                headers={"Content-Type": "application/json"},
-                timeout=20,
-            ) as response:
-                async for line in response.aiter_lines():
-                    if not line.startswith(prefix):
-                        continue
-                    # Strip exactly the prefix that was matched, then whitespace, so both
-                    # "data: {...}" and "data:{...}" keep their full payload.
-                    data = line[len(prefix) :].strip()
-                    try:
-                        jdata = json.loads(data)
-                        if "error" in jdata:
-                            cprint(data, "red")
-                            continue
-                        chunk = AgentTurnResponseStreamChunk(**jdata)
-                    except Exception as e:
-                        print(data)
-                        print(f"Error with parsing or validation: {e}")
-                        continue
-                    # Yield outside the try so exceptions raised into the generator by the
-                    # consumer are not mistaken for parse errors.
-                    yield chunk
-    async def _nonstream_agent_turn(self, request: AgentTurnCreateRequest):
-        """Placeholder for non-streaming turns; always raises ``NotImplementedError``."""
-        raise NotImplementedError("Non-streaming not implemented yet")
-async def _run_agent(
-    api, model, tool_definitions, tool_prompt_format, user_prompts, attachments=None
-):
-    """Create an agent and one session on ``api``, then stream one turn per prompt and print every non-empty log event."""
-    config = AgentConfig(
-        model=model,
-        instructions="You are a helpful assistant",
-        sampling_params=SamplingParams(temperature=0.6, top_p=0.9),
-        tools=tool_definitions,
-        tool_choice=ToolChoice.auto,
-        tool_prompt_format=tool_prompt_format,
-        enable_session_persistence=False,
-    )
-    agent = await api.create_agent(config)
-    session = await api.create_agent_session(
-        agent_id=agent.agent_id,
-        session_name="test_session",
-    )
-    for prompt in user_prompts:
-        cprint(f"User> {prompt}", color="white", attrs=["bold"])
-        turn_request = AgentTurnCreateRequest(
-            agent_id=agent.agent_id,
-            session_id=session.session_id,
-            messages=[UserMessage(content=prompt)],
-            attachments=attachments,
-            stream=True,
-        )
-        events = api.create_agent_turn(turn_request)
-        # A fresh EventLogger per turn, so its header-tracking state starts clean.
-        async for _event, log in EventLogger().log(events):
-            if log is None:
-                continue
-            log.print()
-async def run_llama_3_1(host: str, port: int, model: str = "Llama3.1-8B-Instruct"):
-    """Run the Llama 3.1 tool demo against the Agents server at ``http://{host}:{port}``.
-    Configures search, Wolfram Alpha and code-interpreter tools plus a custom ``get_boiling_point`` function, then runs
-    a fixed prompt list with the JSON tool prompt format. API keys come from the ``BRAVE_SEARCH_API_KEY`` and
-    ``WOLFRAM_ALPHA_API_KEY`` environment variables (``None`` when unset).
-    """
-    api = AgentsClient(f"http://{host}:{port}")
-    tool_definitions = [
-        SearchToolDefinition(
-            engine=SearchEngineType.brave,
-            api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
-        ),
-        WolframAlphaToolDefinition(api_key=os.getenv("WOLFRAM_ALPHA_API_KEY")),
-        CodeInterpreterToolDefinition(),
-        FunctionCallToolDefinition(
-            function_name="get_boiling_point",
-            description="Get the boiling point of a imaginary liquids (eg. polyjuice)",
-            parameters={
-                "liquid_name": ToolParamDefinition(
-                    param_type="str",
-                    description="The name of the liquid",
-                    required=True,
-                ),
-                "celcius": ToolParamDefinition(
-                    # A yes/no flag: declared "bool", matching the same tool in run_llama_3_2.
-                    param_type="bool",
-                    description="Whether to return the boiling point in Celcius",
-                    required=False,
-                ),
-            },
-        ),
-    ]
-    user_prompts = [
-        "Who are you?",
-        "what is the 100th prime number?",
-        "Search web for who was 44th President of USA?",
-        "Write code to check if a number is prime. Use that to check if 7 is prime",
-        "What is the boiling point of polyjuicepotion ?",
-    ]
-    await _run_agent(api, model, tool_definitions, ToolPromptFormat.json, user_prompts)
-async def run_llama_3_2_rag(host: str, port: int, model: str = "Llama3.2-3B-Instruct"):
-    """Run the Llama 3.2 RAG demo against the Agents server at ``http://{host}:{port}``.
-    Attaches six torchtune tutorial pages as ``text/plain`` URL attachments, configures a memory tool with no
-    pre-existing banks, and runs two prompts with the JSON tool prompt format.
-    """
-    api = AgentsClient(f"http://{host}:{port}")
-    base_uri = "https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials"
-    # File names under base_uri, not full URLs.
-    doc_names = [
-        "memory_optimizations.rst",
-        "chat.rst",
-        "llama3.rst",
-        "datasets.rst",
-        "qat_finetune.rst",
-        "lora_finetune.rst",
-    ]
-    attachments = [
-        Attachment(
-            content=URL(uri=f"{base_uri}/{doc_name}"),
-            mime_type="text/plain",
-        )
-        for doc_name in doc_names
-    ]
-    # Alternatively, you can pre-populate the memory bank with documents for example,
-    # using `llama_stack.memory.client`. Then you can grab the bank_id
-    # from the output of that run.
-    tool_definitions = [
-        MemoryToolDefinition(
-            max_tokens_in_context=2048,
-            memory_bank_configs=[],
-        ),
-    ]
-    user_prompts = [
-        "How do I use Lora?",
-        "Tell me briefly about llama3 and torchtune",
-    ]
-    await _run_agent(
-        api, model, tool_definitions, ToolPromptFormat.json, user_prompts, attachments
-    )
-async def run_llama_3_2(host: str, port: int, model: str = "Llama3.2-3B-Instruct"):
-    """Run the Llama 3.2 zero-shot tool demo against ``http://{host}:{port}`` using the ``python_list`` tool prompt format."""
-    client = AgentsClient(f"http://{host}:{port}")
-    # zero shot tools for llama3.2 text models
-    boiling_point_tool = FunctionCallToolDefinition(
-        function_name="get_boiling_point",
-        description="Get the boiling point of a imaginary liquids (eg. polyjuice)",
-        parameters={
-            "liquid_name": ToolParamDefinition(
-                param_type="str",
-                description="The name of the liquid",
-                required=True,
-            ),
-            "celcius": ToolParamDefinition(
-                param_type="bool",
-                description="Whether to return the boiling point in Celcius",
-                required=False,
-            ),
-        },
-    )
-    web_search_tool = FunctionCallToolDefinition(
-        function_name="make_web_search",
-        description="Search the web / internet for more realtime information",
-        parameters={
-            "query": ToolParamDefinition(
-                param_type="str",
-                description="the query to search for",
-                required=True,
-            ),
-        },
-    )
-    prompts = [
-        "Who are you?",
-        "what is the 100th prime number?",
-        "Who was 44th President of USA?",
-        # multiple tool calls in a single prompt
-        "What is the boiling point of polyjuicepotion and pinkponklyjuice?",
-    ]
-    await _run_agent(
-        client,
-        model,
-        [boiling_point_tool, web_search_tool],
-        ToolPromptFormat.python_list,
-        prompts,
-    )
-def main(host: str, port: int, run_type: str, model: Optional[str] = None):
-    """CLI entry point: run one demo scenario against the Agents server at ``host:port``.
-    ``run_type`` must be ``tools_llama_3_1``, ``tools_llama_3_2`` or ``rag_llama_3_2``. ``model`` overrides the
-    scenario's default model when given. Raises ``ValueError`` for an unknown ``run_type``.
-    """
-    # Single source of truth for the valid run types and their coroutines.
-    runners = {
-        "tools_llama_3_1": run_llama_3_1,
-        "tools_llama_3_2": run_llama_3_2,
-        "rag_llama_3_2": run_llama_3_2_rag,
-    }
-    # Explicit check rather than ``assert``, so validation still runs under ``python -O``.
-    if run_type not in runners:
-        raise ValueError(
-            f"Invalid run type {run_type}, must be one of {', '.join(runners)}"
-        )
-    args = [host, port]
-    if model is not None:
-        args.append(model)
-    asyncio.run(runners[run_type](*args))
-# Script entry point: hand main() to fire.Fire so its parameters are filled from the command line.
-if __name__ == "__main__":
-    fire.Fire(main)

llama_stack/apis/agents/event_logger.py DELETED Viewed

@@ -1,184 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from typing import Optional
-from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_models.llama3.api.tool_utils import ToolUtils
-from termcolor import cprint
-from llama_stack.apis.agents import AgentTurnResponseEventType, StepType
-class LogEvent:
-    """One printable log line: optional role prefix, content, colour and line terminator."""
-    def __init__(
-        self,
-        role: Optional[str] = None,
-        content: str = "",
-        end: str = "\n",
-        color="white",
-    ):
-        self.role = role
-        self.content = content
-        self.color = color
-        # ``None`` falls back to a newline; any other value, including "", is kept as given.
-        self.end = end if end is not None else "\n"
-    def __str__(self):
-        """Render as ``"<role>> <content>"``, or just the content when there is no role."""
-        if self.role is None:
-            return f"{self.content}"
-        return f"{self.role}> {self.content}"
-    def print(self, flush=True):
-        """Write the rendered event through ``cprint`` with this event's colour and terminator."""
-        rendered = str(self)
-        cprint(rendered, color=self.color, end=self.end, flush=flush)
-# Short alias for AgentTurnResponseEventType, used by EventLogger.log below.
-EventType = AgentTurnResponseEventType
-class EventLogger:
-    """Turn a stream of agent turn chunks into printable ``LogEvent`` objects."""
-    async def log(
-        self,
-        event_generator,
-        stream=True,
-        tool_prompt_format: ToolPromptFormat = ToolPromptFormat.json,
-    ):
-        """Asynchronously yield ``(item, log_event)`` pairs for the chunks of ``event_generator``.
-        Chunks without an ``event`` attribute are skipped unless they are a ``ToolResponseMessage``, which is yielded
-        with a "CustomTool" log event. ``turn_start`` / ``turn_complete`` events are yielded with ``None`` in place of
-        a log event. Other events produce log events by step type (shield_call, inference, tool_execution,
-        memory_retrieval). ``stream`` selects incremental vs. step-complete logging for inference steps, and
-        ``tool_prompt_format`` is used to encode tool calls in the non-streaming case.
-        """
-        # Type of the last step event seen; used to decide when to print the inference header.
-        # Not updated on the ``continue`` paths below (message chunks and turn start/complete).
-        previous_event_type = None
-        previous_step_type = None
-        async for chunk in event_generator:
-            if not hasattr(chunk, "event"):
-                # Need to check for custom tool first
-                # since it does not produce event but instead
-                # a Message
-                if isinstance(chunk, ToolResponseMessage):
-                    yield chunk, LogEvent(
-                        role="CustomTool", content=chunk.content, color="grey"
-                    )
-                continue
-            event = chunk.event
-            event_type = event.payload.event_type
-            if event_type in {
-                EventType.turn_start.value,
-                EventType.turn_complete.value,
-            }:
-                # Currently not logging any turn related info
-                yield event, None
-                continue
-            step_type = event.payload.step_type
-            # handle safety
-            if (
-                step_type == StepType.shield_call
-                and event_type == EventType.step_complete.value
-            ):
-                violation = event.payload.step_details.violation
-                if not violation:
-                    yield event, LogEvent(
-                        role=step_type, content="No Violation", color="magenta"
-                    )
-                else:
-                    yield event, LogEvent(
-                        role=step_type,
-                        content=f"{violation.metadata} {violation.user_message}",
-                        color="red",
-                    )
-            # handle inference
-            if step_type == StepType.inference:
-                if stream:
-                    if event_type == EventType.step_start.value:
-                        # TODO: Currently this event is never received
-                        yield event, LogEvent(
-                            role=step_type, content="", end="", color="yellow"
-                        )
-                    elif event_type == EventType.step_progress.value:
-                        # HACK: if previous was not step/event was not inference's step_progress
-                        # this is the first time we are getting model inference response
-                        # aka equivalent to step_start for inference. Hence,
-                        # start with "Model>".
-                        if (
-                            previous_event_type != EventType.step_progress.value
-                            and previous_step_type != StepType.inference
-                        ):
-                            yield event, LogEvent(
-                                role=step_type, content="", end="", color="yellow"
-                            )
-                        if event.payload.tool_call_delta:
-                            # Only string deltas are printed; other delta content is skipped here.
-                            if isinstance(event.payload.tool_call_delta.content, str):
-                                yield event, LogEvent(
-                                    role=None,
-                                    content=event.payload.tool_call_delta.content,
-                                    end="",
-                                    color="cyan",
-                                )
-                        else:
-                            yield event, LogEvent(
-                                role=None,
-                                content=event.payload.model_response_text_delta,
-                                end="",
-                                color="yellow",
-                            )
-                    else:
-                        # step_complete
-                        yield event, LogEvent(role=None, content="")
-                else:
-                    # Not streaming
-                    if event_type == EventType.step_complete.value:
-                        response = event.payload.step_details.model_response
-                        if response.tool_calls:
-                            # Only the first tool call is rendered.
-                            content = ToolUtils.encode_tool_call(
-                                response.tool_calls[0], tool_prompt_format
-                            )
-                        else:
-                            content = response.content
-                        yield event, LogEvent(
-                            role=step_type,
-                            content=content,
-                            color="yellow",
-                        )
-            # handle tool_execution
-            if (
-                step_type == StepType.tool_execution
-                and
-                # Only print tool calls and responses at the step_complete event
-                event_type == EventType.step_complete.value
-            ):
-                details = event.payload.step_details
-                for t in details.tool_calls:
-                    yield event, LogEvent(
-                        role=step_type,
-                        content=f"Tool:{t.tool_name} Args:{t.arguments}",
-                        color="green",
-                    )
-                for r in details.tool_responses:
-                    yield event, LogEvent(
-                        role=step_type,
-                        content=f"Tool:{r.tool_name} Response:{r.content}",
-                        color="green",
-                    )
-            if (
-                step_type == StepType.memory_retrieval
-                and event_type == EventType.step_complete.value
-            ):
-                details = event.payload.step_details
-                content = interleaved_text_media_as_str(details.inserted_context)
-                # Truncate long retrieved context to its first 200 characters.
-                content = content[:200] + "..." if len(content) > 200 else content
-                yield event, LogEvent(
-                    role=step_type,
-                    content=f"Retrieved context from banks: {details.memory_bank_ids}.\n====\n{content}\n>",
-                    color="cyan",
-                )
-            # Fixed: this was assigned to a misspelled name, which left previous_event_type at None forever.
-            previous_event_type = event_type
-            previous_step_type = step_type

llama_stack/apis/batch_inference/batch_inference.py DELETED Viewed

@@ -1,72 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from typing import List, Optional, Protocol, runtime_checkable
-from llama_models.schema_utils import json_schema_type, webmethod
-from pydantic import BaseModel, Field
-from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_stack.apis.inference import *  # noqa: F403
-# Request schema for batch completion; its fields mirror the parameters of BatchInference.batch_completion.
-@json_schema_type
-class BatchCompletionRequest(BaseModel):
-    model: str
-    content_batch: List[InterleavedTextMedia]
-    sampling_params: Optional[SamplingParams] = SamplingParams()
-    logprobs: Optional[LogProbConfig] = None
-# Response schema; this is the declared return type of BatchInference.batch_completion.
-@json_schema_type
-class BatchCompletionResponse(BaseModel):
-    completion_message_batch: List[CompletionMessage]
-# Request schema for batch chat completion; its fields mirror the parameters of BatchInference.batch_chat_completion.
-@json_schema_type
-class BatchChatCompletionRequest(BaseModel):
-    model: str
-    # A list of message lists (List[List[Message]]).
-    messages_batch: List[List[Message]]
-    sampling_params: Optional[SamplingParams] = SamplingParams()
-    # zero-shot tool definitions as input to the model
-    # (default_factory=list gives each instance its own empty list)
-    tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
-    tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
-    tool_prompt_format: Optional[ToolPromptFormat] = Field(
-        default=ToolPromptFormat.json
-    )
-    logprobs: Optional[LogProbConfig] = None
-# Response schema; this is the declared return type of BatchInference.batch_chat_completion.
-@json_schema_type
-class BatchChatCompletionResponse(BaseModel):
-    completion_message_batch: List[CompletionMessage]
-# Structural interface for batched inference. Both methods are bodiless async stubs decorated with
-# @webmethod at the route shown; their parameters match the request models declared in this file.
-@runtime_checkable
-class BatchInference(Protocol):
-    @webmethod(route="/batch_inference/completion")
-    async def batch_completion(
-        self,
-        model: str,
-        content_batch: List[InterleavedTextMedia],
-        sampling_params: Optional[SamplingParams] = SamplingParams(),
-        logprobs: Optional[LogProbConfig] = None,
-    ) -> BatchCompletionResponse: ...
-    @webmethod(route="/batch_inference/chat_completion")
-    async def batch_chat_completion(
-        self,
-        model: str,
-        messages_batch: List[List[Message]],
-        sampling_params: Optional[SamplingParams] = SamplingParams(),
-        # zero-shot tool definitions as input to the model
-        # Default is None rather than the builtin ``list`` type object, which is not a valid
-        # Optional[List[ToolDefinition]] value.
-        tools: Optional[List[ToolDefinition]] = None,
-        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
-        tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
-        logprobs: Optional[LogProbConfig] = None,
-    ) -> BatchChatCompletionResponse: ...

llama_stack/apis/common/deployment_types.py DELETED Viewed

@@ -1,31 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from enum import Enum
-from typing import Any, Dict, Optional
-from llama_models.llama3.api.datatypes import URL
-from llama_models.schema_utils import json_schema_type
-from pydantic import BaseModel
-# HTTP method names; this is the type of RestAPIExecutionConfig.method. Each member's value equals its name.
-@json_schema_type
-class RestAPIMethod(Enum):
-    GET = "GET"
-    POST = "POST"
-    PUT = "PUT"
-    DELETE = "DELETE"
-# Schema with a required url and method plus three optional string-keyed dictionaries.
-# NOTE(review): presumably describes a REST call to be made on the caller's behalf; confirm against its users.
-@json_schema_type
-class RestAPIExecutionConfig(BaseModel):
-    url: URL
-    method: RestAPIMethod
-    # Each of the following defaults to None when omitted.
-    params: Optional[Dict[str, Any]] = None
-    headers: Optional[Dict[str, Any]] = None
-    body: Optional[Dict[str, Any]] = None

llama_stack/apis/dataset/dataset.py DELETED Viewed

@@ -1,63 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from enum import Enum
-from typing import Any, Dict, Optional, Protocol
-from llama_models.llama3.api.datatypes import URL
-from llama_models.schema_utils import json_schema_type, webmethod
-from pydantic import BaseModel
-# Allowed column types; these are the value type of TrainEvalDataset.columns. Each member's value equals its name.
-@json_schema_type
-class TrainEvalDatasetColumnType(Enum):
-    dialog = "dialog"
-    text = "text"
-    media = "media"
-    number = "number"
-    json = "json"
-@json_schema_type
-class TrainEvalDataset(BaseModel):
-    """Dataset to be used for training or evaluating language models."""
-    # TODO(ashwin): figure out if we need to add an enum for a "dataset type"
-    # Maps a string key (presumably the column name) to that column's type.
-    columns: Dict[str, TrainEvalDatasetColumnType]
-    # NOTE(review): the expected format of the content behind this URL is not specified here.
-    content_url: URL
-    # Optional string-keyed dictionary; defaults to None.
-    metadata: Optional[Dict[str, Any]] = None
-@json_schema_type
-class CreateDatasetRequest(BaseModel):
-    """Request to create a dataset."""
-    # Fields mirror the parameters of Datasets.create_dataset.
-    uuid: str
-    dataset: TrainEvalDataset
-# Structural interface for dataset management. Each method is a bodiless, synchronous stub
-# decorated with @webmethod at the route shown above it.
-class Datasets(Protocol):
-    @webmethod(route="/datasets/create")
-    def create_dataset(
-        self,
-        uuid: str,
-        dataset: TrainEvalDataset,
-    ) -> None: ...
-    # NOTE(review): the identifier is named dataset_uuid here but uuid in create_dataset.
-    @webmethod(route="/datasets/get")
-    def get_dataset(
-        self,
-        dataset_uuid: str,
-    ) -> TrainEvalDataset: ...
-    @webmethod(route="/datasets/delete")
-    def delete_dataset(
-        self,
-        dataset_uuid: str,
-    ) -> None: ...

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl

llama-stack 0.0.42__py3-none-any.whl → 0.3.4__py3-none-any.whl