PyPI - lightspeed-stack - Versions diffs - 0.1.0__py3-none-any.whl - Mend

lightspeed-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

app/__init__.py +1 -0
app/endpoints/.ruff_cache/.gitignore +2 -0
app/endpoints/.ruff_cache/0.9.1/5703048272820174433 +0 -0
app/endpoints/.ruff_cache/0.9.1/9961612457335986079 +0 -0
app/endpoints/.ruff_cache/CACHEDIR.TAG +1 -0
app/endpoints/__init__.py +1 -0
app/endpoints/config.py +64 -0
app/endpoints/feedback.py +129 -0
app/endpoints/health.py +111 -0
app/endpoints/info.py +26 -0
app/endpoints/models.py +79 -0
app/endpoints/query.py +360 -0
app/endpoints/root.py +777 -0
app/endpoints/streaming_query.py +321 -0
app/main.py +38 -0
app/routers.py +30 -0
auth/__init__.py +38 -0
auth/interface.py +13 -0
auth/k8s.py +270 -0
auth/noop.py +42 -0
auth/noop_with_token.py +46 -0
auth/utils.py +26 -0
lightspeed_stack-0.1.0.dist-info/METADATA +443 -0
lightspeed_stack-0.1.0.dist-info/RECORD +43 -0
lightspeed_stack-0.1.0.dist-info/WHEEL +4 -0
lightspeed_stack-0.1.0.dist-info/entry_points.txt +4 -0
lightspeed_stack-0.1.0.dist-info/licenses/LICENSE +201 -0
models/__init__.py +1 -0
models/config.py +161 -0
models/requests.py +208 -0
models/responses.py +244 -0
runners/__init__.py +1 -0
runners/uvicorn.py +31 -0
utils/.ruff_cache/.gitignore +2 -0
utils/.ruff_cache/0.9.1/18446581155718949728 +0 -0
utils/.ruff_cache/0.9.1/4991844299736624256 +0 -0
utils/.ruff_cache/CACHEDIR.TAG +1 -0
utils/__init__.py +1 -0
utils/checks.py +27 -0
utils/common.py +111 -0
utils/endpoints.py +34 -0
utils/mcp_headers.py +48 -0
utils/suid.py +28 -0

app/endpoints/streaming_query.py ADDED Viewed

@@ -0,0 +1,321 @@
+"""Handler for REST API call to provide answer to streaming query."""
+import json
+import logging
+import re
+from typing import Any, AsyncIterator
+from cachetools import TTLCache  # type: ignore
+from llama_stack_client import APIConnectionError
+from llama_stack_client.lib.agents.agent import AsyncAgent  # type: ignore
+from llama_stack_client import AsyncLlamaStackClient  # type: ignore
+from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
+from llama_stack_client.types import UserMessage  # type: ignore
+from fastapi import APIRouter, HTTPException, Request, Depends, status
+from fastapi.responses import StreamingResponse
+from auth import get_auth_dependency
+from client import get_async_llama_stack_client
+from configuration import configuration
+from models.requests import QueryRequest
+from utils.endpoints import check_configuration_loaded, get_system_prompt
+from utils.common import retrieve_user_id
+from utils.mcp_headers import mcp_headers_dependency
+from utils.suid import get_suid
+from app.endpoints.query import (
+    get_rag_toolgroups,
+    is_transcripts_enabled,
+    store_transcript,
+    select_model_id,
+    validate_attachments_metadata,
+)
+logger = logging.getLogger("app.endpoints.handlers")
+router = APIRouter(tags=["streaming_query"])
+auth_dependency = get_auth_dependency()
+# Global agent registry to persist agents across requests
+_agent_cache: TTLCache[str, AsyncAgent] = TTLCache(maxsize=1000, ttl=3600)
+async def get_agent(
+    client: AsyncLlamaStackClient,
+    model_id: str,
+    system_prompt: str,
+    available_shields: list[str],
+    conversation_id: str | None,
+) -> tuple[AsyncAgent, str]:
+    """Get existing agent or create a new one with session persistence."""
+    if conversation_id is not None:
+        agent = _agent_cache.get(conversation_id)
+        if agent:
+            logger.debug("Reusing existing agent with key: %s", conversation_id)
+            return agent, conversation_id
+    logger.debug("Creating new agent")
+    agent = AsyncAgent(
+        client,  # type: ignore[arg-type]
+        model=model_id,
+        instructions=system_prompt,
+        input_shields=available_shields if available_shields else [],
+        enable_session_persistence=True,
+    )
+    conversation_id = await agent.create_session(get_suid())
+    _agent_cache[conversation_id] = agent
+    return agent, conversation_id
+METADATA_PATTERN = re.compile(r"\nMetadata: (\{.+})\n")
+def format_stream_data(d: dict) -> str:
+    """Format outbound data in the Event Stream Format."""
+    data = json.dumps(d)
+    return f"data: {data}\n\n"
+def stream_start_event(conversation_id: str) -> str:
+    """Yield the start of the data stream.
+    Args:
+        conversation_id: The conversation ID (UUID).
+    """
+    return format_stream_data(
+        {
+            "event": "start",
+            "data": {
+                "conversation_id": conversation_id,
+            },
+        }
+    )
+def stream_end_event(metadata_map: dict) -> str:
+    """Yield the end of the data stream."""
+    return format_stream_data(
+        {
+            "event": "end",
+            "data": {
+                "referenced_documents": [
+                    {
+                        "doc_url": v["docs_url"],
+                        "doc_title": v["title"],
+                    }
+                    for v in filter(
+                        lambda v: ("docs_url" in v) and ("title" in v),
+                        metadata_map.values(),
+                    )
+                ],
+                "truncated": None,  # TODO(jboos): implement truncated
+                "input_tokens": 0,  # TODO(jboos): implement input tokens
+                "output_tokens": 0,  # TODO(jboos): implement output tokens
+            },
+            "available_quotas": {},  # TODO(jboos): implement available quotas
+        }
+    )
+def stream_build_event(chunk: Any, chunk_id: int, metadata_map: dict) -> str | None:
+    """Build a streaming event from a chunk response.
+    This function processes chunks from the LLama Stack streaming response and formats
+    them into Server-Sent Events (SSE) format for the client. It handles two main
+    event types:
+    1. step_progress: Contains text deltas from the model inference process
+    2. step_complete: Contains information about completed tool execution steps
+    Args:
+        chunk: The streaming chunk from LLama Stack containing event data
+        chunk_id: The current chunk ID counter (gets incremented for each token)
+    Returns:
+        str | None: A formatted SSE data string with event information, or None if
+                   the chunk doesn't contain processable event data
+    """
+    # pylint: disable=R1702
+    if hasattr(chunk.event, "payload"):
+        if chunk.event.payload.event_type == "step_progress":
+            if hasattr(chunk.event.payload.delta, "text"):
+                text = chunk.event.payload.delta.text
+                return format_stream_data(
+                    {
+                        "event": "token",
+                        "data": {
+                            "id": chunk_id,
+                            "role": chunk.event.payload.step_type,
+                            "token": text,
+                        },
+                    }
+                )
+        if (
+            chunk.event.payload.event_type == "step_complete"
+            and chunk.event.payload.step_details.step_type == "tool_execution"
+        ):
+            for r in chunk.event.payload.step_details.tool_responses:
+                if r.tool_name == "knowledge_search" and r.content:
+                    for text_content_item in r.content:
+                        if isinstance(text_content_item, TextContentItem):
+                            for match in METADATA_PATTERN.findall(
+                                text_content_item.text
+                            ):
+                                meta = json.loads(match.replace("'", '"'))
+                                metadata_map[meta["document_id"]] = meta
+            if chunk.event.payload.step_details.tool_calls:
+                tool_name = str(
+                    chunk.event.payload.step_details.tool_calls[0].tool_name
+                )
+                return format_stream_data(
+                    {
+                        "event": "token",
+                        "data": {
+                            "id": chunk_id,
+                            "role": chunk.event.payload.step_type,
+                            "token": tool_name,
+                        },
+                    }
+                )
+    return None
+@router.post("/streaming_query")
+async def streaming_query_endpoint_handler(
+    _request: Request,
+    query_request: QueryRequest,
+    auth: Any = Depends(auth_dependency),
+    mcp_headers: dict[str, dict[str, str]] = Depends(mcp_headers_dependency),
+) -> StreamingResponse:
+    """Handle request to the /streaming_query endpoint."""
+    check_configuration_loaded(configuration)
+    llama_stack_config = configuration.llama_stack_configuration
+    logger.info("LLama stack config: %s", llama_stack_config)
+    try:
+        # try to get Llama Stack client
+        client = await get_async_llama_stack_client(llama_stack_config)
+        model_id = select_model_id(await client.models.list(), query_request)
+        response, conversation_id = await retrieve_response(
+            client,
+            model_id,
+            query_request,
+            auth,
+            mcp_headers=mcp_headers,
+        )
+        metadata_map: dict[str, dict[str, Any]] = {}
+        async def response_generator(turn_response: Any) -> AsyncIterator[str]:
+            """Generate SSE formatted streaming response."""
+            chunk_id = 0
+            complete_response = ""
+            # Send start event
+            yield stream_start_event(conversation_id)
+            async for chunk in turn_response:
+                if event := stream_build_event(chunk, chunk_id, metadata_map):
+                    complete_response += json.loads(event.replace("data: ", ""))[
+                        "data"
+                    ]["token"]
+                    chunk_id += 1
+                    yield event
+            yield stream_end_event(metadata_map)
+            if not is_transcripts_enabled():
+                logger.debug("Transcript collection is disabled in the configuration")
+            else:
+                store_transcript(
+                    user_id=retrieve_user_id(auth),
+                    conversation_id=conversation_id,
+                    query_is_valid=True,  # TODO(lucasagomes): implement as part of query validation
+                    query=query_request.query,
+                    query_request=query_request,
+                    response=complete_response,
+                    rag_chunks=[],  # TODO(lucasagomes): implement rag_chunks
+                    truncated=False,  # TODO(lucasagomes): implement truncation as part
+                    # of quota work
+                    attachments=query_request.attachments or [],
+                )
+        return StreamingResponse(response_generator(response))
+    # connection to Llama Stack server
+    except APIConnectionError as e:
+        logger.error("Unable to connect to Llama Stack: %s", e)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail={
+                "response": "Unable to connect to Llama Stack",
+                "cause": str(e),
+            },
+        ) from e
+async def retrieve_response(
+    client: AsyncLlamaStackClient,
+    model_id: str,
+    query_request: QueryRequest,
+    token: str,
+    mcp_headers: dict[str, dict[str, str]] | None = None,
+) -> tuple[Any, str]:
+    """Retrieve response from LLMs and agents."""
+    available_shields = [shield.identifier for shield in await client.shields.list()]
+    if not available_shields:
+        logger.info("No available shields. Disabling safety")
+    else:
+        logger.info("Available shields found: %s", available_shields)
+    # use system prompt from request or default one
+    system_prompt = get_system_prompt(query_request, configuration)
+    logger.debug("Using system prompt: %s", system_prompt)
+    # TODO(lucasagomes): redact attachments content before sending to LLM
+    # if attachments are provided, validate them
+    if query_request.attachments:
+        validate_attachments_metadata(query_request.attachments)
+    agent, conversation_id = await get_agent(
+        client,
+        model_id,
+        system_prompt,
+        available_shields,
+        query_request.conversation_id,
+    )
+    # preserve compatibility when mcp_headers is not provided
+    if mcp_headers is None:
+        mcp_headers = {}
+    if not mcp_headers and token:
+        for mcp_server in configuration.mcp_servers:
+            mcp_headers[mcp_server.url] = {
+                "Authorization": f"Bearer {token}",
+            }
+    agent.extra_headers = {
+        "X-LlamaStack-Provider-Data": json.dumps(
+            {
+                "mcp_headers": mcp_headers,
+            }
+        ),
+    }
+    logger.debug("Session ID: %s", conversation_id)
+    vector_db_ids = [
+        vector_db.identifier for vector_db in await client.vector_dbs.list()
+    ]
+    toolgroups = (get_rag_toolgroups(vector_db_ids) or []) + [
+        mcp_server.name for mcp_server in configuration.mcp_servers
+    ]
+    response = await agent.create_turn(
+        messages=[UserMessage(role="user", content=query_request.query)],
+        session_id=conversation_id,
+        documents=query_request.get_documents(),
+        stream=True,
+        toolgroups=toolgroups or None,
+    )
+    return response, conversation_id

app/main.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""Definition of FastAPI based web service."""
+from fastapi import FastAPI
+from app import routers
+import version
+from log import get_logger
+from configuration import configuration
+from utils.common import register_mcp_servers_async
+logger = get_logger(__name__)
+logger.info("Initializing app")
+service_name = configuration.configuration.name
+app = FastAPI(
+    title=f"{service_name} service - OpenAPI",
+    description=f"{service_name} service API specification.",
+    version=version.__version__,
+    license_info={
+        "name": "Apache 2.0",
+        "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
+    },
+)
+logger.info("Including routers")
+routers.include_routers(app)
+@app.on_event("startup")
+async def startup_event() -> None:
+    """Perform logger setup on service startup."""
+    logger.info("Registering MCP servers")
+    await register_mcp_servers_async(logger, configuration.configuration)
+    get_logger("app.endpoints.handlers")
+    logger.info("App startup complete")

app/routers.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""REST API routers."""
+from fastapi import FastAPI
+from app.endpoints import (
+    info,
+    models,
+    root,
+    query,
+    health,
+    config,
+    feedback,
+    streaming_query,
+)
+def include_routers(app: FastAPI) -> None:
+    """Include FastAPI routers for different endpoints.
+    Args:
+        app: The `FastAPI` app instance.
+    """
+    app.include_router(root.router)
+    app.include_router(info.router, prefix="/v1")
+    app.include_router(models.router, prefix="/v1")
+    app.include_router(query.router, prefix="/v1")
+    app.include_router(health.router, prefix="/v1")
+    app.include_router(config.router, prefix="/v1")
+    app.include_router(feedback.router, prefix="/v1")
+    app.include_router(streaming_query.router, prefix="/v1")

auth/__init__.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""This package contains authentication code and modules."""
+import logging
+from auth.interface import AuthInterface
+from auth import noop, noop_with_token, k8s
+from configuration import configuration
+import constants
+logger = logging.getLogger(__name__)
+def get_auth_dependency(
+    virtual_path: str = constants.DEFAULT_VIRTUAL_PATH,
+) -> AuthInterface:
+    """Select the configured authentication dependency interface."""
+    module = configuration.authentication_configuration.module  # pyright: ignore
+    logger.debug(
+        "Initializing authentication dependency: module='%s', virtual_path='%s'",
+        module,
+        virtual_path,
+    )
+    match module:
+        case constants.AUTH_MOD_NOOP:
+            return noop.NoopAuthDependency(virtual_path=virtual_path)
+        case constants.AUTH_MOD_NOOP_WITH_TOKEN:
+            return noop_with_token.NoopWithTokenAuthDependency(
+                virtual_path=virtual_path
+            )
+        case constants.AUTH_MOD_K8S:
+            return k8s.K8SAuthDependency(virtual_path=virtual_path)
+        case _:
+            err_msg = f"Unsupported authentication module '{module}'"
+            logger.error(err_msg)
+            raise ValueError(err_msg)

auth/interface.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Abstract base class for authentication methods."""
+from abc import ABC, abstractmethod
+from fastapi import Request
+class AuthInterface(ABC):  # pylint: disable=too-few-public-methods
+    """Base class for all authentication method implementations."""
+    @abstractmethod
+    async def __call__(self, request: Request) -> tuple[str, str, str]:
+        """Validate FastAPI Requests for authentication and authorization."""