agent-starter-pack 0.0.1b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of agent-starter-pack might be problematic. Click here for more details.
- agent_starter_pack-0.0.1b0.dist-info/METADATA +143 -0
- agent_starter_pack-0.0.1b0.dist-info/RECORD +162 -0
- agent_starter_pack-0.0.1b0.dist-info/WHEEL +4 -0
- agent_starter_pack-0.0.1b0.dist-info/entry_points.txt +2 -0
- agent_starter_pack-0.0.1b0.dist-info/licenses/LICENSE +201 -0
- agents/agentic_rag_vertexai_search/README.md +22 -0
- agents/agentic_rag_vertexai_search/app/agent.py +145 -0
- agents/agentic_rag_vertexai_search/app/retrievers.py +79 -0
- agents/agentic_rag_vertexai_search/app/templates.py +53 -0
- agents/agentic_rag_vertexai_search/notebooks/evaluating_langgraph_agent.ipynb +1561 -0
- agents/agentic_rag_vertexai_search/template/.templateconfig.yaml +14 -0
- agents/agentic_rag_vertexai_search/tests/integration/test_agent.py +57 -0
- agents/crewai_coding_crew/README.md +34 -0
- agents/crewai_coding_crew/app/agent.py +86 -0
- agents/crewai_coding_crew/app/crew/config/agents.yaml +39 -0
- agents/crewai_coding_crew/app/crew/config/tasks.yaml +37 -0
- agents/crewai_coding_crew/app/crew/crew.py +71 -0
- agents/crewai_coding_crew/notebooks/evaluating_crewai_agent.ipynb +1571 -0
- agents/crewai_coding_crew/notebooks/evaluating_langgraph_agent.ipynb +1561 -0
- agents/crewai_coding_crew/template/.templateconfig.yaml +12 -0
- agents/crewai_coding_crew/tests/integration/test_agent.py +47 -0
- agents/langgraph_base_react/README.md +9 -0
- agents/langgraph_base_react/app/agent.py +73 -0
- agents/langgraph_base_react/notebooks/evaluating_langgraph_agent.ipynb +1561 -0
- agents/langgraph_base_react/template/.templateconfig.yaml +13 -0
- agents/langgraph_base_react/tests/integration/test_agent.py +48 -0
- agents/multimodal_live_api/README.md +50 -0
- agents/multimodal_live_api/app/agent.py +86 -0
- agents/multimodal_live_api/app/server.py +193 -0
- agents/multimodal_live_api/app/templates.py +51 -0
- agents/multimodal_live_api/app/vector_store.py +55 -0
- agents/multimodal_live_api/template/.templateconfig.yaml +15 -0
- agents/multimodal_live_api/tests/integration/test_server_e2e.py +254 -0
- agents/multimodal_live_api/tests/load_test/load_test.py +40 -0
- agents/multimodal_live_api/tests/unit/test_server.py +143 -0
- src/base_template/.gitignore +197 -0
- src/base_template/Makefile +37 -0
- src/base_template/README.md +91 -0
- src/base_template/app/utils/tracing.py +143 -0
- src/base_template/app/utils/typing.py +115 -0
- src/base_template/deployment/README.md +123 -0
- src/base_template/deployment/cd/deploy-to-prod.yaml +98 -0
- src/base_template/deployment/cd/staging.yaml +215 -0
- src/base_template/deployment/ci/pr_checks.yaml +51 -0
- src/base_template/deployment/terraform/apis.tf +34 -0
- src/base_template/deployment/terraform/build_triggers.tf +122 -0
- src/base_template/deployment/terraform/dev/apis.tf +42 -0
- src/base_template/deployment/terraform/dev/iam.tf +90 -0
- src/base_template/deployment/terraform/dev/log_sinks.tf +66 -0
- src/base_template/deployment/terraform/dev/providers.tf +29 -0
- src/base_template/deployment/terraform/dev/storage.tf +76 -0
- src/base_template/deployment/terraform/dev/variables.tf +126 -0
- src/base_template/deployment/terraform/dev/vars/env.tfvars +21 -0
- src/base_template/deployment/terraform/iam.tf +130 -0
- src/base_template/deployment/terraform/locals.tf +50 -0
- src/base_template/deployment/terraform/log_sinks.tf +72 -0
- src/base_template/deployment/terraform/providers.tf +35 -0
- src/base_template/deployment/terraform/service_accounts.tf +42 -0
- src/base_template/deployment/terraform/storage.tf +100 -0
- src/base_template/deployment/terraform/variables.tf +202 -0
- src/base_template/deployment/terraform/vars/env.tfvars +43 -0
- src/base_template/pyproject.toml +113 -0
- src/base_template/tests/unit/test_utils/test_tracing_exporter.py +140 -0
- src/cli/commands/create.py +534 -0
- src/cli/commands/setup_cicd.py +730 -0
- src/cli/main.py +35 -0
- src/cli/utils/__init__.py +35 -0
- src/cli/utils/cicd.py +662 -0
- src/cli/utils/gcp.py +120 -0
- src/cli/utils/logging.py +51 -0
- src/cli/utils/template.py +644 -0
- src/data_ingestion/README.md +79 -0
- src/data_ingestion/data_ingestion_pipeline/components/ingest_data.py +175 -0
- src/data_ingestion/data_ingestion_pipeline/components/process_data.py +321 -0
- src/data_ingestion/data_ingestion_pipeline/pipeline.py +58 -0
- src/data_ingestion/data_ingestion_pipeline/submit_pipeline.py +184 -0
- src/data_ingestion/pyproject.toml +17 -0
- src/data_ingestion/uv.lock +999 -0
- src/deployment_targets/agent_engine/app/agent_engine_app.py +238 -0
- src/deployment_targets/agent_engine/app/utils/gcs.py +42 -0
- src/deployment_targets/agent_engine/deployment_metadata.json +4 -0
- src/deployment_targets/agent_engine/notebooks/intro_reasoning_engine.ipynb +869 -0
- src/deployment_targets/agent_engine/tests/integration/test_agent_engine_app.py +120 -0
- src/deployment_targets/agent_engine/tests/load_test/.results/.placeholder +0 -0
- src/deployment_targets/agent_engine/tests/load_test/.results/report.html +264 -0
- src/deployment_targets/agent_engine/tests/load_test/.results/results_exceptions.csv +1 -0
- src/deployment_targets/agent_engine/tests/load_test/.results/results_failures.csv +1 -0
- src/deployment_targets/agent_engine/tests/load_test/.results/results_stats.csv +3 -0
- src/deployment_targets/agent_engine/tests/load_test/.results/results_stats_history.csv +22 -0
- src/deployment_targets/agent_engine/tests/load_test/README.md +42 -0
- src/deployment_targets/agent_engine/tests/load_test/load_test.py +100 -0
- src/deployment_targets/agent_engine/tests/unit/test_dummy.py +22 -0
- src/deployment_targets/cloud_run/Dockerfile +29 -0
- src/deployment_targets/cloud_run/app/server.py +128 -0
- src/deployment_targets/cloud_run/deployment/terraform/artifact_registry.tf +22 -0
- src/deployment_targets/cloud_run/deployment/terraform/dev/service_accounts.tf +20 -0
- src/deployment_targets/cloud_run/tests/integration/test_server_e2e.py +192 -0
- src/deployment_targets/cloud_run/tests/load_test/.results/.placeholder +0 -0
- src/deployment_targets/cloud_run/tests/load_test/README.md +79 -0
- src/deployment_targets/cloud_run/tests/load_test/load_test.py +85 -0
- src/deployment_targets/cloud_run/tests/unit/test_server.py +142 -0
- src/deployment_targets/cloud_run/uv.lock +6952 -0
- src/frontends/live_api_react/frontend/package-lock.json +19405 -0
- src/frontends/live_api_react/frontend/package.json +56 -0
- src/frontends/live_api_react/frontend/public/favicon.ico +0 -0
- src/frontends/live_api_react/frontend/public/index.html +62 -0
- src/frontends/live_api_react/frontend/public/robots.txt +3 -0
- src/frontends/live_api_react/frontend/src/App.scss +189 -0
- src/frontends/live_api_react/frontend/src/App.test.tsx +25 -0
- src/frontends/live_api_react/frontend/src/App.tsx +205 -0
- src/frontends/live_api_react/frontend/src/components/audio-pulse/AudioPulse.tsx +64 -0
- src/frontends/live_api_react/frontend/src/components/audio-pulse/audio-pulse.scss +68 -0
- src/frontends/live_api_react/frontend/src/components/control-tray/ControlTray.tsx +217 -0
- src/frontends/live_api_react/frontend/src/components/control-tray/control-tray.scss +201 -0
- src/frontends/live_api_react/frontend/src/components/logger/Logger.tsx +241 -0
- src/frontends/live_api_react/frontend/src/components/logger/logger.scss +133 -0
- src/frontends/live_api_react/frontend/src/components/logger/mock-logs.ts +151 -0
- src/frontends/live_api_react/frontend/src/components/side-panel/SidePanel.tsx +161 -0
- src/frontends/live_api_react/frontend/src/components/side-panel/side-panel.scss +285 -0
- src/frontends/live_api_react/frontend/src/contexts/LiveAPIContext.tsx +48 -0
- src/frontends/live_api_react/frontend/src/hooks/use-live-api.ts +115 -0
- src/frontends/live_api_react/frontend/src/hooks/use-media-stream-mux.ts +23 -0
- src/frontends/live_api_react/frontend/src/hooks/use-screen-capture.ts +72 -0
- src/frontends/live_api_react/frontend/src/hooks/use-webcam.ts +69 -0
- src/frontends/live_api_react/frontend/src/index.css +28 -0
- src/frontends/live_api_react/frontend/src/index.tsx +35 -0
- src/frontends/live_api_react/frontend/src/multimodal-live-types.ts +242 -0
- src/frontends/live_api_react/frontend/src/react-app-env.d.ts +17 -0
- src/frontends/live_api_react/frontend/src/reportWebVitals.ts +31 -0
- src/frontends/live_api_react/frontend/src/setupTests.ts +21 -0
- src/frontends/live_api_react/frontend/src/utils/audio-recorder.ts +111 -0
- src/frontends/live_api_react/frontend/src/utils/audio-streamer.ts +270 -0
- src/frontends/live_api_react/frontend/src/utils/audioworklet-registry.ts +43 -0
- src/frontends/live_api_react/frontend/src/utils/multimodal-live-client.ts +329 -0
- src/frontends/live_api_react/frontend/src/utils/store-logger.ts +64 -0
- src/frontends/live_api_react/frontend/src/utils/utils.ts +86 -0
- src/frontends/live_api_react/frontend/src/utils/worklets/audio-processing.ts +73 -0
- src/frontends/live_api_react/frontend/src/utils/worklets/vol-meter.ts +65 -0
- src/frontends/live_api_react/frontend/tsconfig.json +25 -0
- src/frontends/streamlit/frontend/side_bar.py +213 -0
- src/frontends/streamlit/frontend/streamlit_app.py +263 -0
- src/frontends/streamlit/frontend/style/app_markdown.py +37 -0
- src/frontends/streamlit/frontend/utils/chat_utils.py +67 -0
- src/frontends/streamlit/frontend/utils/local_chat_history.py +125 -0
- src/frontends/streamlit/frontend/utils/message_editing.py +59 -0
- src/frontends/streamlit/frontend/utils/multimodal_utils.py +217 -0
- src/frontends/streamlit/frontend/utils/stream_handler.py +282 -0
- src/frontends/streamlit/frontend/utils/title_summary.py +77 -0
- src/resources/containers/data_processing/Dockerfile +25 -0
- src/resources/locks/uv-agentic_rag_vertexai_search-agent_engine.lock +4684 -0
- src/resources/locks/uv-agentic_rag_vertexai_search-cloud_run.lock +5799 -0
- src/resources/locks/uv-crewai_coding_crew-agent_engine.lock +5509 -0
- src/resources/locks/uv-crewai_coding_crew-cloud_run.lock +6688 -0
- src/resources/locks/uv-langgraph_base_react-agent_engine.lock +4595 -0
- src/resources/locks/uv-langgraph_base_react-cloud_run.lock +5710 -0
- src/resources/locks/uv-multimodal_live_api-cloud_run.lock +5665 -0
- src/resources/setup_cicd/cicd_variables.tf +36 -0
- src/resources/setup_cicd/github.tf +85 -0
- src/resources/setup_cicd/providers.tf +39 -0
- src/utils/generate_locks.py +135 -0
- src/utils/lock_utils.py +82 -0
- src/utils/watch_and_rebuild.py +190 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Template configuration for the LangGraph base ReAct agent.
description: "An agent implementing a base ReAct agent using LangGraph"
settings:
  requires_data_ingestion: false
  deployment_targets: ["agent_engine", "cloud_run"]
  # Deduplicated: "langchain-google-vertexai~=2.0.7" and "langchain~=0.3.14"
  # were each listed twice in the original.
  extra_dependencies: [
    "langchain-google-vertexai~=2.0.7",
    "langchain~=0.3.14",
    "langgraph~=0.2.63",
    "langchain-community~=0.3.17",
    "langchain-openai~=0.3.5",
  ]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
# mypy: disable-error-code="union-attr"
|
|
16
|
+
from app.agent import agent
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_agent_stream() -> None:
    """
    Integration test for the agent stream functionality.
    Tests that the agent returns valid streaming responses.
    """
    input_dict = {
        "messages": [
            {"type": "human", "content": "Hi"},
            {"type": "ai", "content": "Hi there!"},
            {"type": "human", "content": "What's the weather in NY?"},
        ]
    }

    events = [
        message for message, _ in agent.stream(input_dict, stream_mode="messages")
    ]

    # Verify we get a reasonable number of messages
    assert len(events) > 0, "Expected at least one message"

    # First message should be an AI message
    assert events[0].type == "AIMessageChunk"

    # At least one message should have content.  any() replaces the original
    # manual flag-and-break loop.
    assert any(
        getattr(event, "content", None) for event in events
    ), "Expected at least one message with content"
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Multimodal Live Agent
|
|
2
|
+
|
|
3
|
+
This pattern showcases a real-time conversational RAG agent powered by Google Gemini. The agent handles audio, video, and text interactions while leveraging tool calling with a vector DB for grounded responses.
|
|
4
|
+
|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
**Key components:**
|
|
8
|
+
|
|
9
|
+
- **Python Backend** (in `app/` folder): A production-ready server built with [FastAPI](https://fastapi.tiangolo.com/) and [google-genai](https://googleapis.github.io/python-genai/) that features:
|
|
10
|
+
|
|
11
|
+
- **Real-time bidirectional communication** via WebSockets between the frontend and Gemini model
|
|
12
|
+
- **Integrated tool calling** with vector database support for contextual document retrieval
|
|
13
|
+
- **Production-grade reliability** with retry logic and automatic reconnection capabilities
|
|
14
|
+
- **Deployment flexibility** supporting both AI Studio and Vertex AI endpoints
|
|
15
|
+
- **Feedback logging endpoint** for collecting user interactions
|
|
16
|
+
|
|
17
|
+
- **React Frontend** (in `frontend/` folder): Extends the [Multimodal live API Web Console](https://github.com/google-gemini/multimodal-live-api-web-console), with added features like **custom URLs** and **feedback collection**.
|
|
18
|
+
|
|
19
|
+

|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
1. **Install Dependencies:**
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
make install
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
2. **Start the Backend and Frontend:**
|
|
30
|
+
|
|
31
|
+
Start the backend:
|
|
32
|
+
```bash
|
|
33
|
+
make backend
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
In a different shell, start the frontend:
|
|
37
|
+
```bash
|
|
38
|
+
make ui
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Once both the backend and frontend are running, click the play button in the frontend UI to establish a connection with the backend. You can now interact with the Multimodal Live Agent! You can try asking questions such as "Using the tool you have, define Governance in the context of MLOps" to allow the agent to use the [documentation](https://cloud.google.com/architecture/deploy-operate-generative-ai-applications) it was provided with.
|
|
42
|
+
|
|
43
|
+
## Additional Resources for Multimodal Live API
|
|
44
|
+
|
|
45
|
+
Explore these resources to learn more about the Multimodal Live API and see examples of its usage:
|
|
46
|
+
|
|
47
|
+
- [Project Pastra](https://github.com/heiko-hotz/gemini-multimodal-live-dev-guide/tree/main): a comprehensive developer guide for the Gemini Multimodal Live API.
|
|
48
|
+
- [Google Cloud Multimodal Live API demos and samples](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/gemini/multimodal-live-api): Collection of code samples and demo applications leveraging multimodal live API in Vertex AI
|
|
49
|
+
- [Gemini 2 Cookbook](https://github.com/google-gemini/cookbook/tree/main/gemini-2): Practical examples and tutorials for working with Gemini 2
|
|
50
|
+
- [Multimodal Live API Web Console](https://github.com/google-gemini/multimodal-live-api-web-console): Interactive React-based web interface for testing and experimenting with Gemini Multimodal Live API.
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
import google
|
|
18
|
+
import vertexai
|
|
19
|
+
from google import genai
|
|
20
|
+
from google.genai.types import (
|
|
21
|
+
Content,
|
|
22
|
+
FunctionDeclaration,
|
|
23
|
+
LiveConnectConfig,
|
|
24
|
+
Tool,
|
|
25
|
+
)
|
|
26
|
+
from langchain_google_vertexai import VertexAIEmbeddings
|
|
27
|
+
|
|
28
|
+
from app.templates import FORMAT_DOCS, SYSTEM_INSTRUCTION
|
|
29
|
+
from app.vector_store import get_vector_store
|
|
30
|
+
|
|
31
|
+
# Constants
# VERTEXAI toggles between the Vertex AI backend and the Gemini Developer API;
# defaults to Vertex AI unless the env var is explicitly "false".
VERTEXAI = os.getenv("VERTEXAI", "true").lower() == "true"
LOCATION = "us-central1"
EMBEDDING_MODEL = "text-embedding-004"
MODEL_ID = "gemini-2.0-flash-001"
# Source pages ingested into the RAG vector store.
URLS = [
    "https://cloud.google.com/architecture/deploy-operate-generative-ai-applications"
]

# Initialize Google Cloud clients
# NOTE(review): this relies on `google.auth` being importable even though only
# `import google` appears above — presumably another import (e.g. vertexai)
# pulls it in; consider an explicit `import google.auth`. TODO confirm.
credentials, project_id = google.auth.default()
vertexai.init(project=project_id, location=LOCATION)


if VERTEXAI:
    genai_client = genai.Client(project=project_id, location=LOCATION, vertexai=True)
else:
    # API key should be set using GOOGLE_API_KEY environment variable
    # NOTE(review): "v1alpha" looks like it is needed for the Live API on the
    # Developer API backend — confirm against google-genai docs.
    genai_client = genai.Client(http_options={"api_version": "v1alpha"})

# Initialize vector store and retriever (built/loaded once at import time).
embedding = VertexAIEmbeddings(model_name=EMBEDDING_MODEL)
vector_store = get_vector_store(embedding=embedding, urls=URLS)
retriever = vector_store.as_retriever()
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def retrieve_docs(query: str) -> dict[str, str]:
    """
    Retrieves pre-formatted documents about MLOps (Machine Learning Operations),
    Gen AI lifecycle, and production deployment best practices.

    Args:
        query: Search query string related to MLOps, Gen AI, or production deployment.

    Returns:
        A set of relevant, pre-formatted documents.
    """
    # Look up relevant chunks, then render them through the shared template.
    relevant_docs = retriever.invoke(query)
    return {"output": FORMAT_DOCS.format(docs=relevant_docs)}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Configure tools and live connection
# The function declaration is derived from retrieve_docs' signature and
# docstring by the genai SDK.
retrieve_docs_tool = Tool(
    function_declarations=[
        FunctionDeclaration.from_callable(client=genai_client, callable=retrieve_docs)
    ]
)

# Maps tool names (as requested by the model) to the local Python callables.
tool_functions = {"retrieve_docs": retrieve_docs}

live_connect_config = LiveConnectConfig(
    response_modalities=["AUDIO"],
    tools=[retrieve_docs_tool],
    system_instruction=Content(parts=[{"text": SYSTEM_INSTRUCTION}]),
)
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import json
|
|
17
|
+
import logging
|
|
18
|
+
from collections.abc import Callable
|
|
19
|
+
from typing import Any, Literal
|
|
20
|
+
|
|
21
|
+
import backoff
|
|
22
|
+
from fastapi import FastAPI, WebSocket
|
|
23
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
24
|
+
from google.cloud import logging as google_cloud_logging
|
|
25
|
+
from google.genai import types
|
|
26
|
+
from google.genai.types import LiveServerToolCall
|
|
27
|
+
from pydantic import BaseModel
|
|
28
|
+
from websockets.exceptions import ConnectionClosedError
|
|
29
|
+
|
|
30
|
+
from app.agent import MODEL_ID, genai_client, live_connect_config, tool_functions
|
|
31
|
+
|
|
32
|
+
app = FastAPI()
# CORS is wide open (any origin/method/header) — presumably acceptable for a
# demo backend; NOTE(review): tighten before exposing publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Structured entries (e.g. feedback) go to Google Cloud Logging via `logger`;
# plain `logging` is used for operational messages.
logging_client = google_cloud_logging.Client()
logger = logging_client.logger(__name__)
logging.basicConfig(level=logging.INFO)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class GeminiSession:
    """Manages bidirectional communication between a client and the Gemini model."""

    def __init__(
        self, session: Any, websocket: WebSocket, tool_functions: dict[str, Callable]
    ) -> None:
        """Initialize the Gemini session.

        Args:
            session: The Gemini session
            websocket: The client websocket connection
            tool_functions: Dictionary of available tool functions
        """
        self.session = session
        self.websocket = websocket
        # run_id / user_id are placeholders until the client sends a "setup"
        # message (see receive_from_client).
        self.run_id = "n/a"
        self.user_id = "n/a"
        self.tool_functions = tool_functions

    async def receive_from_client(self) -> None:
        """Listen for and process messages from the client.

        Continuously receives messages and forwards audio data to Gemini.
        Handles connection errors gracefully.
        """
        while True:
            try:
                data = await self.websocket.receive_json()
                if isinstance(data, dict) and (
                    "realtimeInput" in data or "clientContent" in data
                ):
                    # Forward realtime/content payloads straight to the
                    # underlying Gemini websocket without re-validation.
                    await self.session._ws.send(json.dumps(data))
                elif "setup" in data:
                    self.run_id = data["setup"]["run_id"]
                    self.user_id = data["setup"]["user_id"]
                    logging.info(f"Setup data: {data['setup']}")
                else:
                    logging.warning(f"Received unexpected input from client: {data}")
            except ConnectionClosedError as e:
                logging.warning(f"Client {self.user_id} closed connection: {e}")
                break
            except Exception as e:
                logging.error(f"Error receiving from client {self.user_id}: {e!s}")
                break

    def _get_func(self, action_label: str) -> Callable | None:
        """Get the tool function for a given action label, or None if unknown."""
        return None if action_label == "" else self.tool_functions.get(action_label)

    async def _handle_tool_call(
        self, session: Any, tool_call: LiveServerToolCall
    ) -> None:
        """Process tool calls from Gemini and send back responses.

        Args:
            session: The Gemini session
            tool_call: Tool call request from Gemini
        """
        for fc in tool_call.function_calls:
            logging.debug(f"Calling tool function: {fc.name} with args: {fc.args}")
            func = self._get_func(fc.name)
            if func is None:
                # Guard: the original code called the result of _get_func
                # unconditionally, so an unknown tool name raised
                # "TypeError: 'NoneType' object is not callable" and tore
                # down the whole session.  Report the error back instead.
                logging.error(f"Unknown tool function requested: {fc.name}")
                response = {"error": f"Unknown tool function: {fc.name}"}
            else:
                response = func(**fc.args)
            tool_response = types.LiveClientToolResponse(
                function_responses=[
                    types.FunctionResponse(name=fc.name, id=fc.id, response=response)
                ]
            )
            logging.debug(f"Tool response: {tool_response}")
            await session.send(tool_response)

    async def receive_from_gemini(self) -> None:
        """Listen for and process messages from Gemini.

        Continuously receives messages from Gemini, forwards them to the client,
        and handles any tool calls. Handles connection errors gracefully.
        """
        while result := await self.session._ws.recv(decode=False):
            # Forward the raw bytes to the client first, then inspect the
            # message locally for tool calls.
            await self.websocket.send_bytes(result)
            message = types.LiveServerMessage.model_validate(json.loads(result))
            if message.tool_call:
                tool_call = LiveServerToolCall.model_validate(message.tool_call)
                await self._handle_tool_call(self.session, tool_call)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def get_connect_and_run_callable(websocket: WebSocket) -> Callable:
    """Create a callable that handles Gemini connection with retry logic.

    Args:
        websocket: The client websocket connection

    Returns:
        Callable: An async function that establishes and manages the Gemini connection
    """

    async def on_backoff(details: backoff._typing.Details) -> None:
        # Tell the client we are retrying so the UI can surface the delay.
        await websocket.send_json(
            {
                "status": f"Model connection error, retrying in {details['wait']} seconds..."
            }
        )

    # Retry the model connection with exponential backoff, up to 10 attempts,
    # notifying the client before each retry.
    @backoff.on_exception(
        backoff.expo, ConnectionClosedError, max_tries=10, on_backoff=on_backoff
    )
    async def connect_and_run() -> None:
        async with genai_client.aio.live.connect(
            model=MODEL_ID, config=live_connect_config
        ) as session:
            await websocket.send_json({"status": "Backend is ready for conversation"})
            gemini_session = GeminiSession(
                session=session, websocket=websocket, tool_functions=tool_functions
            )
            logging.info("Starting bidirectional communication")
            # Run both pump loops concurrently; when either returns (connection
            # closed or error) the gather completes and the session ends.
            await asyncio.gather(
                gemini_session.receive_from_client(),
                gemini_session.receive_from_gemini(),
            )

    return connect_and_run
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket) -> None:
    """Handle new websocket connections."""
    await websocket.accept()
    # Build the retrying connection handler for this client and run it to
    # completion.
    handler = get_connect_and_run_callable(websocket)
    await handler()
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class Feedback(BaseModel):
    """Represents feedback for a conversation."""

    # Numeric rating supplied by the user.
    score: int | float
    # Optional free-form comment; defaults to empty string.
    text: str | None = ""
    # Identifier of the conversation run the feedback refers to.
    run_id: str
    # Identifier of the submitting user; required field, but None is accepted.
    user_id: str | None
    # Fixed discriminator so feedback entries are easy to filter in log sinks.
    log_type: Literal["feedback"] = "feedback"
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
@app.post("/feedback")
async def collect_feedback(feedback_dict: Feedback) -> None:
    """Collect and log feedback."""
    # Persist the validated payload as a structured Cloud Logging entry.
    logger.log_struct(feedback_dict.model_dump(), severity="INFO")
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
if __name__ == "__main__":
    # Local development entry point.  NOTE(review): presumably production
    # serves `app` via an external ASGI server instead — confirm against the
    # deployment config.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="debug")
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from langchain_core.prompts import PromptTemplate
|
|
16
|
+
|
|
17
|
+
# Jinja2 template that renders retrieved documents into numbered
# "<Document N>" context blocks for the model prompt.  `| safe` passes the
# page content through without escaping.
FORMAT_DOCS = PromptTemplate.from_template(
    """## Context provided:
{% for doc in docs%}
<Document {{ loop.index0 }}>
{{ doc.page_content | safe }}
</Document {{ loop.index0 }}>
{% endfor %}
""",
    template_format="jinja2",
)

# System prompt for the live agent: constrains answers to MLOps topics and
# mandates use of the retrieval tool for in-domain questions.
SYSTEM_INSTRUCTION = """You are "MLOps Expert," a specialized AI assistant designed to provide accurate and up-to-date information on Machine Learning Operations (MLOps), the lifecycle of Generative AI applications, and best practices for production deployment.

Your primary knowledge source is a powerful search tool that provides access to the most current MLOps documentation and resources. **For any question related to MLOps, the lifecycle of Gen AI Apps, or best practices for production deployment, you MUST use this tool as your first and foremost source of information.** Do not rely on your internal knowledge for these topics, as it may be outdated or incomplete.

**Here's how you should operate:**

1. **Analyze the User's Question:** Determine if the question falls within the domain of MLOps, Gen AI lifecycle, or production deployment best practices.
2. **Prioritize Tool Usage:** If the question is within the defined domain, use the provided search tool to find relevant information.
3. **Synthesize and Respond:** Craft a clear, concise, and informative answer based *solely* on the information retrieved from the tool.
4. **Cite Sources (Optional):** If possible and relevant, indicate which part of the answer came from the tool. For example, you can say, "According to the documentation I found..." or provide links if applicable.
5. **Out-of-Scope Questions:** If the question is outside the scope of MLOps, Gen AI, or production deployment, politely state that the topic is beyond your current expertise. For example: "My expertise is in MLOps, and that question seems to be about a different area. I'm not equipped to answer it accurately."

**Your Persona:**

* You are an expert MLOps consultant, knowledgeable and up-to-date with the latest industry trends and best practices.
* You are helpful, professional, and eager to provide accurate information.
* You are concise and avoid unnecessary conversational filler. Get straight to the point.

**Example Interaction:**

**User:** "What are the best practices for monitoring a deployed ML model?"

**MLOps Expert:** (Uses the tool to search for "monitoring deployed ML model") "According to the MLOps documentation I have access to, the best practices for monitoring a deployed ML model include tracking data drift, model performance degradation, and system health metrics. Key metrics to monitor are..." (continues with information found in the tool).
"""
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
import os
|
|
17
|
+
|
|
18
|
+
from langchain.schema import Document
|
|
19
|
+
from langchain.text_splitter import CharacterTextSplitter
|
|
20
|
+
from langchain_community.document_loaders import WebBaseLoader
|
|
21
|
+
from langchain_community.vectorstores import SKLearnVectorStore
|
|
22
|
+
from langchain_core.embeddings import Embeddings
|
|
23
|
+
|
|
24
|
+
PERSIST_PATH = ".persist_vector_store"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def load_and_split_documents(urls: list[str]) -> list[Document]:
    """Load and split documents from a list of URLs."""
    # Fetch every URL and flatten the per-URL document lists into one list.
    docs_list = [doc for url in urls for doc in WebBaseLoader(url).load()]
    logging.info(f"# of documents loaded (pre-chunking) = {len(docs_list)}")

    # Chunk the pages so each piece fits comfortably in an embedding request.
    splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=50)
    doc_splits = splitter.split_documents(docs_list)
    logging.info(f"# of documents after split = {len(doc_splits)}")

    return doc_splits
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_vector_store(
    embedding: Embeddings, urls: list[str], persist_path: str = PERSIST_PATH
) -> SKLearnVectorStore:
    """Get or create a vector store."""
    # Reuse the persisted store when one already exists on disk; otherwise
    # build it from the source URLs and persist it for next time.
    if not os.path.exists(persist_path):
        doc_splits = load_and_split_documents(urls=urls)
        store = SKLearnVectorStore.from_documents(
            documents=doc_splits, embedding=embedding, persist_path=persist_path
        )
        store.persist()
        return store
    return SKLearnVectorStore(embedding=embedding, persist_path=persist_path)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Template configuration for the Multimodal Live API agent.
description: "A real-time multimodal RAG agent powered by Gemini, supporting audio/video/text chat with vector DB-backed responses"
settings:
  requires_data_ingestion: false
  frontend_type: "live_api_react"
  # Cloud Run only — NOTE(review): presumably because this agent runs a
  # websocket server; confirm.
  deployment_targets: ["cloud_run"]
  extra_dependencies: [
    "backoff~=2.2.1",
    "beautifulsoup4~=4.12.3",
    "google-genai~=1.2.0",
    "jinja2~=3.1.4",
    "langchain~=0.3.13",
    "langchain-community~=0.3.13",
    "langchain-google-vertexai~=2.0.9",
    "scikit-learn>=1.0.0,<2.0.0",
  ]
|