PyPI - clap-agents - Versions diffs - 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

clap-agents 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

clap/__init__.py +13 -42
clap/embedding/__init__.py +21 -0
clap/embedding/base_embedding.py +28 -0
clap/embedding/fastembed_embedding.py +75 -0
clap/embedding/ollama_embedding.py +76 -0
clap/embedding/sentence_transformer_embedding.py +44 -0
clap/llm_services/__init__.py +15 -0
clap/llm_services/base.py +3 -6
clap/llm_services/google_openai_compat_service.py +1 -5
clap/llm_services/groq_service.py +5 -13
clap/llm_services/ollama_service.py +101 -0
clap/mcp_client/client.py +7 -20
clap/multiagent_pattern/agent.py +107 -34
clap/multiagent_pattern/team.py +54 -29
clap/react_pattern/react_agent.py +339 -126
clap/tool_pattern/tool.py +94 -165
clap/tool_pattern/tool_agent.py +171 -171
clap/tools/__init__.py +1 -1
clap/tools/email_tools.py +16 -19
clap/tools/web_crawler.py +26 -18
clap/utils/completions.py +35 -37
clap/utils/extraction.py +3 -3
clap/utils/rag_utils.py +183 -0
clap/vector_stores/__init__.py +16 -0
clap/vector_stores/base.py +85 -0
clap/vector_stores/chroma_store.py +142 -0
clap/vector_stores/qdrant_store.py +155 -0
{clap_agents-0.1.1.dist-info → clap_agents-0.2.1.dist-info}/METADATA +201 -23
clap_agents-0.2.1.dist-info/RECORD +38 -0
clap_agents-0.1.1.dist-info/RECORD +0 -27
{clap_agents-0.1.1.dist-info → clap_agents-0.2.1.dist-info}/WHEEL +0 -0
{clap_agents-0.1.1.dist-info → clap_agents-0.2.1.dist-info}/licenses/LICENSE +0 -0

clap/tools/web_crawler.py CHANGED Viewed

@@ -1,17 +1,22 @@
 import asyncio
 import json
 import os
 from dotenv import load_dotenv
+from typing import Any
+from clap.tool_pattern.tool import tool
-from clap.tool_pattern.tool import tool
+_CRAWL4AI_AVAILABLE = False
+_AsyncWebCrawler_Placeholder_Type = Any
 try:
-    from crawl4ai import AsyncWebCrawler
+    from crawl4ai import AsyncWebCrawler as ImportedAsyncWebCrawler
+    _AsyncWebCrawler_Placeholder_Type = ImportedAsyncWebCrawler
+    _CRAWL4AI_AVAILABLE = True
 except ImportError:
-    raise ImportError("crawl4ai library not found. Please install it using: pip install crawl4ai")
+    pass
-load_dotenv()
+load_dotenv()
 @tool
 async def scrape_url(url: str) -> str:
@@ -24,9 +29,17 @@ async def scrape_url(url: str) -> str:
     Returns:
         The webpage content in markdown format or an error message.
     """
+    if not _CRAWL4AI_AVAILABLE:
+        raise ImportError("The 'crawl4ai' library is required for scrape_url. Install with 'pip install \"clap-agents[standard_tools]\"' or 'pip install crawl4ai'.")
     try:
-        async with AsyncWebCrawler() as crawler:
-            result = await crawler.arun(url=url)
+        crawler: _AsyncWebCrawler_Placeholder_Type = _AsyncWebCrawler_Placeholder_Type() # type: ignore
+        if not hasattr(crawler, 'arun') or not hasattr(crawler, 'close'): # Basic check
+             raise RuntimeError("AsyncWebCrawler from crawl4ai is not correctly initialized (likely due to missing dependency).")
+        async with crawler: # type: ignore
+            result = await crawler.arun(url=url) # type: ignore
             return result.markdown.raw_markdown if result.markdown else "No content found"
     except Exception as e:
         return f"Error scraping URL '{url}': {str(e)}"
@@ -44,32 +57,29 @@ async def extract_text_by_query(url: str, query: str, context_size: int = 300) -
     Returns:
         Relevant text snippets containing the query or a message indicating no matches/content.
     """
+    if not _CRAWL4AI_AVAILABLE:
+        raise ImportError("The 'crawl4ai' library is required for extract_text_by_query. Install with 'pip install \"clap-agents[standard_tools]\"' or 'pip install crawl4ai'.")
     try:
-        markdown_content = await scrape_url.run(url=url)
+        markdown_content = await scrape_url(url=url)
         if not markdown_content or markdown_content == "No content found" or markdown_content.startswith("Error"):
-            # Pass through the error message from scrape_url if it failed
-            return markdown_content if markdown_content.startswith("Error") else f"Could not retrieve content from URL: {url}"
+            return markdown_content
         lower_query = query.lower()
         lower_content = markdown_content.lower()
         matches = []
         start_index = 0
         while len(matches) < 5: # Limit matches
             pos = lower_content.find(lower_query, start_index)
-            if pos == -1:
-                break
+            if pos == -1: break
             start = max(0, pos - context_size)
             end = min(len(markdown_content), pos + len(lower_query) + context_size)
             context_snippet = markdown_content[start:end]
             prefix = "..." if start > 0 else ""
             suffix = "..." if end < len(markdown_content) else ""
             matches.append(f"{prefix}{context_snippet}{suffix}")
             start_index = pos + len(lower_query)
         if matches:
             result_text = "\n\n---\n\n".join([f"Match {i+1}:\n{match}" for i, match in enumerate(matches)])
             return f"Found {len(matches)} matches for '{query}' on the page:\n\n{result_text}"
@@ -77,6 +87,4 @@ async def extract_text_by_query(url: str, query: str, context_size: int = 300) -
             return f"No matches found for '{query}' on the page."
     except Exception as e:
-        # Catch potential errors during the find/string manipulation logic
         return f"Error processing content from '{url}' for query '{query}': {str(e)}"

clap/utils/completions.py CHANGED Viewed

@@ -2,10 +2,9 @@
 import asyncio
 from typing import Optional, List, Dict, Any
-# Assuming Groq client and specific API response types might be needed
-# from groq import Groq # Already imported elsewhere, ensure available
-# from groq.types.chat.chat_completion import ChatCompletion # Example type hint
-# from groq.types.chat.chat_completion_message import ChatCompletionMessage # Example type hint
+# from groq import Groq
+# from groq.types.chat.chat_completion import ChatCompletion
+# from groq.types.chat.chat_completion_message import ChatCompletionMessage
 from groq import AsyncGroq
@@ -15,11 +14,11 @@ ChatCompletionMessage = Any
 async def completions_create(
     client: AsyncGroq,
-    messages: List[Dict[str, Any]], # Use more specific types if available
+    messages: List[Dict[str, Any]],
     model: str,
-    tools: Optional[List[Dict[str, Any]]] = None, # Added tools parameter
-    tool_choice: str = "auto" # Added tool_choice parameter ("auto", "none", or {"type": "function", "function": {"name": "my_function"}})
-) -> ChatCompletionMessage: # Changed return type
+    tools: Optional[List[Dict[str, Any]]] = None,
+    tool_choice: str = "auto"
+) -> ChatCompletionMessage:
     """
     Sends an asynchronous request to the client's completions endpoint, supporting tool use.
@@ -34,7 +33,7 @@ async def completions_create(
         The message object from the API response, which might contain content or tool calls.
     """
     try:
-        # Prepare arguments, only include tools/tool_choice if tools are provided
         api_kwargs = {
             "messages": messages,
             "model": model,
@@ -43,15 +42,15 @@ async def completions_create(
             api_kwargs["tools"] = tools
             api_kwargs["tool_choice"] = tool_choice
-        # Changed .acreate to .create based on Groq async documentation
         response = await client.chat.completions.create(**api_kwargs)
-        # Return the entire message object from the first choice
         return response.choices[0].message
     except Exception as e:
-        # Handle potential API errors
         print(f"Error calling LLM API asynchronously: {e}")
-        # Return a custom message or re-raise depending on desired error handling
-        # Returning a placeholder error message object might be useful
         class ErrorMessage:
              content = f"Error communicating with LLM: {e}"
              tool_calls = None
@@ -61,10 +60,10 @@ async def completions_create(
 def build_prompt_structure(
     role: str,
-    content: Optional[str] = None, # Content is optional now
+    content: Optional[str] = None,
     tag: str = "",
-    tool_calls: Optional[List[Dict[str, Any]]] = None, # Added for assistant messages
-    tool_call_id: Optional[str] = None # Added for tool messages
+    tool_calls: Optional[List[Dict[str, Any]]] = None,
+    tool_call_id: Optional[str] = None
 ) -> dict:
     """
     Builds a structured message dictionary for the chat API.
@@ -85,17 +84,17 @@ def build_prompt_structure(
              content = f"<{tag}>{content}</{tag}>"
         message["content"] = content
-    # Add tool_calls if provided (only for assistant role)
     if role == "assistant" and tool_calls:
         message["tool_calls"] = tool_calls
-    # Add tool_call_id if provided (only for tool role)
     if role == "tool" and tool_call_id:
         message["tool_call_id"] = tool_call_id
-        if content is None: # Tool role requires content
+        if content is None:
              raise ValueError("Content is required for role 'tool'.")
-    # Basic validation
     if role == "tool" and not tool_call_id:
          raise ValueError("tool_call_id is required for role 'tool'.")
     if role != "assistant" and tool_calls:
@@ -106,7 +105,7 @@ def build_prompt_structure(
 def update_chat_history(
     history: list,
-    message: ChatCompletionMessage | Dict[str, Any] # Accept API message object or manually created dict
+    message: ChatCompletionMessage | Dict[str, Any]
     ):
     """
     Updates the chat history by appending a message object or a manually created message dict.
@@ -115,37 +114,37 @@ def update_chat_history(
         history (list): The list representing the current chat history.
         message: The message object from the API response or a dict created by build_prompt_structure.
     """
-    # If it's an API message object, convert it to the expected dict format
     if hasattr(message, "role"): # Basic check if it looks like an API message object
         msg_dict = {"role": message.role}
         if hasattr(message, "content") and message.content is not None:
             msg_dict["content"] = message.content
         if hasattr(message, "tool_calls") and message.tool_calls:
-             # Assuming message.tool_calls is already in the correct list[dict] format
             msg_dict["tool_calls"] = message.tool_calls
-        # Add other relevant fields if needed
         history.append(msg_dict)
     elif isinstance(message, dict) and "role" in message:
-        # If it's already a dictionary (e.g., from build_prompt_structure)
         history.append(message)
     else:
         raise TypeError("Invalid message type provided to update_chat_history.")
 class ChatHistory(list):
-    def __init__(self, messages: Optional[List[Dict[str, Any]]] = None, total_length: int = -1): # Type hint messages
+    def __init__(self, messages: Optional[List[Dict[str, Any]]] = None, total_length: int = -1):
         if messages is None:
             messages = []
         super().__init__(messages)
-        self.total_length = total_length # Note: total_length logic might need adjustment for tool calls/responses
+        self.total_length = total_length
-    def append(self, msg: Dict[str, Any]): # Expecting message dictionaries now
+    def append(self, msg: Dict[str, Any]):
         if not isinstance(msg, dict) or "role" not in msg:
             raise TypeError("ChatHistory can only append message dictionaries with a 'role'.")
-        # Simple length check, might need refinement based on token count or message types
         if self.total_length > 0 and len(self) == self.total_length:
-            self.pop(0) # Remove the oldest message (index 0)
+            self.pop(0)
         super().append(msg)
@@ -157,17 +156,16 @@ class FixedFirstChatHistory(ChatHistory):
         if not isinstance(msg, dict) or "role" not in msg:
             raise TypeError("ChatHistory can only append message dictionaries with a 'role'.")
-        # Keep the first message (system prompt) fixed
         if self.total_length > 0 and len(self) == self.total_length:
-            if len(self) > 1: # Ensure there's more than just the system prompt to remove
-                 self.pop(1) # Remove the second oldest message (index 1)
+            if len(self) > 1:
+                 self.pop(1)
             else:
-                 # Cannot append if length is 1 and fixed
                  print("Warning: Cannot append to FixedFirstChatHistory of size 1.")
                  return
-        # Only call super().append if there's space or an item was removed
         if self.total_length <= 0 or len(self) < self.total_length:
              super().append(msg)
-# --- END OF ASYNC MODIFIED completions.py ---

clap/utils/extraction.py CHANGED Viewed

@@ -29,13 +29,13 @@ def extract_tag_content(text: str, tag: str) -> TagContentResult:
             - 'content' (list): A list of strings containing the content found between the specified tags.
             - 'found' (bool): A flag indicating whether any content was found for the given tag.
     """
-    # Build the regex pattern dynamically to find multiple occurrences of the tag
     tag_pattern = rf"<{tag}>(.*?)</{tag}>"
-    # Use findall to capture all content between the specified tag
     matched_contents = re.findall(tag_pattern, text, re.DOTALL)
-    # Return the dataclass instance with the result
     return TagContentResult(
         content=[content.strip() for content in matched_contents],
         found=bool(matched_contents),

clap/utils/rag_utils.py ADDED Viewed

@@ -0,0 +1,183 @@
+import csv
+from typing import List, Dict, Any, Tuple, Optional, Union
+try:
+    import pypdf
+except ImportError:
+    raise ImportError(
+        "pypdf not found. Please install it for PDF loading: pip install pypdf"
+    )
+def load_text_file(file_path: str) -> str:
+    """Loads text content from a file."""
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return f.read()
+    except Exception as e:
+        print(f"Error loading text file {file_path}: {e}")
+        return ""
+def load_pdf_file(file_path: str) -> str:
+    """Loads text content from a PDF file."""
+    text = ""
+    try:
+        with open(file_path, 'rb') as f:
+            reader = pypdf.PdfReader(f)
+            print(f"Loading PDF '{file_path}' with {len(reader.pages)} pages...")
+            for i, page in enumerate(reader.pages):
+                page_text = page.extract_text()
+                if page_text:
+                    text += page_text + "\n"
+                else:
+                    print(f"Warning: No text extracted from page {i+1} of {file_path}")
+        print(f"Finished loading PDF '{file_path}'.")
+        return text.strip()
+    except FileNotFoundError:
+        print(f"Error: PDF file not found at {file_path}")
+        return ""
+    except Exception as e:
+        print(f"Error loading PDF file {file_path}: {e}")
+        return ""
+def load_csv_file(
+    file_path: str,
+    content_column: Union[str, int],
+    metadata_columns: Optional[List[Union[str, int]]] = None,
+    delimiter: str = ',',
+    encoding: str = 'utf-8'
+) -> List[Tuple[str, Dict[str, Any]]]:
+    """
+    Loads data from a CSV file, extracting content and metadata.
+    Each row is treated as a potential document/chunk.
+    Args:
+        file_path: Path to the CSV file.
+        content_column: The name (string) or index (int) of the column containing the main text content.
+        metadata_columns: Optional list of column names (string) or indices (int)
+                          to include as metadata for each row.
+        delimiter: CSV delimiter (default ',').
+        encoding: File encoding (default 'utf-8').
+    Returns:
+        A list of tuples, where each tuple contains:
+        (document_text: str, metadata: dict)
+    """
+    data = []
+    metadata_columns = metadata_columns or []
+    try:
+        with open(file_path, mode='r', encoding=encoding, newline='') as f:
+            has_header = isinstance(content_column, str) or any(isinstance(mc, str) for mc in metadata_columns)
+            if has_header:
+                reader = csv.DictReader(f, delimiter=delimiter)
+                headers = reader.fieldnames
+                if headers is None:
+                     print(f"Error: Could not read headers from CSV {file_path}")
+                     return []
+                if isinstance(content_column, str) and content_column not in headers:
+                    raise ValueError(f"Content column '{content_column}' not found in CSV headers: {headers}")
+                for mc in metadata_columns:
+                    if isinstance(mc, str) and mc not in headers:
+                        raise ValueError(f"Metadata column '{mc}' not found in CSV headers: {headers}")
+                content_key = content_column
+                meta_keys = [mc for mc in metadata_columns if isinstance(mc, str)]
+            else:
+                reader = csv.reader(f, delimiter=delimiter)
+                content_key = int(content_column)
+                meta_keys = [int(mc) for mc in metadata_columns]
+            print(f"Loading CSV '{file_path}'...")
+            for i, row in enumerate(reader):
+                try:
+                    if has_header:
+                        doc_text = row.get(content_key, "").strip()
+                        metadata = {key: row.get(key, "") for key in meta_keys}
+                    else:
+                        if content_key >= len(row): continue
+                        doc_text = row[content_key].strip()
+                        metadata = {}
+                        for key_index in meta_keys:
+                            if key_index < len(row):
+                                metadata[f"column_{key_index}"] = row[key_index]
+                    if doc_text:
+                        metadata["source_row"] = i + (1 if has_header else 0)
+                        data.append((doc_text, metadata))
+                except IndexError:
+                     print(f"Warning: Skipping row {i} due to index out of bounds (check column indices).")
+                except Exception as row_e:
+                     print(f"Warning: Skipping row {i} due to error: {row_e}")
+        print(f"Finished loading CSV '{file_path}', processed {len(data)} rows with content.")
+        return data
+    except FileNotFoundError:
+        print(f"Error: CSV file not found at {file_path}")
+        return []
+    except ValueError as ve:
+        print(f"Error processing CSV header/indices for {file_path}: {ve}")
+        return []
+    except Exception as e:
+        print(f"Error loading CSV file {file_path}: {e}")
+        return []
+def chunk_text_by_fixed_size(
+    text: str, chunk_size: int, chunk_overlap: int = 0
+) -> List[str]:
+    """Chunks text into fixed size blocks with optional overlap."""
+    if not isinstance(text, str):
+        print(f"Warning: chunk_text_by_fixed_size expected string, got {type(text)}. Skipping.")
+        return []
+    if chunk_overlap >= chunk_size:
+        raise ValueError("chunk_overlap must be smaller than chunk_size")
+    if chunk_size <= 0:
+         raise ValueError("chunk_size must be positive")
+    chunks = []
+    start = 0
+    while start < len(text):
+        end = start + chunk_size
+        chunks.append(text[start:end])
+        step = chunk_size - chunk_overlap
+        if step <= 0:
+             step = 1
+        start += step
+    return [chunk for chunk in chunks if chunk.strip()]
+def chunk_text_by_separator(text: str, separator: str = "\n\n") -> List[str]:
+    """Chunks text based on a specified separator."""
+    if not isinstance(text, str):
+        print(f"Warning: chunk_text_by_separator expected string, got {type(text)}. Skipping.")
+        return []
+    chunks = text.split(separator)
+    return [chunk for chunk in chunks if chunk.strip()]

clap/vector_stores/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+from .base import VectorStoreInterface, QueryResult, Document, Embedding, ID, Metadata
+__all__ = ["VectorStoreInterface", "QueryResult", "Document", "Embedding", "ID", "Metadata"]
+try:
+    from .chroma_store import ChromaStore
+    __all__.append("ChromaStore")
+except ImportError:
+    pass
+try:
+    from .qdrant_store import QdrantStore
+    __all__.append("QdrantStore")
+except ImportError:
+    pass

clap/vector_stores/base.py ADDED Viewed

@@ -0,0 +1,85 @@
+import abc
+from typing import Any, Dict, List, Optional, TypedDict, Union
+Document = str
+Embedding = List[float]
+ID = str
+Metadata = Dict[str, Any]
+class QueryResult(TypedDict):
+    ids: List[List[ID]]
+    embeddings: Optional[List[List[Embedding]]]
+    documents: Optional[List[List[Document]]]
+    metadatas: Optional[List[List[Metadata]]]
+    distances: Optional[List[List[float]]]
+class VectorStoreInterface(abc.ABC):
+    """Abstract Base Class for Vector Store interactions."""
+    @abc.abstractmethod
+    async def add_documents(
+        self,
+        documents: List[Document],
+        ids: List[ID],
+        metadatas: Optional[List[Metadata]] = None,
+        embeddings: Optional[List[Embedding]] = None,
+    ) -> None:
+        """
+        Add documents and their embeddings to the store.
+        If embeddings are not provided, the implementation should handle embedding generation.
+        Args:
+            documents: List of document texts.
+            ids: List of unique IDs for each document.
+            metadatas: Optional list of metadata dictionaries for each document.
+            embeddings: Optional list of pre-computed embeddings.
+        """
+        pass
+    @abc.abstractmethod
+    async def aquery(
+        self,
+        query_texts: Optional[List[Document]] = None,
+        query_embeddings: Optional[List[Embedding]] = None,
+        n_results: int = 5,
+        where: Optional[Dict[str, Any]] = None,
+        where_document: Optional[Dict[str, Any]] = None,
+        include: List[str] = ["metadatas", "documents", "distances"],
+    ) -> QueryResult:
+        """
+        Query the vector store for similar documents.
+        Provide either query_texts or query_embeddings.
+        Args:
+            query_texts: List of query texts. Embeddings will be generated.
+            query_embeddings: List of query embeddings.
+            n_results: Number of results to return for each query.
+            where: Optional metadata filter (syntax depends on implementation).
+            where_document: Optional document content filter (syntax depends on implementation).
+            include: List of fields to include in the results (e.g., "documents", "metadatas", "distances", "embeddings").
+        Returns:
+            A QueryResult dictionary containing the search results.
+        """
+        pass
+    @abc.abstractmethod
+    async def adelete(
+        self,
+        ids: Optional[List[ID]] = None,
+        where: Optional[Dict[str, Any]] = None,
+        where_document: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """
+        Delete documents from the store by ID or filter.
+        Args:
+            ids: Optional list of IDs to delete.
+            where: Optional metadata filter for deletion.
+            where_document: Optional document content filter for deletion.
+        """
+        pass

clap-agents 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl

clap-agents 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl