alita-sdk 0.3.211__py3-none-any.whl → 0.3.212__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/runtime/clients/client.py +2 -2
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +48 -24
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +47 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +103 -49
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +63 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +54 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +66 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +13 -19
- alita_sdk/runtime/langchain/document_loaders/utils.py +30 -1
- alita_sdk/runtime/tools/artifact.py +2 -4
- alita_sdk/runtime/tools/vectorstore.py +2 -1
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +13 -37
- alita_sdk/tools/ado/wiki/ado_wrapper.py +10 -39
- alita_sdk/tools/confluence/api_wrapper.py +2 -0
- alita_sdk/tools/elitea_base.py +20 -1
- alita_sdk/tools/gitlab/__init__.py +3 -2
- alita_sdk/tools/gitlab/api_wrapper.py +45 -18
- alita_sdk/tools/gitlab_org/api_wrapper.py +44 -25
- alita_sdk/tools/sharepoint/api_wrapper.py +13 -13
- alita_sdk/tools/testrail/api_wrapper.py +20 -0
- alita_sdk/tools/utils/content_parser.py +37 -162
- {alita_sdk-0.3.211.dist-info → alita_sdk-0.3.212.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.211.dist-info → alita_sdk-0.3.212.dist-info}/RECORD +26 -23
- {alita_sdk-0.3.211.dist-info → alita_sdk-0.3.212.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.211.dist-info → alita_sdk-0.3.212.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.211.dist-info → alita_sdk-0.3.212.dist-info}/top_level.txt +0 -0
alita_sdk/tools/gitlab_org/api_wrapper.py

```diff
@@ -1,6 +1,7 @@
 import logging
 from datetime import datetime
 from typing import Optional, Any, List, Dict
+import fnmatch
 
 from gitlab import GitlabGetError
 from langchain_core.tools import ToolException
```
```diff
@@ -22,7 +23,9 @@ GitLabCreateBranch = create_model(
 
 GitLabListBranches = create_model(
     "GitLabListBranchesModel",
-    repository=(Optional[str], Field(description="Name of the repository", default=None))
+    repository=(Optional[str], Field(description="Name of the repository", default=None)),
+    limit=(Optional[int], Field(description="Maximum number of branches to return. If not provided, all branches will be returned.", default=20)),
+    branch_wildcard=(Optional[str], Field(description="Wildcard pattern to filter branches by name. If not provided, all branches will be returned.", default=None))
 )
 
 GitlabSetActiveBranch = create_model(
```
```diff
@@ -209,16 +212,32 @@ class GitLabWorkspaceAPIWrapper(BaseToolApiWrapper):
         self._active_branch = branch
         return f"Active branch set to {branch}"
 
-    def list_branches_in_repo(self, repository: Optional[str] = None) -> List[str]:
-        """
+    def list_branches_in_repo(self, repository: Optional[str] = None, limit: Optional[int] = 20, branch_wildcard: Optional[str] = None) -> List[str]:
+        """
+        Lists branches in the repository with optional limit and wildcard filtering.
+
+        Parameters:
+            repository (Optional[str]): Name of the repository. If None, uses the active repository.
+            limit (Optional[int]): Maximum number of branches to return
+            branch_wildcard (Optional[str]): Wildcard pattern to filter branches (e.g., '*dev')
+
+        Returns:
+            List[str]: List containing names of branches
+        """
         try:
             repo_instance = self._get_repo(repository)
-            branches = repo_instance.branches.list()
-
+            branches = repo_instance.branches.list(get_all=True)
+
+            if branch_wildcard:
+                branches = [branch for branch in branches if fnmatch.fnmatch(branch.name, branch_wildcard)]
+
+            if limit:
+                branches = branches[:limit]
+
+            branch_names = [branch.name for branch in branches]
+            return branch_names
         except Exception as e:
-            return
-
-
+            return f"Failed to list branches: {str(e)}"
 
     def create_branch(self, branch_name: str, repository: Optional[str] = None) -> str:
         """Create a new branch in the repository."""
```
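The wildcard filter relies on the standard library's `fnmatch`, so patterns are shell-style (`*`, `?`, `[seq]`) rather than regular expressions, and the limit is applied only after filtering. A minimal standalone sketch of the same semantics (the `Branch` dataclass is a hypothetical stand-in for python-gitlab's branch objects, which expose a `.name` attribute):

```python
import fnmatch
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class Branch:
    """Hypothetical stand-in for a python-gitlab branch object."""
    name: str


def filter_branches(branches: List[Branch],
                    limit: Optional[int] = 20,
                    branch_wildcard: Optional[str] = None) -> List[str]:
    if branch_wildcard:
        # Shell-style match: '*dev' means "name ends with dev", not a regex.
        branches = [b for b in branches if fnmatch.fnmatch(b.name, branch_wildcard)]
    if limit:
        branches = branches[:limit]
    return [b.name for b in branches]


branches = [Branch("main"), Branch("feature-dev"), Branch("hotfix-dev"), Branch("dev-tools")]
print(filter_branches(branches, branch_wildcard="*dev"))
# ['feature-dev', 'hotfix-dev']
```

Because the guard is `if limit:`, passing `limit=0` disables truncation entirely rather than returning an empty list.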
```diff
@@ -568,104 +587,104 @@ class GitLabWorkspaceAPIWrapper(BaseToolApiWrapper):
         return [
             {
                 "name": "create_branch",
-                "description": self.create_branch.__doc__,
+                "description": self.create_branch.__doc__ or "Create a new branch in the repository.",
                 "args_schema": GitLabCreateBranch,
                 "ref": self.create_branch,
             },
             {
                 "name": "set_active_branch",
-                "description": self.set_active_branch.__doc__,
+                "description": self.set_active_branch.__doc__ or "Set the active branch for the bot.",
                 "args_schema": GitlabSetActiveBranch,
                 "ref": self.set_active_branch,
             },
             {
                 "name": "list_branches_in_repo",
-                "description": self.list_branches_in_repo.__doc__,
+                "description": self.list_branches_in_repo.__doc__ or "List branches in the repository with optional limit and wildcard filtering.",
                 "args_schema": GitLabListBranches,
                 "ref": self.list_branches_in_repo,
             },
             {
                 "name": "get_issues",
-                "description": self.get_issues.__doc__,
+                "description": self.get_issues.__doc__ or "Fetches all open issues from the repository.",
                 "args_schema": GitLabGetIssues,
                 "ref": self.get_issues,
             },
             {
                 "name": "get_issue",
-                "description": self.get_issue.__doc__,
+                "description": self.get_issue.__doc__ or "Fetches a specific issue and its first 10 comments.",
                 "args_schema": GitLabGetIssue,
                 "ref": self.get_issue,
             },
             {
                 "name": "create_pull_request",
-                "description": self.create_pull_request.__doc__,
+                "description": self.create_pull_request.__doc__ or "Creates a pull request in the repository.",
                 "args_schema": GitLabCreatePullRequest,
                 "ref": self.create_pull_request,
             },
             {
                 "name": "comment_on_issue",
-                "description": self.comment_on_issue.__doc__,
+                "description": self.comment_on_issue.__doc__ or "Adds a comment to a GitLab issue.",
                 "args_schema": GitLabCommentOnIssue,
                 "ref": self.comment_on_issue,
             },
             {
                 "name": "create_file",
-                "description": self.create_file.__doc__,
+                "description": self.create_file.__doc__ or "Creates a new file in the GitLab repository.",
                 "args_schema": GitLabCreateFile,
                 "ref": self.create_file,
             },
             {
                 "name": "read_file",
-                "description": self.read_file.__doc__,
+                "description": self.read_file.__doc__ or "Reads a file from the GitLab repository.",
                 "args_schema": GitLabReadFile,
                 "ref": self.read_file,
             },
             {
                 "name": "update_file",
-                "description": self.update_file.__doc__,
+                "description": self.update_file.__doc__ or "Updates a file in the GitLab repository.",
                 "args_schema": GitLabUpdateFile,
                 "ref": self.update_file,
             },
             {
                 "name": "delete_file",
-                "description": self.delete_file.__doc__,
+                "description": self.delete_file.__doc__ or "Deletes a file from the GitLab repository.",
                 "args_schema": GitLabDeleteFile,
                 "ref": self.delete_file,
             },
             {
                 "name": "get_pr_changes",
-                "description": self.get_pr_changes.__doc__,
+                "description": self.get_pr_changes.__doc__ or "Get pull request changes from the specified PR number and repository.",
                 "args_schema": GitLabGetPRChanges,
                 "ref": self.get_pr_changes,
             },
             {
                 "name": "create_pr_change_comment",
-                "description": self.create_pr_change_comment.__doc__,
+                "description": self.create_pr_change_comment.__doc__ or "Create a comment on a pull request change in GitLab.",
                 "args_schema": GitLabCreatePullRequestChangeCommentInput,
                 "ref": self.create_pr_change_comment,
             },
             {
                 "name": "list_files",
-                "description": self.list_files.__doc__,
+                "description": self.list_files.__doc__ or "List files by defined path.",
                 "args_schema": ListFilesModel,
                 "ref": self.list_files,
             },
             {
                 "name": "list_folders",
-                "description": self.list_folders.__doc__,
+                "description": self.list_folders.__doc__ or "List folders by defined path.",
                 "args_schema": ListFilesModel,
                 "ref": self.list_folders,
             },
             {
                 "name": "append_file",
-                "description": self.append_file.__doc__,
+                "description": self.append_file.__doc__ or "Appends new content to the end of a file.",
                 "args_schema": AppendFileInput,
                 "ref": self.append_file,
             },
             {
                 "ref": self.get_commits,
                 "name": "get_commits",
-                "description": self.get_commits.__doc__,
+                "description": self.get_commits.__doc__ or "Retrieves a list of commits from the repository.",
                 "args_schema": GetCommits,
             }
         ]
```
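The repeated `__doc__ or "..."` change guards against missing docstrings: `__doc__` is `None` when a function has no docstring (and `python -OO` strips docstrings altogether), so the old code could hand `None` to the tool schema as a description. A minimal illustration of the fallback:

```python
def documented():
    """Create a new branch in the repository."""


def undocumented():
    pass


for fn in (documented, undocumented):
    # A missing docstring yields None, so `or` falls back to the default.
    print(fn.__doc__ or "fallback description")
# Create a new branch in the repository.
# fallback description
```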
alita_sdk/tools/sharepoint/api_wrapper.py

```diff
@@ -1,6 +1,6 @@
 import json
 import logging
-from typing import Optional, List, Generator
+from typing import Optional, List, Generator, Any
 
 from langchain_core.documents import Document
 from langchain_core.tools import ToolException
```
```diff
@@ -129,7 +129,7 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
                   is_capture_image: bool = False,
                   page_number: int = None,
                   sheet_name: str = None,
-                  excel_by_sheets: bool = False):
+                  excel_by_sheets: bool = False) -> str | dict | ToolException:
         """ Reads file located at the specified server-relative path. """
         try:
             file = self._client.web.get_file_by_server_relative_path(path)
```
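The widened annotation makes explicit that `read_file` can return plain text, a per-sheet dict for Excel workbooks, or a `ToolException` (returned rather than raised, as elsewhere in this module). A hypothetical caller, not SDK code, would branch on the runtime type:

```python
from langchain_core.tools import ToolException


def handle(result: str | dict | ToolException) -> str:
    # The error case is returned, not raised, so it must be checked explicitly.
    if isinstance(result, ToolException):
        return f"error: {result}"
    if isinstance(result, dict):  # Excel content keyed by sheet name
        return "\n\n".join(f"{sheet}:\n{content}" for sheet, content in result.items())
    return result
```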
```diff
@@ -148,30 +148,30 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
                                      excel_by_sheets=excel_by_sheets,
                                      llm=self.llm)
 
-    def _base_loader(self, **kwargs) ->
+    def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
         try:
             all_files = self.get_files_list()
         except Exception as e:
             raise ToolException(f"Unable to extract files: {e}")
 
-        docs: List[Document] = []
         for file in all_files:
             metadata = {
                 ("updated_on" if k == "Modified" else k): str(v)
                 for k, v in file.items()
             }
-
-        return docs
+            yield Document(page_content="", metadata=metadata)
 
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
-
-
-
-
-
-
+        doc_content = self.read_file(document.metadata['Path'],
+                                     is_capture_image=True,
+                                     excel_by_sheets=True)
+        if isinstance(doc_content, dict):
+            for page, content in doc_content:
+                new_metadata = document.metadata
+                new_metadata['page'] = page
+                yield Document(page_content=str(content), metadata=new_metadata)
         else:
-            document.page_content =
+            document.page_content = str(doc_content)
             yield document
 
     @extend_with_vector_tools
```
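`_base_loader` now yields lightweight metadata-only stubs instead of building (and previously discarding) a list, and `_process_document` lazily expands each stub into one document per Excel sheet. A simplified sketch of that two-stage shape, with a hypothetical `read_file` and with `dict.items()` used to pair sheet names with their content:

```python
from typing import Generator

from langchain_core.documents import Document


def read_file(path: str) -> str | dict:
    # Hypothetical stand-in: Excel files come back as {sheet_name: content}.
    return {"Sheet1": "a,b\n1,2", "Sheet2": "c,d\n3,4"}


def base_loader(files: list[dict]) -> Generator[Document, None, None]:
    for file in files:
        # Metadata only; the content is fetched lazily in the second stage.
        yield Document(page_content="", metadata=dict(file))


def process_document(document: Document) -> Generator[Document, None, None]:
    content = read_file(document.metadata["Path"])
    if isinstance(content, dict):
        for page, page_content in content.items():
            metadata = {**document.metadata, "page": page}
            yield Document(page_content=str(page_content), metadata=metadata)
    else:
        document.page_content = str(content)
        yield document


for stub in base_loader([{"Path": "/sites/demo/report.xlsx"}]):
    for doc in process_document(stub):
        print(doc.metadata["page"], repr(doc.page_content))
```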
alita_sdk/tools/testrail/api_wrapper.py

```diff
@@ -8,6 +8,8 @@ from openai import BadRequestError
 from pydantic import SecretStr, create_model, model_validator
 from pydantic.fields import Field, PrivateAttr
 from testrail_api import StatusCodeError, TestRailAPI
+
+from ..chunkers.code.constants import get_file_extension
 from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
 from langchain_core.documents import Document
 
```
```diff
@@ -537,6 +539,9 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
             title_keyword: Optional[str] = None,
             **kwargs: Any
     ) -> Generator[Document, None, None]:
+        self._include_attachments = kwargs.get('include_attachments', False)
+        self._skip_attachment_extensions = kwargs.get('skip_attachment_extensions', [])
+
         try:
             if suite_id:
                 resp = self._client.cases.get_cases(project_id=project_id, suite_id=int(suite_id))
```
```diff
@@ -582,6 +587,11 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
             Generator[Document, None, None]: A generator yielding processed Document objects with metadata.
         """
         try:
+            if not self._include_attachments:
+                # If attachments are not included, return the document as is
+                yield document
+                return
+
             # get base data from the document required to extract attachments and other metadata
             base_data = json.loads(document.page_content)
             case_id = base_data.get("id")
```
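The `yield document` followed by a bare `return` is the generator idiom for an early exit: the parent document is still emitted, but none of the attachment-expansion code below it runs. Stripped to its essentials:

```python
from typing import Generator


def process_document(document: str, include_attachments: bool) -> Generator[str, None, None]:
    if not include_attachments:
        # Early exit: emit the parent document and skip attachment expansion.
        yield document
        return
    yield document
    yield f"{document}::attachment"


print(list(process_document("case-1", include_attachments=False)))  # ['case-1']
print(list(process_document("case-1", include_attachments=True)))   # ['case-1', 'case-1::attachment']
```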
```diff
@@ -591,6 +601,10 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
 
             # process each attachment to extract its content
             for attachment in attachments:
+                if get_file_extension(attachment['filename']) in self._skip_attachment_extensions:
+                    logger.info(f"Skipping attachment {attachment['filename']} with unsupported extension.")
+                    continue
+
                 attachment_id = f"attach_{attachment['id']}"
                 # add attachment id to metadata of parent
                 document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
```
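The skip list is compared against whatever `get_file_extension` returns, so entries must match its normalization. Assuming the helper reduces a filename to a lowercase dotted suffix (an assumption about `chunkers/code/constants.py`, not its verified implementation), the check behaves like this sketch:

```python
from pathlib import Path


def get_file_extension(filename: str) -> str:
    # Assumed behavior: 'screenshot.PNG' -> '.png'.
    return Path(filename).suffix.lower()


skip_attachment_extensions = [".png", ".jpg"]
attachments = [
    {"id": 1, "filename": "screenshot.PNG"},
    {"id": 2, "filename": "steps.docx"},
]

for attachment in attachments:
    if get_file_extension(attachment["filename"]) in skip_attachment_extensions:
        print(f"Skipping {attachment['filename']}")
        continue
    print(f"Processing {attachment['filename']}")
```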
```diff
@@ -639,6 +653,12 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
             'suite_id': (Optional[str],
                          Field(default=None, description="Optional TestRail suite ID to filter test cases")),
             'section_id': (Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
+            'include_attachments': (Optional[bool],
+                                    Field(description="Whether to include attachment content in indexing",
+                                          default=False)),
+            'skip_attachment_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to skip when processing attachments: i.e. ['.png', '.jpg']",
+                default=[])),
         }
 
     def _to_markup(self, data: List[Dict], output_format: str) -> str:
```
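These `(type, Field(...))` tuples are pydantic `create_model` field definitions, the same style used for the GitLab schemas above. A minimal sketch showing the defaults the two new fields produce:

```python
from typing import List, Optional

from pydantic import Field, create_model

IndexSchema = create_model(
    "IndexSchema",
    include_attachments=(Optional[bool], Field(
        description="Whether to include attachment content in indexing", default=False)),
    skip_attachment_extensions=(Optional[List[str]], Field(
        description="List of file extensions to skip when processing attachments: i.e. ['.png', '.jpg']",
        default=[])),
)

print(IndexSchema().model_dump())
# {'include_attachments': False, 'skip_attachment_extensions': []}
```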
alita_sdk/tools/utils/content_parser.py

```diff
@@ -1,19 +1,11 @@
-import
-
-from docx import Document
-from io import BytesIO
-import pandas as pd
-from PIL import Image
-from pptx import Presentation
-from pptx.enum.shapes import MSO_SHAPE_TYPE
-import io
-import pymupdf
+from pathlib import Path
+
 from langchain_core.tools import ToolException
-from transformers import BlipProcessor, BlipForConditionalGeneration
-from langchain_core.messages import HumanMessage
 from logging import getLogger
+from alita_sdk.runtime.langchain.document_loaders.constants import loaders_map
+from langchain_core.documents import Document
 
-from ...runtime.langchain.
+from ...runtime.langchain.document_loaders.utils import create_temp_file
 
 logger = getLogger(__name__)
 
```
```diff
@@ -61,7 +53,7 @@ IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp', 'svg']
 
 
 def parse_file_content(file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
-                       sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False):
+                       sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False) -> str | ToolException:
     """Parse the content of a file based on its type and return the parsed content.
 
     Args:
```
```diff
@@ -72,6 +64,7 @@ def parse_file_content(file_name=None, file_content=None, is_capture_image: bool
         sheet_name (str, optional): The specific sheet name to parse for Excel files.
         llm: The language model to use for image processing.
         file_path (str, optional): The path to the file if it needs to be read from disk.
+        return_type (str, optional): Tipe of returned result. Possible values are 'str', 'docs'.
     Returns:
         str: The parsed content of the file.
     Raises:
```
```diff
@@ -81,142 +74,39 @@ def parse_file_content(file_name=None, file_content=None, is_capture_image: bool
     if (file_path and (file_name or file_content)) or (not file_path and (not file_name or file_content is None)):
         raise ToolException("Either (file_name and file_content) or file_path must be provided, but not both.")
 
-    if file_path
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return __perform_llm_prediction_for_image(llm, file_content, match.group(1), image_processing_prompt)
-    else:
+    extension = Path(file_path if file_path else file_name).suffix
+
+    loader_object = loaders_map.get(extension)
+    loader_kwargs = loader_object['kwargs']
+    loader_kwargs.update({
+        "file_path": file_path,
+        "file_content": file_content,
+        "file_name": file_name,
+        "extract_images": is_capture_image,
+        "llm": llm,
+        "page_number": page_number,
+        "sheet_name": sheet_name,
+        "excel_by_sheets": excel_by_sheets
+    })
+    loader = loader_object['class'](**loader_kwargs)
+
+    if not loader:
         return ToolException(
             "Not supported type of files entered. Supported types are TXT, DOCX, PDF, PPTX, XLSX and XLS only.")
 
-
-
-            return file_content.decode('utf-8')
-        except Exception as e:
-            return ToolException(f"Error decoding file content: {e}")
-
-def parse_excel(file_content, sheet_name = None, return_by_sheets: bool = False):
-    try:
-        excel_file = io.BytesIO(file_content)
-        if sheet_name:
-            return parse_sheet(excel_file, sheet_name)
-        dfs = pd.read_excel(excel_file, sheet_name=sheet_name)
-
-        if return_by_sheets:
-            result = {}
-            for sheet_name, df in dfs.items():
-                df.fillna('', inplace=True)
-                result[sheet_name] = df.to_dict(orient='records')
-            return result
-        else:
-            result = []
-            for sheet_name, df in dfs.items():
-                df.fillna('', inplace=True)
-                string_content = df.to_string(index=False)
-                result.append(f"====== Sheet name: {sheet_name} ======\n{string_content}")
-            return "\n\n".join(result)
-    except Exception as e:
-        return ToolException(f"Error reading Excel file: {e}")
-
-def parse_sheet(excel_file, sheet_name):
-    df = pd.read_excel(excel_file, sheet_name=sheet_name)
-    df.fillna('', inplace=True)
-    return df.to_string()
-
-def parse_pdf(file_content, page_number, is_capture_image, llm):
-    with pymupdf.open(stream=file_content, filetype="pdf") as report:
-        text_content = ''
-        if page_number is not None:
-            page = report.load_page(page_number - 1)
-            text_content += read_pdf_page(report, page, page_number, is_capture_image, llm)
-        else:
-            for index, page in enumerate(report, start=1):
-                text_content += read_pdf_page(report, page, index, is_capture_image, llm)
-        return text_content
-
-def parse_pptx(file_content, page_number, is_capture_image, llm=None):
-    prs = Presentation(io.BytesIO(file_content))
-    text_content = ''
-    if page_number is not None:
-        text_content += read_pptx_slide(prs.slides[page_number - 1], page_number, is_capture_image, llm)
+    if hasattr(loader, 'get_content'):
+        return loader.get_content()
     else:
-
-
-
-
-
-
-
-
-
-
-            xref = img[0]
-            base_image = report.extract_image(xref)
-            img_bytes = base_image["image"]
-            text_content += __perform_llm_prediction_for_image(llm, img_bytes)
-    return text_content
-
-def read_docx_from_bytes(file_content):
-    """Read and return content from a .docx file using a byte stream."""
-    try:
-        doc = Document(BytesIO(file_content))
-        text = []
-        for paragraph in doc.paragraphs:
-            text.append(paragraph.text)
-        return '\n'.join(text)
-    except Exception as e:
-        print(f"Error reading .docx from bytes: {e}")
-        return ""
-
-def read_pptx_slide(slide, index, is_capture_image, llm):
-    text_content = f'Slide: {index}\n'
-    for shape in slide.shapes:
-        if hasattr(shape, "text"):
-            text_content += shape.text + "\n"
-        elif is_capture_image and shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
-            try:
-                caption = __perform_llm_prediction_for_image(llm, shape.image.blob)
-            except:
-                caption = "\n[Picture: unknown]\n"
-            text_content += caption
-    return text_content
-
-def describe_image(image):
-    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-    inputs = processor(image, return_tensors="pt")
-    out = model.generate(**inputs)
-    return "\n[Picture: " + processor.decode(out[0], skip_special_tokens=True) + "]\n"
-
-def __perform_llm_prediction_for_image(llm, image: bytes, image_format='png', prompt=image_processing_prompt) -> str:
-    if not llm:
-        raise ToolException("LLM is not provided for image processing.")
-    base64_string = bytes_to_base64(image)
-    result = llm.invoke([
-        HumanMessage(
-            content=[
-                {"type": "text", "text": prompt},
-                {
-                    "type": "image_url",
-                    "image_url": {"url": f"data:image/{image_format};base64,{base64_string}"},
-                },
-            ])
-    ])
-    return f"\n[Image description: {result.content}]\n"
+        if file_content:
+            return load_content_from_bytes(file_content=file_content,
+                                           extension=extension,
+                                           loader_extra_config=loader_kwargs,
+                                           llm=llm)
+        else:
+            return load_content(file_path=file_path,
+                                extension=extension,
+                                loader_extra_config=loader_kwargs,
+                                llm=llm)
 
 # TODO: review usage of this function alongside with functions above
 def load_content(file_path: str, extension: str = None, loader_extra_config: dict = None, llm = None) -> str:
```
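The net effect of this hunk is to replace a chain of per-format `parse_*` helpers with table-driven dispatch: the file extension selects a loader class plus its default kwargs from `loaders_map`, and every loader exposes the same `get_content()` entry point. A simplified sketch of the pattern (`TextLoader` is a hypothetical stand-in for the SDK's real loader classes, and the kwargs are merged into a fresh dict so the shared defaults in the table are not mutated):

```python
from pathlib import Path


class TextLoader:
    """Hypothetical loader; the real classes live in document_loaders/constants.py."""

    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def get_content(self) -> str:
        return Path(self.kwargs["file_path"]).read_text()


# Extension -> loader class plus default construction kwargs.
loaders_map = {
    ".txt": {"class": TextLoader, "kwargs": {"autodetect_encoding": True}},
}


def parse(file_path: str) -> str:
    entry = loaders_map.get(Path(file_path).suffix)
    if entry is None:
        raise ValueError(f"No loader registered for {file_path!r}")
    # Merge into a fresh dict rather than update() in place, so per-call
    # values do not leak into the shared loaders_map entry.
    loader = entry["class"](**{**entry["kwargs"], "file_path": file_path})
    return loader.get_content()
```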
```diff
@@ -254,22 +144,7 @@ def load_content(file_path: str, extension: str = None, loader_extra_config: dic
 
 def load_content_from_bytes(file_content: bytes, extension: str = None, loader_extra_config: dict = None, llm = None) -> str:
     """Loads the content of a file from bytes based on its extension using a configured loader."""
-
-    import tempfile
-
-    # Automatic cleanup with context manager
-    with tempfile.NamedTemporaryFile(mode='w+b', delete=True) as temp_file:
-        # Write data to temp file
-        temp_file.write(file_content)
-        temp_file.flush()  # Ensure data is written
-
-        # Get the file path for operations
-        temp_path = temp_file.name
-
-        # Perform your operations
-        return load_content(temp_path, extension, loader_extra_config, llm)
-
-
+    return load_content(create_temp_file(file_content), extension, loader_extra_config, llm)
 
 def file_to_bytes(filepath):
     """
```
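A plausible shape for the `create_temp_file` helper that replaces the inline block, shown purely as an assumption about its behavior (the real implementation landed in `document_loaders/utils.py`, +30 -1 in this release). Since the helper returns before `load_content` runs, it cannot use `delete=True` the way the removed context manager did:

```python
import tempfile


def create_temp_file(file_content: bytes, suffix: str = "") -> str:
    # Assumed behavior: persist the bytes and return a path the caller can
    # hand to load_content(); delete=False leaves cleanup to the caller.
    with tempfile.NamedTemporaryFile(mode="w+b", suffix=suffix, delete=False) as temp_file:
        temp_file.write(file_content)
        return temp_file.name
```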
{alita_sdk-0.3.211.dist-info → alita_sdk-0.3.212.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.211
+Version: 0.3.212
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0
```