alita-sdk 0.3.208__py3-none-any.whl → 0.3.210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. alita_sdk/runtime/clients/artifact.py +18 -4
  2. alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py +2 -1
  3. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +3 -3
  4. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +8 -4
  5. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
  6. alita_sdk/runtime/langchain/langraph_agent.py +9 -6
  7. alita_sdk/runtime/toolkits/artifact.py +7 -3
  8. alita_sdk/runtime/toolkits/tools.py +8 -1
  9. alita_sdk/runtime/tools/application.py +2 -0
  10. alita_sdk/runtime/tools/artifact.py +65 -8
  11. alita_sdk/runtime/tools/vectorstore.py +125 -42
  12. alita_sdk/runtime/utils/utils.py +3 -0
  13. alita_sdk/tools/ado/__init__.py +8 -0
  14. alita_sdk/tools/ado/repos/repos_wrapper.py +37 -0
  15. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +0 -7
  16. alita_sdk/tools/ado/work_item/__init__.py +4 -0
  17. alita_sdk/tools/ado/work_item/ado_wrapper.py +37 -4
  18. alita_sdk/tools/aws/delta_lake/__init__.py +1 -1
  19. alita_sdk/tools/bitbucket/__init__.py +13 -1
  20. alita_sdk/tools/bitbucket/api_wrapper.py +31 -4
  21. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +31 -0
  22. alita_sdk/tools/chunkers/code/codeparser.py +18 -10
  23. alita_sdk/tools/confluence/api_wrapper.py +35 -134
  24. alita_sdk/tools/confluence/loader.py +30 -28
  25. alita_sdk/tools/elitea_base.py +112 -11
  26. alita_sdk/tools/figma/__init__.py +13 -1
  27. alita_sdk/tools/figma/api_wrapper.py +47 -3
  28. alita_sdk/tools/github/api_wrapper.py +8 -0
  29. alita_sdk/tools/github/github_client.py +18 -0
  30. alita_sdk/tools/gitlab/__init__.py +4 -0
  31. alita_sdk/tools/gitlab/api_wrapper.py +10 -0
  32. alita_sdk/tools/google/bigquery/__init__.py +1 -1
  33. alita_sdk/tools/jira/__init__.py +21 -13
  34. alita_sdk/tools/jira/api_wrapper.py +285 -5
  35. alita_sdk/tools/sharepoint/__init__.py +11 -1
  36. alita_sdk/tools/sharepoint/api_wrapper.py +23 -53
  37. alita_sdk/tools/testrail/__init__.py +4 -0
  38. alita_sdk/tools/testrail/api_wrapper.py +28 -56
  39. alita_sdk/tools/utils/content_parser.py +123 -9
  40. alita_sdk/tools/xray/__init__.py +8 -1
  41. alita_sdk/tools/xray/api_wrapper.py +505 -14
  42. alita_sdk/tools/zephyr_scale/api_wrapper.py +5 -5
  43. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/METADATA +1 -1
  44. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/RECORD +47 -47
  45. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/WHEEL +0 -0
  46. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/licenses/LICENSE +0 -0
  47. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/top_level.txt +0 -0
alita_sdk/tools/testrail/api_wrapper.py

@@ -4,13 +4,15 @@ from typing import Dict, List, Optional, Union, Any, Generator

  import pandas as pd
  from langchain_core.tools import ToolException
+ from openai import BadRequestError
  from pydantic import SecretStr, create_model, model_validator
  from pydantic.fields import Field, PrivateAttr
  from testrail_api import StatusCodeError, TestRailAPI
- from ..elitea_base import BaseVectorStoreToolApiWrapper, BaseIndexParams
+ from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
  from langchain_core.documents import Document

  from ...runtime.utils.utils import IndexerKeywords
+ from ..utils.content_parser import parse_file_content

  try:
      from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
@@ -288,20 +290,6 @@ updateCase = create_model(
      ),
  )

- # Schema for indexing TestRail data into vector store
- indexData = create_model(
-     "indexData",
-     __base__=BaseIndexParams,
-     project_id=(str, Field(description="TestRail project ID to index data from")),
-     suite_id=(Optional[str], Field(default=None, description="Optional TestRail suite ID to filter test cases")),
-     section_id=(Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
-     title_keyword=(Optional[str], Field(default=None, description="Optional keyword to filter test cases by title")),
-     progress_step=(Optional[int],
-                    Field(default=None, ge=0, le=100, description="Optional step size for progress reporting during indexing")),
-     clean_index=(Optional[bool],
-                  Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
- )
-
  SUPPORTED_KEYS = {
      "id", "title", "section_id", "template_id", "type_id", "priority_id", "milestone_id",
      "refs", "created_by", "created_on", "updated_by", "updated_on", "estimate",
@@ -316,14 +304,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
      password: Optional[SecretStr] = None,
      email: Optional[str] = None,
      _client: Optional[TestRailAPI] = PrivateAttr()  # Private attribute for the TestRail client
-     llm: Any = None
-
-     connection_string: Optional[SecretStr] = None
-     collection_name: Optional[str] = None
-     embedding_model: Optional[str] = "HuggingFaceEmbeddings"
-     embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
-     vectorstore_type: Optional[str] = "PGVector"
-

      @model_validator(mode="before")
      @classmethod
@@ -489,7 +469,8 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
              project_id=project_id, **params
          )

-         cases = extracted_cases.get("cases")
+         # support old versions of testrail_api
+         cases = extracted_cases.get("cases") if isinstance(extracted_cases, dict) else extracted_cases

          if cases is None:
              return ToolException("No test cases found in the extracted data.")
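The isinstance guard normalizes the two response shapes testrail_api has used over time: newer releases wrap results in a paginated dict under a "cases" key, while older ones return the list directly. A minimal, self-contained sketch of the same pattern:

from typing import Any, Dict, List, Union

def extract_cases(response: Union[Dict[str, Any], List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
    # Newer testrail_api: {"cases": [...], plus pagination keys}; older: a bare list.
    return response.get("cases") if isinstance(response, dict) else response

# Both shapes normalize to the same list:
assert extract_cases({"cases": [{"id": 1}]}) == [{"id": 1}]
assert extract_cases([{"id": 1}]) == [{"id": 1}]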
@@ -553,7 +534,8 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
      def _base_loader(self, project_id: str,
                       suite_id: Optional[str] = None,
                       section_id: Optional[int] = None,
-                      title_keyword: Optional[str] = None
+                      title_keyword: Optional[str] = None,
+                      **kwargs: Any
                       ) -> Generator[Document, None, None]:
          try:
              if suite_id:
@@ -576,7 +558,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
                  'title': case.get('title', ''),
                  'suite_id': suite_id or case.get('suite_id', ''),
                  'id': str(case.get('id', '')),
-                 'updated_on': case.get('updated_on') or -1,
+                 IndexerKeywords.UPDATED_ON.value: case.get('updated_on') or -1,
                  'labels': [lbl['title'] for lbl in case.get('labels', [])],
                  'type': case.get('type_id') or -1,
                  'priority': case.get('priority_id') or -1,
@@ -587,22 +569,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
                  'entity_type': 'test_case',
              })

-     def index_data(
-             self,
-             project_id: str,
-             suite_id: Optional[str] = None,
-             collection_suffix: str = "",
-             section_id: Optional[int] = None,
-             title_keyword: Optional[str] = None,
-             progress_step: Optional[int] = None,
-             clean_index: Optional[bool] = False
-     ):
-         """Load TestRail test cases into the vector store."""
-         docs = self._base_loader(project_id, suite_id, section_id, title_keyword)
-         embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
-         vs = self._init_vector_store(collection_suffix, embeddings=embedding)
-         return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
-
      def _process_document(self, document: Document) -> Generator[Document, None, None]:
          """
          Process an existing base document to extract relevant metadata for full document preparation.
@@ -625,16 +591,15 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):

          # process each attachment to extract its content
          for attachment in attachments:
-             attachment_id = attachment['id']
+             attachment_id = f"attach_{attachment['id']}"
              # add attachment id to metadata of parent
              document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
-
              # TODO: pass it to chunkers
              yield Document(page_content=self._process_attachment(attachment),
                             metadata={
                                 'project_id': base_data.get('project_id', ''),
-                                IndexerKeywords.PARENT.value: case_id,
-                                'id': attachment_id,
+                                'id': str(attachment_id),
+                                IndexerKeywords.PARENT.value: str(case_id),
                                 'filename': attachment['filename'],
                                 'filetype': attachment['filetype'],
                                 'created_on': attachment['created_on'],
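The attach_ prefix and the stringified ids establish a two-way parent/child link between a test case and its attachments. A sketch of the resulting relationship; the string values standing in for the IndexerKeywords constants are assumed, not taken from this diff:

from langchain_core.documents import Document

# Assumed stand-ins for IndexerKeywords.DEPENDENT_DOCS / IndexerKeywords.PARENT.
DEPENDENT_DOCS, PARENT = "dependent_docs", "parent"

case = Document(page_content="Login test", metadata={"id": "42"})
attachment_id = f"attach_{7}"  # prefix avoids id collisions with test-case ids

case.metadata.setdefault(DEPENDENT_DOCS, []).append(attachment_id)
attachment = Document(page_content="...parsed file content...",
                      metadata={"id": str(attachment_id),
                                PARENT: str(case.metadata["id"])})

assert attachment.metadata[PARENT] == "42"
assert attachment_id in case.metadata[DEPENDENT_DOCS]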
@@ -658,10 +623,24 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
          page_content = "This filetype is not supported."
          if attachment['filetype'] == 'txt':
              page_content = self._client.get(endpoint=f"get_attachment/{attachment['id']}")
-         # TODO: add support for other file types
-         # use utility to handle different types (tools/utils)
+         else:
+             try:
+                 attachment_path = self._client.attachments.get_attachment(attachment_id=attachment['id'], path=f"./{attachment['filename']}")
+                 page_content = parse_file_content(file_name=attachment['filename'], file_content=attachment_path.read_bytes(), llm=self.llm, is_capture_image=True)
+             except BadRequestError as ai_e:
+                 logger.error(f"Unable to parse page's content with type: {attachment['filetype']} due to AI service issues: {ai_e}")
+             except Exception as e:
+                 logger.error(f"Unable to parse page's content with type: {attachment['filetype']}: {e}")
          return page_content

+     def _index_tool_params(self):
+         return {
+             'project_id': (str, Field(description="TestRail project ID to index data from")),
+             'suite_id': (Optional[str],
+                          Field(default=None, description="Optional TestRail suite ID to filter test cases")),
+             'section_id': (Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
+         }
+
      def _to_markup(self, data: List[Dict], output_format: str) -> str:
          """
          Converts the given data into the specified format: 'json', 'csv', or 'markdown'.
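The new _index_tool_params hook replaces the hand-written indexData schema removed earlier: the toolkit now only declares its own filter fields, and the shared base class presumably assembles the full args schema from them. A sketch of how such (type, Field) tuples feed pydantic's create_model, the same mechanism the removed indexData used directly:

from typing import Optional
from pydantic import Field, create_model

index_params = {
    "project_id": (str, Field(description="TestRail project ID to index data from")),
    "suite_id": (Optional[str], Field(default=None, description="Optional suite ID filter")),
    "section_id": (Optional[int], Field(default=None, description="Optional section ID filter")),
}

# create_model accepts exactly the (type, FieldInfo) tuples _index_tool_params returns.
indexData = create_model("indexData", **index_params)
print(indexData(project_id="1").model_dump())
# {'project_id': '1', 'suite_id': None, 'section_id': None}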
@@ -689,6 +668,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
          if output_format == "markdown":
              return df.to_markdown(index=False)

+     @extend_with_vector_tools
      def get_available_tools(self):
          tools = [
              {
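@extend_with_vector_tools absorbs the boilerplate that the next hunk deletes from the end of get_available_tools. Its definition is not part of this diff; a plausible minimal shape, assuming it simply appends the base wrapper's vector-search tools (the _get_vector_search_tools call does appear in the removed lines below):

import functools

def extend_with_vector_tools(method):
    # Sketch only: the real decorator lives in alita_sdk.tools.elitea_base.
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        tools = method(self, *args, **kwargs)
        tools.extend(self._get_vector_search_tools())  # provided by the base wrapper
        return tools
    return wrapper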
@@ -726,14 +706,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
                  "ref": self.update_case,
                  "description": self.update_case.__doc__,
                  "args_schema": updateCase,
-             },
-             {
-                 "name": "index_data",
-                 "ref": self.index_data,
-                 "description": self.index_data.__doc__,
-                 "args_schema": indexData,
              }
          ]
-         # Add vector search from base
-         tools.extend(self._get_vector_search_tools())
          return tools
alita_sdk/tools/utils/content_parser.py

@@ -11,9 +11,12 @@ import pymupdf
  from langchain_core.tools import ToolException
  from transformers import BlipProcessor, BlipForConditionalGeneration
  from langchain_core.messages import HumanMessage
+ from logging import getLogger

  from ...runtime.langchain.tools.utils import bytes_to_base64

+ logger = getLogger(__name__)
+
  image_processing_prompt='''
  You are an AI model designed for analyzing images. Your task is to accurately describe the content of the given image. Depending on the type of image, follow these specific instructions:

@@ -56,13 +59,39 @@ Be as precise and thorough as possible in your responses. If something is unclear

  IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp', 'svg']

- def parse_file_content(file_name, file_content, is_capture_image: bool = False, page_number: int = None, sheet_name: str = None, llm=None):
+
+ def parse_file_content(file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
+                        sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False):
+     """Parse the content of a file based on its type and return the parsed content.
+
+     Args:
+         file_name (str): The name of the file to parse.
+         file_content (bytes): The content of the file as bytes.
+         is_capture_image (bool): Whether to capture images from the file.
+         page_number (int, optional): The specific page number to parse for PDF or PPTX files.
+         sheet_name (str, optional): The specific sheet name to parse for Excel files.
+         llm: The language model to use for image processing.
+         file_path (str, optional): The path to the file if it needs to be read from disk.
+     Returns:
+         str: The parsed content of the file.
+     Raises:
+         ToolException: If the file type is not supported or if there is an error reading the file.
+     """
+
+     if (file_path and (file_name or file_content)) or (not file_path and (not file_name or file_content is None)):
+         raise ToolException("Either (file_name and file_content) or file_path must be provided, but not both.")
+
+     if file_path:
+         file_content = file_to_bytes(file_path)
+         if file_content is None:
+             return ToolException(f"File not found or could not be read: {file_path}")
+         file_name = file_path.split('/')[-1]  # Extract file name from path
      if file_name.endswith('.txt'):
          return parse_txt(file_content)
      elif file_name.endswith('.docx'):
          return read_docx_from_bytes(file_content)
      elif file_name.endswith('.xlsx') or file_name.endswith('.xls'):
-         return parse_excel(file_content, sheet_name)
+         return parse_excel(file_content, sheet_name, excel_by_sheets)
      elif file_name.endswith('.pdf'):
          return parse_pdf(file_content, page_number, is_capture_image, llm)
      elif file_name.endswith('.pptx'):
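parse_file_content now supports two mutually exclusive call styles, enforced by the guard above. A usage sketch; the file paths are illustrative:

from alita_sdk.tools.utils.content_parser import parse_file_content

# In-memory mode: pass a name (for type detection) plus the raw bytes.
text = parse_file_content(file_name="notes.txt", file_content=b"hello")

# Disk mode: bytes are read via file_to_bytes and the name is derived from the path.
text = parse_file_content(file_path="./downloads/report.pdf", page_number=1)

# Supplying both a path and in-memory data (or neither) raises ToolException.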
@@ -80,17 +109,26 @@ def parse_txt(file_content):
      except Exception as e:
          return ToolException(f"Error decoding file content: {e}")

- def parse_excel(file_content, sheet_name = None):
+ def parse_excel(file_content, sheet_name = None, return_by_sheets: bool = False):
      try:
          excel_file = io.BytesIO(file_content)
          if sheet_name:
              return parse_sheet(excel_file, sheet_name)
          dfs = pd.read_excel(excel_file, sheet_name=sheet_name)
-         result = []
-         for sheet_name, df in dfs.items():
-             df.fillna('', inplace=True)
-             result.append(f"=== Sheet: {sheet_name} ===\n{df.to_string(index=False)}")
-         return "\n\n".join(result)
+
+         if return_by_sheets:
+             result = {}
+             for sheet_name, df in dfs.items():
+                 df.fillna('', inplace=True)
+                 result[sheet_name] = df.to_dict(orient='records')
+             return result
+         else:
+             result = []
+             for sheet_name, df in dfs.items():
+                 df.fillna('', inplace=True)
+                 string_content = df.to_string(index=False)
+                 result.append(f"====== Sheet name: {sheet_name} ======\n{string_content}")
+             return "\n\n".join(result)
      except Exception as e:
          return ToolException(f"Error reading Excel file: {e}")

@@ -165,6 +203,8 @@ def describe_image(image):
      return "\n[Picture: " + processor.decode(out[0], skip_special_tokens=True) + "]\n"

  def __perform_llm_prediction_for_image(llm, image: bytes, image_format='png', prompt=image_processing_prompt) -> str:
+     if not llm:
+         raise ToolException("LLM is not provided for image processing.")
      base64_string = bytes_to_base64(image)
      result = llm.invoke([
          HumanMessage(
@@ -176,4 +216,78 @@
              },
          ])
      ])
-     return f"\n[Image description: {result.content}]\n"
+     return f"\n[Image description: {result.content}]\n"
+
+
+ # TODO: review usage of this function alongside with functions above
+ def load_content(file_path: str, extension: str = None, loader_extra_config: dict = None, llm = None) -> str:
+     """
+     Loads the content of a file based on its extension using a configured loader.
+     """
+     try:
+         from ...runtime.langchain.document_loaders.constants import loaders_map
+
+         if not extension:
+             extension = file_path.split('.')[-1].lower()
+
+         loader_config = loaders_map.get(extension)
+         if not loader_config:
+             logger.warning(f"No loader found for file extension: {extension}. File: {file_path}")
+             return ""
+
+         loader_cls = loader_config['class']
+         loader_kwargs = loader_config['kwargs']
+
+         if loader_extra_config:
+             loader_kwargs.update(loader_extra_config)
+         if loader_config['is_multimodal_processing'] and llm:
+             loader_kwargs.update({'llm': llm})
+
+         loader = loader_cls(file_path, **loader_kwargs)
+         documents = loader.load()
+
+         page_contents = [doc.page_content for doc in documents]
+         return "\n".join(page_contents)
+     except Exception as e:
+         error_message = f"Error loading attachment: {str(e)}"
+         logger.warning(f"{error_message} for file {file_path}")
+         return ""
+
+
+ def load_content_from_bytes(file_content: bytes, extension: str = None, loader_extra_config: dict = None, llm = None) -> str:
+     """Loads the content of a file from bytes based on its extension using a configured loader."""
+
+     import tempfile
+
+     # Automatic cleanup with context manager
+     with tempfile.NamedTemporaryFile(mode='w+b', delete=True) as temp_file:
+         # Write data to temp file
+         temp_file.write(file_content)
+         temp_file.flush()  # Ensure data is written
+
+         # Get the file path for operations
+         temp_path = temp_file.name
+
+         # Perform your operations
+         return load_content(temp_path, extension, loader_extra_config, llm)
+
+
+ def file_to_bytes(filepath):
+     """
+     Reads a file and returns its content as a bytes object.
+
+     Args:
+         filepath (str): The path to the file.
+
+     Returns:
+         bytes: The content of the file as a bytes object.
+     """
+     try:
+         with open(filepath, "rb") as f:
+             file_content_bytes = f.read()
+         return file_content_bytes
+     except FileNotFoundError:
+         logger.error(f"File not found: {filepath}")
+         return None
+     except Exception as e:
+         logger.error(f"Error reading file {filepath}: {e}")
+         return None
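load_content_from_bytes bridges byte-oriented callers and the path-based loaders in loaders_map: it spills the bytes into a NamedTemporaryFile, delegates to load_content, and lets the context manager delete the file. A usage sketch; the .docx path is illustrative:

from alita_sdk.tools.utils.content_parser import file_to_bytes, load_content_from_bytes

raw = file_to_bytes("./specs/plan.docx")   # returns None (and logs) on read errors
if raw is not None:
    # Pass extension explicitly: the temp file name carries no suffix, so
    # load_content could not infer the type from the path on its own.
    text = load_content_from_bytes(raw, extension="docx")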
alita_sdk/tools/xray/__init__.py

@@ -20,7 +20,14 @@ def get_tools(tool):
          client_secret=tool['settings'].get('client_secret', None),
          limit=tool['settings'].get('limit', 20),
          verify_ssl=tool['settings'].get('verify_ssl', True),
-         toolkit_name=tool.get('toolkit_name')
+         toolkit_name=tool.get('toolkit_name'),
+
+         # indexer settings
+         connection_string=tool['settings'].get('connection_string', None),
+         collection_name=f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+         embedding_model="HuggingFaceEmbeddings",
+         embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+         vectorstore_type="PGVector"
      ).get_tools()

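The toolkit layer now injects the indexer configuration at construction time, matching the per-wrapper fields deleted from TestrailAPIWrapper in the hunks above (which presumably now live on the shared base wrapper). The collection name, in particular, is derived from the toolkit instance; a sketch with an assumed tool config dict:

tool = {"id": 17, "toolkit_name": "my_toolkit",
        "settings": {"connection_string": "postgresql+psycopg://..."}}

# One vector-store collection per toolkit instance:
collection_name = f"{tool.get('toolkit_name')}_{str(tool['id'])}"
assert collection_name == "my_toolkit_17"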