alita-sdk 0.3.205__py3-none-any.whl → 0.3.207__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. alita_sdk/runtime/clients/client.py +314 -11
  2. alita_sdk/runtime/langchain/assistant.py +22 -21
  3. alita_sdk/runtime/langchain/interfaces/llm_processor.py +1 -4
  4. alita_sdk/runtime/langchain/langraph_agent.py +6 -1
  5. alita_sdk/runtime/langchain/store_manager.py +4 -4
  6. alita_sdk/runtime/toolkits/application.py +5 -10
  7. alita_sdk/runtime/toolkits/tools.py +11 -21
  8. alita_sdk/runtime/tools/vectorstore.py +25 -11
  9. alita_sdk/runtime/utils/streamlit.py +505 -222
  10. alita_sdk/runtime/utils/toolkit_runtime.py +147 -0
  11. alita_sdk/runtime/utils/toolkit_utils.py +157 -0
  12. alita_sdk/runtime/utils/utils.py +5 -0
  13. alita_sdk/tools/__init__.py +2 -0
  14. alita_sdk/tools/ado/repos/repos_wrapper.py +20 -13
  15. alita_sdk/tools/bitbucket/api_wrapper.py +5 -5
  16. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +54 -29
  17. alita_sdk/tools/elitea_base.py +9 -4
  18. alita_sdk/tools/gitlab/__init__.py +22 -10
  19. alita_sdk/tools/gitlab/api_wrapper.py +278 -253
  20. alita_sdk/tools/gitlab/tools.py +354 -376
  21. alita_sdk/tools/llm/llm_utils.py +0 -6
  22. alita_sdk/tools/memory/__init__.py +54 -10
  23. alita_sdk/tools/openapi/__init__.py +14 -3
  24. alita_sdk/tools/sharepoint/__init__.py +2 -1
  25. alita_sdk/tools/sharepoint/api_wrapper.py +11 -3
  26. alita_sdk/tools/testrail/api_wrapper.py +39 -16
  27. alita_sdk/tools/utils/content_parser.py +77 -13
  28. {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/METADATA +1 -1
  29. {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/RECORD +32 -40
  30. alita_sdk/community/analysis/__init__.py +0 -0
  31. alita_sdk/community/analysis/ado_analyse/__init__.py +0 -103
  32. alita_sdk/community/analysis/ado_analyse/api_wrapper.py +0 -261
  33. alita_sdk/community/analysis/github_analyse/__init__.py +0 -98
  34. alita_sdk/community/analysis/github_analyse/api_wrapper.py +0 -166
  35. alita_sdk/community/analysis/gitlab_analyse/__init__.py +0 -110
  36. alita_sdk/community/analysis/gitlab_analyse/api_wrapper.py +0 -172
  37. alita_sdk/community/analysis/jira_analyse/__init__.py +0 -141
  38. alita_sdk/community/analysis/jira_analyse/api_wrapper.py +0 -252
  39. alita_sdk/runtime/llms/alita.py +0 -259
  40. {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/WHEEL +0 -0
  41. {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/licenses/LICENSE +0 -0
  42. {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/top_level.txt +0 -0
alita_sdk/tools/llm/llm_utils.py
@@ -10,12 +10,6 @@ def get_model(model_type: str, model_params: dict):
         return None
     if model_type in llms:
         return get_llm(model_type)(**model_params)
-    elif model_type == "Alita":
-        try:
-            from alita_sdk.llms.alita import AlitaChatModel
-        except ImportError:
-            raise RuntimeError("Alita model not found")
-        return AlitaChatModel(**model_params)
    elif model_type in chat_models:
        model = getattr(__import__("langchain_community.chat_models", fromlist=[model_type]), model_type)
        return model(**model_params)
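
With this change `get_model` no longer recognizes the removed "Alita" model type and only dispatches to the `llms` and `chat_models` registries. A minimal usage sketch, assuming the requested class name is present in the module's `chat_models` list (the model name and parameter values below are illustrative, not taken from the SDK):

    from alita_sdk.tools.llm.llm_utils import get_model

    # "ChatOpenAI" is resolved from langchain_community.chat_models by name;
    # the accepted parameters depend on the chosen chat model class.
    llm = get_model("ChatOpenAI", {"model_name": "gpt-4o", "temperature": 0})
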
alita_sdk/tools/memory/__init__.py
@@ -1,7 +1,7 @@
 from typing import Optional, List
 
 from langchain_core.tools import BaseToolkit, BaseTool
-from langgraph.store.postgres import PostgresStore
+
 try:
     from langmem import create_manage_memory_tool, create_search_memory_tool
 except ImportError:
@@ -15,13 +15,37 @@ from pydantic import create_model, BaseModel, ConfigDict, Field, SecretStr
 
 name = "memory"
 
-def get_tools(tool):
-    return MemoryToolkit().get_toolkit(
-        namespace=tool['settings'].get('namespace', str(tool['id'])),
-        username=tool['settings'].get('username', ''),
-        store=tool['settings'].get('store', None),
-        toolkit_name=tool.get('toolkit_name', '')
-    ).get_tools()
+def get_tools(tools_list: list, alita_client, llm, memory_store=None):
+    """
+    Get memory tools for the provided tool configurations.
+
+    Args:
+        tools_list: List of tool configurations
+        alita_client: Alita client instance
+        llm: LLM client instance
+        memory_store: Optional memory store instance
+
+    Returns:
+        List of memory tools
+    """
+    all_tools = []
+
+    for tool in tools_list:
+        if tool.get('type') == 'memory' or tool.get('toolkit_name') == 'memory':
+            try:
+                toolkit_instance = MemoryToolkit().get_toolkit(
+                    namespace=tool['settings'].get('namespace', str(tool['id'])),
+                    username=tool['settings'].get('username', ''),
+                    store=tool['settings'].get('store', memory_store),
+                    toolkit_name=tool.get('toolkit_name', '')
+                )
+                all_tools.extend(toolkit_instance.get_tools())
+            except Exception as e:
+                print(f"DEBUG: Error in memory toolkit get_tools: {e}")
+                print(f"DEBUG: Tool config: {tool}")
+                raise
+
+    return all_tools
 
 class MemoryToolkit(BaseToolkit):
     tools: List[BaseTool] = []
@@ -30,7 +54,7 @@ class MemoryToolkit(BaseToolkit):
     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         return create_model(
-            name,
+            'MemoryConfig',
             namespace=(str, Field(description="Memory namespace", json_schema_extra={'toolkit_name': True})),
             username=(Optional[str], Field(description="Username", default='Tester', json_schema_extra={'hidden': True})),
             connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
@@ -48,7 +72,27 @@ class MemoryToolkit(BaseToolkit):
         )
 
     @classmethod
-    def get_toolkit(cls, namespace: str, store: PostgresStore, **kwargs):
+    def get_toolkit(cls, namespace: str, store=None, **kwargs):
+        """
+        Get toolkit with memory tools.
+
+        Args:
+            namespace: Memory namespace
+            store: PostgresStore instance (imported dynamically)
+            **kwargs: Additional arguments
+        """
+        try:
+            from langgraph.store.postgres import PostgresStore
+        except ImportError:
+            raise ImportError(
+                "PostgreSQL dependencies (psycopg) are required for MemoryToolkit. "
+                "Install with: pip install psycopg[binary]"
+            )
+
+        # Validate store type
+        if store is not None and not isinstance(store, PostgresStore):
+            raise TypeError(f"Expected PostgresStore, got {type(store)}")
+
         return cls(tools=[
             create_manage_memory_tool(namespace=namespace, store=store),
             create_search_memory_tool(namespace=namespace, store=store)
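
Because `PostgresStore` is now imported lazily inside `get_toolkit`, the module can be imported without the psycopg extras, and the store is only required when the toolkit is actually built. A minimal wiring sketch, assuming langgraph's postgres store API (`PostgresStore.from_conn_string`, `setup()`) is available in the installed version; the connection string and namespace are placeholders:

    from langgraph.store.postgres import PostgresStore
    from alita_sdk.tools.memory import MemoryToolkit

    # Placeholder DSN; requires `pip install psycopg[binary]`.
    with PostgresStore.from_conn_string("postgresql://user:pass@localhost:5432/memories") as store:
        store.setup()  # create the store's tables on first use
        toolkit = MemoryToolkit().get_toolkit(namespace="agent-42", store=store)
        tools = toolkit.get_tools()  # manage-memory and search-memory tools
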
alita_sdk/tools/openapi/__init__.py
@@ -1,12 +1,15 @@
 import json
 import re
+import logging
 from typing import List, Any, Optional, Dict
-from langchain_core.tools import BaseTool, BaseToolkit
+from langchain_core.tools import BaseTool, BaseToolkit, ToolException
 from requests_openapi import Operation, Client, Server
 
 from pydantic import create_model, Field
 from functools import partial
 
+logger = logging.getLogger(__name__)
+
 name = "openapi"
 
 def get_tools(tool):
@@ -105,11 +108,19 @@ class AlitaOpenAPIToolkit(BaseToolkit):
             c.requestor.headers.update(headers)
         tools = []
         for i in tools_set:
+
             try:
+                if not i:
+                    raise ToolException("Operation id is missing for some of declared operations.")
                 tool = c.operations[i]
+                if not isinstance(tool, Operation):
+                    raise ToolException(f"Operation {i} is not an instance of Operation class.")
                 tools.append(create_api_tool(i, tool))
-            except KeyError:
-                ...
+            except ToolException:
+                raise
+            except Exception as e:
+                logger.warning(f"Tool {i} not found in OpenAPI spec.")
+                raise ToolException(f"Cannot create API tool ({i}): \n{e}.")
         return cls(request_session=c, tools=tools)
 
     def get_tools(self):
alita_sdk/tools/sharepoint/__init__.py
@@ -14,7 +14,8 @@ def get_tools(tool):
             site_url=tool['settings'].get('site_url', None),
             client_id=tool['settings'].get('client_id', None),
             client_secret=tool['settings'].get('client_secret', None),
-            toolkit_name=tool.get('toolkit_name'))
+            toolkit_name=tool.get('toolkit_name'),
+            llm=tool['settings'].get('llm'))
             .get_tools())
 
 
alita_sdk/tools/sharepoint/api_wrapper.py
@@ -32,7 +32,10 @@ ReadDocument = create_model(
     "ReadDocument",
     path=(str, Field(description="Contains the server-relative path of a document for reading.")),
     is_capture_image=(Optional[bool], Field(description="Determines is pictures in the document should be recognized.", default=False)),
-    page_number=(Optional[int], Field(description="Specifies which page to read. If it is None, then full document will be read.", default=None))
+    page_number=(Optional[int], Field(description="Specifies which page to read. If it is None, then full document will be read.", default=None)),
+    sheet_name=(Optional[str], Field(
+        description="Specifies which sheet to read. If it is None, then full document will be read.",
+        default=None))
 )
 
 indexData = create_model(
@@ -139,7 +142,7 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
             logging.error(f"Failed to load files from sharepoint: {e}")
             return ToolException("Can not get files. Please, double check folder name and read permissions.")
 
-    def read_file(self, path, is_capture_image: bool = False, page_number: int = None):
+    def read_file(self, path, is_capture_image: bool = False, page_number: int = None, sheet_name: str=None):
         """ Reads file located at the specified server-relative path. """
         try:
             file = self._client.web.get_file_by_server_relative_path(path)
@@ -150,7 +153,12 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
         except Exception as e:
             logging.error(f"Failed to load file from SharePoint: {e}. Path: {path}. Please, double check file name and path.")
             return ToolException("File not found. Please, check file name and path.")
-        return parse_file_content(file.name, file_content, is_capture_image, page_number)
+        return parse_file_content(file_name=file.name,
+                                  file_content=file_content,
+                                  is_capture_image=is_capture_image,
+                                  page_number=page_number,
+                                  sheet_name=sheet_name,
+                                  llm=self.llm)
 
     def _base_loader(self) -> List[Document]:
         try:
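
`read_file` now accepts a `sheet_name` for spreadsheets and forwards the wrapper's `llm` (newly passed through from the toolkit settings, see the `__init__.py` hunk above) to `parse_file_content` so embedded images can be described. A hedged usage sketch, assuming the wrapper's fields match the toolkit settings shown above; the site credentials and path are placeholders, and `llm` is assumed to be an already constructed, vision-capable LangChain chat model:

    from alita_sdk.tools.sharepoint.api_wrapper import SharepointApiWrapper

    wrapper = SharepointApiWrapper(
        site_url="https://contoso.sharepoint.com/sites/docs",  # placeholder tenant
        client_id="<client-id>",
        client_secret="<client-secret>",
        llm=llm,  # assumed: chat model that accepts image_url content parts
    )
    # Read only the "Q1" sheet of a workbook stored in SharePoint.
    text = wrapper.read_file(path="/sites/docs/Shared Documents/report.xlsx", sheet_name="Q1")
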
alita_sdk/tools/testrail/api_wrapper.py
@@ -1,6 +1,6 @@
 import json
 import logging
-from typing import Dict, List, Optional, Union, Any
+from typing import Dict, List, Optional, Union, Any, Generator
 
 import pandas as pd
 from langchain_core.tools import ToolException
@@ -9,6 +9,9 @@ from pydantic.fields import Field, PrivateAttr
 from testrail_api import StatusCodeError, TestRailAPI
 from ..elitea_base import BaseVectorStoreToolApiWrapper, BaseIndexParams
 from langchain_core.documents import Document
+
+from ...runtime.utils.utils import IndexerKeywords
+
 try:
     from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
 except ImportError:
@@ -551,7 +554,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
         suite_id: Optional[str] = None,
         section_id: Optional[int] = None,
         title_keyword: Optional[str] = None
-    ) -> List[Document]:
+    ) -> Generator[Document, None, None]:
         try:
             if suite_id:
                 resp = self._client.cases.get_cases(project_id=project_id, suite_id=int(suite_id))
@@ -567,16 +570,22 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
         if title_keyword is not None:
             cases = [case for case in cases if title_keyword.lower() in case.get('title', '').lower()]
 
-        docs: List[Document] = []
         for case in cases:
-            docs.append(Document(page_content=json.dumps(case), metadata={
+            yield Document(page_content=json.dumps(case), metadata={
                 'project_id': project_id,
                 'title': case.get('title', ''),
                 'suite_id': suite_id or case.get('suite_id', ''),
                 'id': str(case.get('id', '')),
-                'updated_on': case.get('updated_on', ''),
-            }))
-        return docs
+                'updated_on': case.get('updated_on') or -1,
+                'labels': [lbl['title'] for lbl in case.get('labels', [])],
+                'type': case.get('type_id') or -1,
+                'priority': case.get('priority_id') or -1,
+                'milestone': case.get('milestone_id') or -1,
+                'estimate': case.get('estimate') or '',
+                'automation_type': case.get('custom_automation_type') or -1,
+                'section_id': case.get('section_id') or -1,
+                'entity_type': 'test_case',
+            })
 
     def index_data(
         self,
@@ -594,7 +603,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
         vs = self._init_vector_store(collection_suffix, embeddings=embedding)
         return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
 
-    def _process_document(self, document: Document) -> Document:
+    def _process_document(self, document: Document) -> Generator[Document, None, None]:
         """
         Process an existing base document to extract relevant metadata for full document preparation.
         Used for late processing of documents after we ensure that the document has to be indexed to avoid
@@ -604,7 +613,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
             document (Document): The base document to process.
 
         Returns:
-            Document: The processed document with metadata.
+            Generator[Document, None, None]: A generator yielding processed Document objects with metadata.
         """
         try:
             # get base data from the document required to extract attachments and other metadata
@@ -613,14 +622,25 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
 
             # get a list of attachments for the case
             attachments = self._client.attachments.get_attachments_for_case_bulk(case_id=case_id)
-            attachments_data = {}
 
             # process each attachment to extract its content
             for attachment in attachments:
-                attachments_data[attachment['filename']] = self._process_attachment(attachment)
-            base_data['attachments'] = attachments_data
-            document.page_content = json.dumps(base_data)
-            return document
+                attachment_id = attachment['id']
+                # add attachment id to metadata of parent
+                document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
+
+                # TODO: pass it to chunkers
+                yield Document(page_content=self._process_attachment(attachment),
+                               metadata={
+                                   'project_id': base_data.get('project_id', ''),
+                                   IndexerKeywords.PARENT.value: case_id,
+                                   'id': attachment_id,
+                                   'filename': attachment['filename'],
+                                   'filetype': attachment['filetype'],
+                                   'created_on': attachment['created_on'],
+                                   'entity_type': 'test_case_attachment',
+                                   'is_image': attachment['is_image'],
+                               })
         except json.JSONDecodeError as e:
             raise ToolException(f"Failed to decode JSON from document: {e}")
 
@@ -634,10 +654,13 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
         Returns:
             str: string description of the attachment.
         """
+
+        page_content = "This filetype is not supported."
         if attachment['filetype'] == 'txt' :
-            return self._client.get(endpoint=f"get_attachment/{attachment['id']}")
+            page_content = self._client.get(endpoint=f"get_attachment/{attachment['id']}")
         # TODO: add support for other file types
-        return "This filetype is not supported."
+        # use utility to handle different types (tools/utils)
+        return page_content
 
     def _to_markup(self, data: List[Dict], output_format: str) -> str:
         """
alita_sdk/tools/utils/content_parser.py
@@ -1,3 +1,5 @@
+import re
+
 from docx import Document
 from io import BytesIO
 import pandas as pd
@@ -8,8 +10,53 @@ import io
 import pymupdf
 from langchain_core.tools import ToolException
 from transformers import BlipProcessor, BlipForConditionalGeneration
+from langchain_core.messages import HumanMessage
+
+from ...runtime.langchain.tools.utils import bytes_to_base64
+
+image_processing_prompt='''
+You are an AI model designed for analyzing images. Your task is to accurately describe the content of the given image. Depending on the type of image, follow these specific instructions:
+
+If the image is a diagram (e.g., chart, table, pie chart, bar graph, etc.):
+
+Identify the type of diagram.
+Extract all numerical values, labels, axis titles, headings, legends, and any other textual elements.
+Describe the relationships or trends between the data, if visible.
+If the image is a screenshot:
+
+Describe what is shown in the screenshot.
+If it is a software interface, identify the program or website name (if visible).
+List the key interface elements (e.g., buttons, menus, text fields, images, headers).
+If there is text, extract it.
+If the screenshot shows a conversation, describe the participants, the content of the messages, and timestamps (if visible).
+If the image is a photograph:
+
+Describe the main objects, people, animals, or elements visible in the photo.
+Specify the setting (e.g., indoors, outdoors, nature, urban area).
+If possible, identify the actions being performed by people or objects in the photo.
+If the image is an illustration or drawing:
 
-def parse_file_content(file_name, file_content, is_capture_image: bool = False, page_number: int = None, sheet_name: str = None):
+Describe the style of the illustration (e.g., realistic, cartoonish, abstract).
+Identify the main elements, their colors, and the composition of the image.
+If there is text, extract it.
+If the image contains text:
+
+Extract all text from the image.
+Specify the format of the text (e.g., heading, paragraph, list).
+If the image is a mixed type (e.g., a diagram within a screenshot):
+
+Identify all types of content present in the image.
+Perform an analysis for each type of content separately, following the relevant instructions above.
+If the image does not fit into any of the above categories:
+
+Provide a detailed description of what is shown in the image.
+Highlight any visible details that could help in understanding the image.
+Be as precise and thorough as possible in your responses. If something is unclear or illegible, state that explicitly.
+'''
+
+IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp', 'svg']
+
+def parse_file_content(file_name, file_content, is_capture_image: bool = False, page_number: int = None, sheet_name: str = None, llm=None):
     if file_name.endswith('.txt'):
         return parse_txt(file_content)
     elif file_name.endswith('.docx'):
@@ -17,9 +64,12 @@ def parse_file_content(file_name, file_content, is_capture_image: bool = False,
     elif file_name.endswith('.xlsx') or file_name.endswith('.xls'):
         return parse_excel(file_content, sheet_name)
     elif file_name.endswith('.pdf'):
-        return parse_pdf(file_content, page_number, is_capture_image)
+        return parse_pdf(file_content, page_number, is_capture_image, llm)
     elif file_name.endswith('.pptx'):
-        return parse_pptx(file_content, page_number, is_capture_image)
+        return parse_pptx(file_content, page_number, is_capture_image, llm)
+    elif any(file_name.lower().endswith(f".{ext}") for ext in IMAGE_EXTENSIONS):
+        match = re.search(r'\.([a-zA-Z0-9]+)$', file_name)
+        return __perform_llm_prediction_for_image(llm, file_content, match.group(1), image_processing_prompt)
     else:
         return ToolException(
             "Not supported type of files entered. Supported types are TXT, DOCX, PDF, PPTX, XLSX and XLS only.")
@@ -49,28 +99,28 @@ def parse_sheet(excel_file, sheet_name):
     df.fillna('', inplace=True)
     return df.to_string()
 
-def parse_pdf(file_content, page_number, is_capture_image):
+def parse_pdf(file_content, page_number, is_capture_image, llm):
     with pymupdf.open(stream=file_content, filetype="pdf") as report:
         text_content = ''
         if page_number is not None:
             page = report.load_page(page_number - 1)
-            text_content += read_pdf_page(report, page, page_number, is_capture_image)
+            text_content += read_pdf_page(report, page, page_number, is_capture_image, llm)
         else:
             for index, page in enumerate(report, start=1):
-                text_content += read_pdf_page(report, page, index, is_capture_image)
+                text_content += read_pdf_page(report, page, index, is_capture_image, llm)
         return text_content
 
-def parse_pptx(file_content, page_number, is_capture_image):
+def parse_pptx(file_content, page_number, is_capture_image, llm=None):
     prs = Presentation(io.BytesIO(file_content))
     text_content = ''
     if page_number is not None:
-        text_content += read_pptx_slide(prs.slides[page_number - 1], page_number, is_capture_image)
+        text_content += read_pptx_slide(prs.slides[page_number - 1], page_number, is_capture_image, llm)
     else:
         for index, slide in enumerate(prs.slides, start=1):
-            text_content += read_pptx_slide(slide, index, is_capture_image)
+            text_content += read_pptx_slide(slide, index, is_capture_image, llm)
     return text_content
 
-def read_pdf_page(report, page, index, is_capture_images):
+def read_pdf_page(report, page, index, is_capture_images, llm=None):
     text_content = f'Page: {index}\n'
     text_content += page.get_text()
     if is_capture_images:
@@ -79,7 +129,7 @@ def read_pdf_page(report, page, index, is_capture_images):
             xref = img[0]
             base_image = report.extract_image(xref)
             img_bytes = base_image["image"]
-            text_content += describe_image(Image.open(io.BytesIO(img_bytes)).convert("RGB"))
+            text_content += __perform_llm_prediction_for_image(llm, img_bytes)
     return text_content
 
 def read_docx_from_bytes(file_content):
@@ -94,14 +144,14 @@ def read_docx_from_bytes(file_content):
         print(f"Error reading .docx from bytes: {e}")
         return ""
 
-def read_pptx_slide(slide, index, is_capture_image):
+def read_pptx_slide(slide, index, is_capture_image, llm):
     text_content = f'Slide: {index}\n'
     for shape in slide.shapes:
         if hasattr(shape, "text"):
            text_content += shape.text + "\n"
        elif is_capture_image and shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
            try:
-                caption = describe_image(Image.open(io.BytesIO(shape.image.blob)).convert("RGB"))
+                caption = __perform_llm_prediction_for_image(llm, shape.image.blob)
            except:
                caption = "\n[Picture: unknown]\n"
            text_content += caption
@@ -113,3 +163,17 @@ def describe_image(image):
     inputs = processor(image, return_tensors="pt")
     out = model.generate(**inputs)
     return "\n[Picture: " + processor.decode(out[0], skip_special_tokens=True) + "]\n"
+
+def __perform_llm_prediction_for_image(llm, image: bytes, image_format='png', prompt=image_processing_prompt) -> str:
+    base64_string = bytes_to_base64(image)
+    result = llm.invoke([
+        HumanMessage(
+            content=[
+                {"type": "text", "text": prompt},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/{image_format};base64,{base64_string}"},
+                },
+            ])
+    ])
+    return f"\n[Image description: {result.content}]\n"
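
`parse_file_content` now routes standalone image files, and embedded PDF/PPTX images when `is_capture_image=True`, through the supplied LLM instead of the local BLIP captioner. A hedged sketch of calling it for a PNG; the file path is a placeholder, and `llm` is assumed to be an already constructed LangChain chat model that accepts `image_url` content parts (e.g. a vision-capable OpenAI or Azure deployment):

    from pathlib import Path
    from alita_sdk.tools.utils.content_parser import parse_file_content

    image_bytes = Path("diagram.png").read_bytes()  # placeholder file
    description = parse_file_content(
        file_name="diagram.png",
        file_content=image_bytes,
        llm=llm,  # assumed: vision-capable chat model defined elsewhere
    )
    print(description)  # "\n[Image description: ...]\n"
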
{alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.205
+Version: 0.3.207
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0