PyPI - alita-sdk - Versions diffs - 0.3.204__py3-none-any.whl → 0.3.205__py3-none-any.whl - Mend

alita-sdk 0.3.204__py3-none-any.whl → 0.3.205__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

alita_sdk/runtime/tools/vectorstore.py +143 -13
alita_sdk/tools/__init__.py +2 -0
alita_sdk/tools/aws/__init__.py +7 -0
alita_sdk/tools/aws/delta_lake/__init__.py +136 -0
alita_sdk/tools/aws/delta_lake/api_wrapper.py +220 -0
alita_sdk/tools/aws/delta_lake/schemas.py +20 -0
alita_sdk/tools/aws/delta_lake/tool.py +35 -0
alita_sdk/tools/elitea_base.py +49 -4
alita_sdk/tools/google/__init__.py +7 -0
alita_sdk/tools/google/bigquery/__init__.py +154 -0
alita_sdk/tools/google/bigquery/api_wrapper.py +502 -0
alita_sdk/tools/google/bigquery/schemas.py +102 -0
alita_sdk/tools/google/bigquery/tool.py +34 -0
alita_sdk/tools/sharepoint/api_wrapper.py +60 -4
alita_sdk/tools/testrail/__init__.py +9 -1
alita_sdk/tools/testrail/api_wrapper.py +132 -6
alita_sdk/tools/zephyr_scale/api_wrapper.py +271 -22
{alita_sdk-0.3.204.dist-info → alita_sdk-0.3.205.dist-info}/METADATA +3 -1
{alita_sdk-0.3.204.dist-info → alita_sdk-0.3.205.dist-info}/RECORD +22 -12
{alita_sdk-0.3.204.dist-info → alita_sdk-0.3.205.dist-info}/WHEEL +0 -0
{alita_sdk-0.3.204.dist-info → alita_sdk-0.3.205.dist-info}/licenses/LICENSE +0 -0
{alita_sdk-0.3.204.dist-info → alita_sdk-0.3.205.dist-info}/top_level.txt +0 -0

alita_sdk/tools/sharepoint/api_wrapper.py CHANGED

@@ -1,5 +1,6 @@
+import json
 import logging
-from typing import Optional
+from typing import Optional, List, Dict, Any
 from ..utils.content_parser import parse_file_content
 from langchain_core.tools import ToolException
@@ -7,7 +8,9 @@ from office365.runtime.auth.client_credential import ClientCredential
 from office365.sharepoint.client_context import ClientContext
 from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr
-from ..elitea_base import BaseToolApiWrapper
+from ..elitea_base import BaseToolApiWrapper, BaseIndexParams, BaseVectorStoreToolApiWrapper
+from ...runtime.langchain.interfaces.llm_processor import get_embeddings
+from langchain_core.documents import Document
 NoInput = create_model(
     "NoInput"
@@ -32,14 +35,30 @@ ReadDocument = create_model(
     page_number=(Optional[int], Field(description="Specifies which page to read. If it is None, then full document will be read.", default=None))
 )
+indexData = create_model(
+    "indexData",
+    __base__=BaseIndexParams,
+    progress_step=(Optional[int], Field(default=None, ge=0, le=100,
+                         description="Optional step size for progress reporting during indexing")),
+    clean_index=(Optional[bool], Field(default=False,
+                       description="Optional flag to enforce clean existing index before indexing new data")),
+)
-class SharepointApiWrapper(BaseToolApiWrapper):
+class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
     site_url: str
     client_id: str = None
     client_secret: SecretStr = None
     token: SecretStr = None
     _client: Optional[ClientContext] = PrivateAttr()  # Private attribute for the office365 client
+    llm: Any = None
+    connection_string: Optional[SecretStr] = None
+    collection_name: Optional[str] = None
+    embedding_model: Optional[str] = "HuggingFaceEmbeddings"
+    embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
+    vectorstore_type: Optional[str] = "PGVector"
     @model_validator(mode='before')
     @classmethod
     def validate_toolkit(cls, values):
@@ -111,7 +130,8 @@ class SharepointApiWrapper(BaseToolApiWrapper):
                     'Path': file.properties['ServerRelativeUrl'],
                     'Created': file.properties['TimeCreated'],
                     'Modified': file.properties['TimeLastModified'],
-                    'Link': file.properties['LinkingUrl']
+                    'Link': file.properties['LinkingUrl'],
+                    'id': file.properties['UniqueId']
                 }
                 result.append(temp_props)
             return result if result else ToolException("Can not get files or folder is empty. Please, double check folder name and read permissions.")
@@ -132,6 +152,36 @@ class SharepointApiWrapper(BaseToolApiWrapper):
             return ToolException("File not found. Please, check file name and path.")
         return parse_file_content(file.name, file_content, is_capture_image, page_number)
+    def _base_loader(self) -> List[Document]:
+        try:
+            all_files = self.get_files_list()
+        except Exception as e:
+            raise ToolException(f"Unable to extract files: {e}")
+        docs: List[Document] = []
+        for file in all_files:
+            metadata = {
+                ("updated_at" if k == "Modified" else k): str(v)
+                for k, v in file.items()
+            }
+            docs.append(Document(page_content="", metadata=metadata))
+        return docs
+    def index_data(self,
+                   collection_suffix: str = '',
+                   progress_step: int = None,
+                   clean_index: bool = False):
+        docs = self._base_loader()
+        embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+        vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+        return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+    def _process_document(self, document: Document) -> Document:
+        page_content = self.read_file(document.metadata['Path'], is_capture_image=True)
+        document.page_content = json.dumps(str(page_content))
+        return document
     def get_available_tools(self):
         return [
             {
@@ -151,5 +201,11 @@ class SharepointApiWrapper(BaseToolApiWrapper):
                 "description": self.read_file.__doc__,
                 "args_schema": ReadDocument,
                 "ref": self.read_file
+            },
+            {
+                "name": "index_data",
+                "ref": self.index_data,
+                "description": self.index_data.__doc__,
+                "args_schema": indexData,
             }
         ]

alita_sdk/tools/testrail/__init__.py CHANGED

@@ -16,7 +16,15 @@ def get_tools(tool):
         url=tool['settings']['url'],
         password=tool['settings'].get('password', None),
         email=tool['settings'].get('email', None),
-        toolkit_name=tool.get('toolkit_name')
+        toolkit_name=tool.get('toolkit_name'),
+        llm=tool['settings'].get('llm', None),
+        # indexer settings
+        connection_string=tool['settings'].get('connection_string', None),
+        collection_name=f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+        embedding_model="HuggingFaceEmbeddings",
+        embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+        vectorstore_type="PGVector"
     ).get_tools()

alita_sdk/tools/testrail/api_wrapper.py CHANGED

@@ -1,14 +1,18 @@
 import json
 import logging
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, Union, Any
 import pandas as pd
 from langchain_core.tools import ToolException
 from pydantic import SecretStr, create_model, model_validator
 from pydantic.fields import Field, PrivateAttr
 from testrail_api import StatusCodeError, TestRailAPI
-from ..elitea_base import BaseToolApiWrapper
+from ..elitea_base import BaseVectorStoreToolApiWrapper, BaseIndexParams
+from langchain_core.documents import Document
+try:
+    from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
+except ImportError:
+    from alita_sdk.langchain.interfaces.llm_processor import get_embeddings
 logger = logging.getLogger(__name__)
@@ -281,6 +285,19 @@ updateCase = create_model(
     ),
 )
+# Schema for indexing TestRail data into vector store
+indexData = create_model(
+    "indexData",
+    __base__=BaseIndexParams,
+    project_id=(str, Field(description="TestRail project ID to index data from")),
+    suite_id=(Optional[str], Field(default=None, description="Optional TestRail suite ID to filter test cases")),
+    section_id=(Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
+    title_keyword=(Optional[str], Field(default=None, description="Optional keyword to filter test cases by title")),
+    progress_step=(Optional[int],
+                   Field(default=None, ge=0, le=100, description="Optional step size for progress reporting during indexing")),
+    clean_index=(Optional[bool],
+                       Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
+)
 SUPPORTED_KEYS = {
     "id", "title", "section_id", "template_id", "type_id", "priority_id", "milestone_id",
@@ -291,11 +308,19 @@ SUPPORTED_KEYS = {
 }
-class TestrailAPIWrapper(BaseToolApiWrapper):
+class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
     url: str
     password: Optional[SecretStr] = None,
     email: Optional[str] = None,
     _client: Optional[TestRailAPI] = PrivateAttr() # Private attribute for the TestRail client
+    llm: Any = None
+    connection_string: Optional[SecretStr] = None
+    collection_name: Optional[str] = None
+    embedding_model: Optional[str] = "HuggingFaceEmbeddings"
+    embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
+    vectorstore_type: Optional[str] = "PGVector"
     @model_validator(mode="before")
     @classmethod
@@ -492,7 +517,7 @@ class TestrailAPIWrapper(BaseToolApiWrapper):
         you can submit and update specific fields only).
         :param case_id: T
-            he ID of the test case
+            He ID of the test case
         :param kwargs:
             :key title: str
                 The title of the test case
@@ -522,6 +547,98 @@ class TestrailAPIWrapper(BaseToolApiWrapper):
             f"Test case #{case_id} has been updated at '{updated_case['updated_on']}')"
         )
+    def _base_loader(self, project_id: str,
+                     suite_id: Optional[str] = None,
+                     section_id: Optional[int] = None,
+                     title_keyword: Optional[str] = None
+                     ) -> List[Document]:
+        try:
+            if suite_id:
+                resp = self._client.cases.get_cases(project_id=project_id, suite_id=int(suite_id))
+                cases = resp.get('cases', [])
+            else:
+                resp = self._client.cases.get_cases(project_id=project_id)
+                cases = resp.get('cases', [])
+        except StatusCodeError as e:
+            raise ToolException(f"Unable to extract test cases: {e}")
+            # Apply filters
+        if section_id is not None:
+            cases = [case for case in cases if case.get('section_id') == section_id]
+        if title_keyword is not None:
+            cases = [case for case in cases if title_keyword.lower() in case.get('title', '').lower()]
+        docs: List[Document] = []
+        for case in cases:
+            docs.append(Document(page_content=json.dumps(case), metadata={
+                'project_id': project_id,
+                'title': case.get('title', ''),
+                'suite_id': suite_id or case.get('suite_id', ''),
+                'id': str(case.get('id', '')),
+                'updated_on': case.get('updated_on', ''),
+            }))
+        return docs
+    def index_data(
+            self,
+            project_id: str,
+            suite_id: Optional[str] = None,
+            collection_suffix: str = "",
+            section_id: Optional[int] = None,
+            title_keyword: Optional[str] = None,
+            progress_step: Optional[int] = None,
+            clean_index: Optional[bool] = False
+    ):
+        """Load TestRail test cases into the vector store."""
+        docs = self._base_loader(project_id, suite_id, section_id, title_keyword)
+        embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+        vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+        return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+    def _process_document(self, document: Document) -> Document:
+        """
+        Process an existing base document to extract relevant metadata for full document preparation.
+        Used for late processing of documents after we ensure that the document has to be indexed to avoid
+        time-consuming operations for documents which might be useless.
+        Args:
+            document (Document): The base document to process.
+        Returns:
+            Document: The processed document with metadata.
+        """
+        try:
+            # get base data from the document required to extract attachments and other metadata
+            base_data = json.loads(document.page_content)
+            case_id = base_data.get("id")
+            # get a list of attachments for the case
+            attachments = self._client.attachments.get_attachments_for_case_bulk(case_id=case_id)
+            attachments_data = {}
+            # process each attachment to extract its content
+            for attachment in attachments:
+                attachments_data[attachment['filename']] = self._process_attachment(attachment)
+            base_data['attachments'] = attachments_data
+            document.page_content = json.dumps(base_data)
+            return document
+        except json.JSONDecodeError as e:
+            raise ToolException(f"Failed to decode JSON from document: {e}")
+    def _process_attachment(self, attachment: Dict[str, Any]) -> str:
+        """
+        Processes an attachment to extract its content.
+        Args:
+            attachment (Dict[str, Any]): The attachment data.
+        Returns:
+            str: string description of the attachment.
+        """
+        if attachment['filetype'] == 'txt' :
+            return self._client.get(endpoint=f"get_attachment/{attachment['id']}")
+        # TODO: add support for other file types
+        return "This filetype is not supported."
     def _to_markup(self, data: List[Dict], output_format: str) -> str:
         """
         Converts the given data into the specified format: 'json', 'csv', or 'markdown'.
@@ -550,7 +667,7 @@ class TestrailAPIWrapper(BaseToolApiWrapper):
             return df.to_markdown(index=False)
     def get_available_tools(self):
-        return [
+        tools = [
             {
                 "name": "get_case",
                 "ref": self.get_case,
@@ -587,4 +704,13 @@ class TestrailAPIWrapper(BaseToolApiWrapper):
                 "description": self.update_case.__doc__,
                 "args_schema": updateCase,
             },
+            {
+                "name": "index_data",
+                "ref": self.index_data,
+                "description": self.index_data.__doc__,
+                "args_schema": indexData,
+            }
         ]
+        # Add vector search from base
+        tools.extend(self._get_vector_search_tools())
+        return tools

alita-sdk 0.3.204__py3-none-any.whl → 0.3.205__py3-none-any.whl

alita-sdk 0.3.204__py3-none-any.whl → 0.3.205__py3-none-any.whl