alita-sdk 0.3.209__py3-none-any.whl → 0.3.210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. alita_sdk/runtime/clients/artifact.py +18 -4
  2. alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py +2 -1
  3. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +3 -3
  4. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +8 -4
  5. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
  6. alita_sdk/runtime/langchain/langraph_agent.py +1 -1
  7. alita_sdk/runtime/toolkits/artifact.py +7 -3
  8. alita_sdk/runtime/toolkits/tools.py +8 -1
  9. alita_sdk/runtime/tools/application.py +2 -0
  10. alita_sdk/runtime/tools/artifact.py +65 -8
  11. alita_sdk/runtime/tools/vectorstore.py +125 -41
  12. alita_sdk/runtime/utils/utils.py +3 -0
  13. alita_sdk/tools/ado/__init__.py +8 -0
  14. alita_sdk/tools/ado/repos/repos_wrapper.py +37 -0
  15. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +0 -7
  16. alita_sdk/tools/ado/work_item/__init__.py +4 -0
  17. alita_sdk/tools/ado/work_item/ado_wrapper.py +37 -4
  18. alita_sdk/tools/aws/delta_lake/__init__.py +1 -1
  19. alita_sdk/tools/bitbucket/__init__.py +13 -1
  20. alita_sdk/tools/bitbucket/api_wrapper.py +31 -4
  21. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +31 -0
  22. alita_sdk/tools/chunkers/code/codeparser.py +18 -10
  23. alita_sdk/tools/confluence/api_wrapper.py +35 -134
  24. alita_sdk/tools/confluence/loader.py +30 -28
  25. alita_sdk/tools/elitea_base.py +112 -11
  26. alita_sdk/tools/figma/__init__.py +13 -1
  27. alita_sdk/tools/figma/api_wrapper.py +47 -3
  28. alita_sdk/tools/github/api_wrapper.py +8 -0
  29. alita_sdk/tools/github/github_client.py +18 -0
  30. alita_sdk/tools/gitlab/__init__.py +4 -0
  31. alita_sdk/tools/gitlab/api_wrapper.py +10 -0
  32. alita_sdk/tools/google/bigquery/__init__.py +1 -1
  33. alita_sdk/tools/jira/__init__.py +21 -13
  34. alita_sdk/tools/jira/api_wrapper.py +285 -5
  35. alita_sdk/tools/sharepoint/__init__.py +11 -1
  36. alita_sdk/tools/sharepoint/api_wrapper.py +23 -53
  37. alita_sdk/tools/testrail/__init__.py +4 -0
  38. alita_sdk/tools/testrail/api_wrapper.py +21 -54
  39. alita_sdk/tools/utils/content_parser.py +72 -8
  40. alita_sdk/tools/xray/__init__.py +8 -1
  41. alita_sdk/tools/xray/api_wrapper.py +505 -14
  42. alita_sdk/tools/zephyr_scale/api_wrapper.py +5 -5
  43. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/METADATA +1 -1
  44. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/RECORD +47 -47
  45. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/WHEEL +0 -0
  46. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/licenses/LICENSE +0 -0
  47. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/top_level.txt +0 -0
alita_sdk/tools/jira/api_wrapper.py
@@ -4,17 +4,20 @@ import re
  import traceback
  from json import JSONDecodeError
  from traceback import format_exc
- from typing import List, Optional, Any, Dict
+ from typing import List, Optional, Any, Dict, Generator
  import os

  from atlassian import Jira
+ from langchain_core.documents import Document
  from langchain_core.tools import ToolException
  from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
  import requests

- from ..elitea_base import BaseToolApiWrapper
+ from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
  from ..llm.img_utils import ImageDescriptionCache
  from ..utils import is_cookie_token, parse_cookie_string
+ from ..utils.content_parser import parse_file_content, load_content_from_bytes
+ from ...runtime.utils.utils import IndexerKeywords

  logger = logging.getLogger(__name__)

@@ -388,7 +391,7 @@ def process_search_response(jira_url, response, payload_params: Dict[str, Any] =

  return str(processed_issues)

- class JiraApiWrapper(BaseToolApiWrapper):
+ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
  base_url: str
  api_version: Optional[str] = "2",
  api_key: Optional[SecretStr] = None,
@@ -402,7 +405,6 @@ class JiraApiWrapper(BaseToolApiWrapper):
  _client: Jira = PrivateAttr()
  _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=lambda: ImageDescriptionCache(max_size=50))
  issue_search_pattern: str = r'/rest/api/\d+/search'
- llm: Any = None

  @model_validator(mode='before')
  @classmethod
@@ -1061,7 +1063,7 @@ class JiraApiWrapper(BaseToolApiWrapper):
  def process_image_match(match):
  """Process each image reference and get its contextual description"""
  image_ref = match.group(1)
- full_match = match.group(0) # The complete image reference with markers
+ full_match = match.group(0)  # The complete image reference with markers

  logger.info(f"Processing image reference: {image_ref} (full match: {full_match})")

@@ -1221,6 +1223,284 @@ class JiraApiWrapper(BaseToolApiWrapper):
  logger.error(f"Error processing comments with images: {stacktrace}")
  return f"Error processing comments with images: {str(e)}"

+ def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
+ """
+ Base loader for Jira issues, used to load issues as documents.
+ Uses the existing Jira client instance to fetch and process issues.
+ """
+ # Extract parameters from kwargs
+ jql = kwargs.get('jql')
+ fields_to_extract = kwargs.get('fields_to_extract')
+ fields_to_index = kwargs.get('fields_to_index')
+ include_attachments = kwargs.get('include_attachments', False)
+ max_total_issues = kwargs.get('max_total_issues', 1000)
+
+ # set values for skipped attachment extensions
+ self._skipped_attachment_extensions = kwargs.get('skip_attachment_extensions', [])
+ self._included_fields = fields_to_extract.copy() if fields_to_extract else []
+
+ try:
+ # Prepare fields to extract
+ DEFAULT_FIELDS = ['status', 'summary', 'reporter', 'description', 'created', 'updated', 'assignee', 'project', 'issuetype']
+ fields = DEFAULT_FIELDS.copy()
+
+ if fields_to_extract:
+ fields.extend(fields_to_extract)
+
+ if include_attachments:
+ fields.append('attachment')
+
+ # Use provided JQL query or default to all issues
+ if not jql:
+ jql_query = "ORDER BY updated DESC" # Default to get all issues ordered by update time
+ else:
+ jql_query = jql
+
+ # Remove duplicates and prepare fields
+ final_fields = ','.join({field.lower() for field in fields})
+
+ # Fetch issues using the existing Jira client
+ issue_generator = self._jql_get_tickets(
+ jql_query,
+ fields=final_fields,
+ limit=max_total_issues
+ )
+
+ # Process each batch of issues
+ for issues_batch in issue_generator:
+ for issue in issues_batch:
+ issue_doc = self._process_issue_for_indexing(
+ issue,
+ fields_to_index
+ )
+ if issue_doc:
+ yield issue_doc
+
+ except Exception as e:
+ logger.error(f"Error loading Jira issues: {str(e)}")
+ raise ToolException(f"Unable to load Jira issues: {str(e)}")
+
+ def _process_document(self, base_document: Document) -> Generator[Document, None, None]:
+ """
+ Process a base document to extract and index Jira issues extra fields: comments, attachments, etc..
+ """
+
+ issue_key = base_document.metadata.get('issue_key')
+ # get attachments content
+
+ issue = self._client.issue(issue_key, fields="attachment")
+ attachments = issue.get('fields', {}).get('attachment', [])
+ for attachment in attachments:
+ # get extension
+ ext = f".{attachment['filename'].split('.')[-1].lower()}"
+ if ext not in self._skipped_attachment_extensions:
+ attachment_id = f"attach_{attachment['id']}"
+ base_document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
+ try:
+ attachment_content = self._client.get_attachment_content(attachment['id'])
+ except Exception as e:
+ logger.error(f"Failed to download attachment {attachment['filename']} for issue {issue_key}: {str(e)}")
+ attachment_content = self._client.get(path=f"secure/attachment/{attachment['id']}/{attachment['filename']}", not_json_response=True)
+ content = load_content_from_bytes(attachment_content, ext, llm=self.llm) if ext not in '.pdf' \
+ else parse_file_content(file_content=attachment_content, file_name=attachment['filename'], llm=self.llm, is_capture_image=True)
+ if not content:
+ continue
+ yield Document(page_content=content,
+ metadata={
+ 'id': attachment_id,
+ 'issue_key': issue_key,
+ 'source': f"{self.base_url}/browse/{issue_key}",
+ 'filename': attachment['filename'],
+ 'created': attachment['created'],
+ 'mimeType': attachment['mimeType'],
+ 'author': attachment.get('author', {}).get('name'),
+ IndexerKeywords.PARENT.value: base_document.metadata.get('id', None),
+ 'type': 'attachment',
+ })

+ def _jql_get_tickets(self, jql, fields="*all", start=0, limit=None, expand=None, validate_query=None):
+ """
+ Generator that yields batches of Jira issues based on JQL query.
+ """
+ from atlassian.errors import ApiError
+
+ params = {}
+ if limit is not None:
+ params["maxResults"] = int(limit)
+ if fields is not None:
+ if isinstance(fields, (list, tuple, set)):
+ fields = ",".join(fields)
+ params["fields"] = fields
+ if jql is not None:
+ params["jql"] = jql
+ if expand is not None:
+ params["expand"] = expand
+ if validate_query is not None:
+ params["validateQuery"] = validate_query
+
+ url = self._client.resource_url("search")
+
+ while True:
+ params["startAt"] = int(start)
+ try:
+ response = self._client.get(url, params=params)
+ if not response:
+ break
+ except ApiError as e:
+ error_message = f"Jira API error: {str(e)}"
+ raise ValueError(f"Failed to fetch issues from Jira: {error_message}")
+
+ issues = response["issues"]
+ yield issues
+ if limit is not None and len(response["issues"]) + start >= limit:
+ break
+ if not response["issues"]:
+ break
+ start += len(issues)

+ def _process_issue_for_indexing(self, issue: dict, fields_to_index=None) -> Document:
+ """
+ Process a single Jira issue into a Document for indexing.
+ Copied and adapted from AlitaJiraLoader logic.
+ """
+ try:
+ # Build content starting with summary
+ content = f"{issue['fields']['summary']}\n"
+
+ # Add description if present
+ description = issue['fields'].get('description', '')
+ if description:
+ content += f"{description}\n"
+ else:
+ # If no description, still create document but with minimal content
+ logger.debug(f"Issue {issue.get('key', 'unknown')} has no description")
+
+ # Add comments if present
+ if 'comment' in issue['fields'] and issue['fields']['comment'].get('comments'):
+ for comment in issue['fields']['comment']['comments']:
+ content += f"{comment['body']}\n"
+
+ # Add additional fields to index
+ if fields_to_index:
+ for field in fields_to_index:
+ if field in issue['fields'] and issue['fields'][field]:
+ field_value = issue['fields'][field]
+ # Convert complex objects to string representation
+ if isinstance(field_value, dict):
+ field_value = str(field_value)
+ elif isinstance(field_value, list):
+ field_value = ', '.join(str(item) for item in field_value)
+ content += f"{field_value}\n"
+
+ # Create metadata
+ metadata = {
+ "id": issue["id"],
+ "issue_key": issue["key"],
+ "source": f"{self.base_url}/browse/{issue['key']}",
+ "author": issue["fields"].get("reporter", {}).get("emailAddress") if issue["fields"].get("reporter") else None,
+ "status": issue["fields"].get("status", {}).get("name") if issue["fields"].get("status") else None,
+ "updated_on": issue["fields"].get("updated"),
+ "created_on": issue["fields"].get("created"),
+ "project": issue["fields"].get("project", {}).get("key") if issue["fields"].get("project") else None,
+ "issuetype": issue["fields"].get("issuetype", {}).get("name") if issue["fields"].get("issuetype") else None,
+ "type": "jira_issue",
+ }
+
+ return Document(page_content=content, metadata=metadata)
+
+ except Exception as e:
+ logger.error(f"Error processing issue {issue.get('key', 'unknown')}: {str(e)}")
+ return None
+
+ def _index_tool_params(self, **kwargs) -> dict[str, tuple[type, Field]]:
+ return {
+ 'jql': (Optional[str], Field(
+ description="JQL query to filter issues. If not provided, all accessible issues will be indexed. Examples: 'project=PROJ', 'parentEpic=EPIC-123', 'status=Open'",
+ default=None)),
+ 'fields_to_extract': (Optional[List[str]],
+ Field(description="Additional fields to extract from issues", default=None)),
+ 'fields_to_index': (Optional[List[str]],
+ Field(description="Additional fields to include in indexed content", default=None)),
+ 'include_attachments': (Optional[bool],
+ Field(description="Whether to include attachment content in indexing",
+ default=False)),
+ 'max_total_issues': (Optional[int], Field(description="Maximum number of issues to index", default=1000)),
+ 'skip_attachment_extensions': (Optional[str], Field(
+ description="Comma-separated list of file extensions to skip when processing attachments",
+ default=None)),
+ }
+
+ # def index_data(self,
+ # jql: Optional[str] = None,
+ # fields_to_extract: Optional[List[str]] = None,
+ # fields_to_index: Optional[List[str]] = None,
+ # include_attachments: Optional[bool] = False,
+ # max_total_issues: Optional[int] = 1000,
+ # skip_attachment_extensions: Optional[List[str]] = None,
+ # collection_suffix: str = "",
+ # progress_step: Optional[int] = None,
+ # clean_index: Optional[bool] = False):
+ # """
+ # Index Jira issues into the vector store.
+ #
+ # Args:
+ # jql: JQL query to filter issues. If not provided, all accessible issues will be indexed
+ # fields_to_extract: Additional fields to extract from issues
+ # fields_to_index: Additional fields to include in indexed content
+ # include_attachments: Whether to include attachment content in indexing
+ # max_total_issues: Maximum number of issues to index
+ # skip_attachment_extensions: Comma-separated list of file extensions to skip when processing attachments
+ # collection_suffix: Optional suffix for collection name (max 7 characters)
+ # progress_step: Optional step size for progress reporting during indexing
+ # clean_index: Optional flag to enforce clean existing index before indexing new data
+ #
+ # Returns:
+ # Result message from the vector store indexing operation
+ # """
+ # try:
+ # # Validate that at least one filter is provided
+ # if not any([jql]):
+ # raise ToolException("Must provide at least one of: jql to filter issues for indexing")
+ #
+ # # set extensions to skip for post-processing
+ # self._skipped_attachment_extensions = skip_attachment_extensions if skip_attachment_extensions else []
+ #
+ # # Get embeddings
+ # from ...runtime.langchain.interfaces.llm_processor import get_embeddings
+ # embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+ #
+ # # Initialize vector store
+ # vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+ #
+ # # Prepare parameters for the loader
+ # loader_params = {
+ # 'jql': jql,
+ # 'fields_to_extract': fields_to_extract,
+ # 'fields_to_index': fields_to_index,
+ # 'include_attachments': include_attachments,
+ # 'max_total_issues': max_total_issues,
+ # 'skip_attachment_extensions': skip_attachment_extensions,
+ # }
+ #
+ # # Load documents using _base_loader
+ # docs = self._base_loader(**loader_params)
+ #
+ # if not docs:
+ # return "No Jira issues found matching the specified criteria."
+ #
+ # docs = list(docs) # Convert generator to list for logging and indexing
+ # logger.info(f"Loaded {len(docs)} Jira issues for indexing")
+ #
+ # # Index the documents
+ # result = vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+ #
+ # return f"Successfully indexed {len(docs)} Jira issues. {result}"
+ #
+ # except Exception as e:
+ # logger.error(f"Error indexing Jira issues: {str(e)}")
+ # raise ToolException(f"Error indexing Jira issues: {str(e)}")
+
+ @extend_with_vector_tools
  def get_available_tools(self):
  return [
  {
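For orientation, a minimal usage sketch of the new loader follows. It is not part of the diff: only fields visible in this diff are used to construct the wrapper (a real instance may require additional credentials), the JQL value is an example, and the metadata keys are the ones assembled in _process_issue_for_indexing.

```python
from alita_sdk.tools.jira.api_wrapper import JiraApiWrapper

# Hypothetical sketch, not from the diff: construction details beyond base_url/api_key
# are assumptions and may differ in a real deployment.
wrapper = JiraApiWrapper(base_url="https://jira.example.com", api_key="***")

for doc in wrapper._base_loader(
    jql="project=PROJ AND status=Open",  # optional JQL filter, see _index_tool_params
    include_attachments=False,
    max_total_issues=100,
):
    # each yielded Document carries the metadata built in _process_issue_for_indexing
    print(doc.metadata["issue_key"], doc.metadata["status"], len(doc.page_content))
```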
alita_sdk/tools/sharepoint/__init__.py
@@ -15,7 +15,13 @@ def get_tools(tool):
  client_id=tool['settings'].get('client_id', None),
  client_secret=tool['settings'].get('client_secret', None),
  toolkit_name=tool.get('toolkit_name'),
- llm=tool['settings'].get('llm'))
+ llm=tool['settings'].get('llm'),
+ # indexer settings
+ connection_string=tool['settings'].get('connection_string', None),
+ collection_name=f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+ embedding_model="HuggingFaceEmbeddings",
+ embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+ vectorstore_type="PGVector")
  .get_tools())


@@ -33,6 +39,10 @@ class SharepointToolkit(BaseToolkit):
  client_id=(str, Field(description="Client ID")),
  client_secret=(SecretStr, Field(description="Client Secret", json_schema_extra={'secret': True})),
  selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
+ # indexer settings
+ connection_string = (Optional[SecretStr], Field(description="Connection string for vectorstore",
+ default=None,
+ json_schema_extra={'secret': True})),
  __config__=ConfigDict(json_schema_extra={
  'metadata': {
  "label": "Sharepoint", "icon_url": "sharepoint.svg",
alita_sdk/tools/sharepoint/api_wrapper.py
@@ -1,17 +1,15 @@
  import json
  import logging
- from typing import Optional, List, Dict, Any, Generator
+ from typing import Optional, List, Generator

- from ..chunkers import markdown_chunker
- from ..utils.content_parser import parse_file_content
+ from langchain_core.documents import Document
  from langchain_core.tools import ToolException
  from office365.runtime.auth.client_credential import ClientCredential
  from office365.sharepoint.client_context import ClientContext
  from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr

- from ..elitea_base import BaseToolApiWrapper, BaseIndexParams, BaseVectorStoreToolApiWrapper
- from ...runtime.langchain.interfaces.llm_processor import get_embeddings
- from langchain_core.documents import Document
+ from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
+ from ..utils.content_parser import parse_file_content

  NoInput = create_model(
  "NoInput"
@@ -39,15 +37,6 @@ ReadDocument = create_model(
  default=None))
  )

- indexData = create_model(
- "indexData",
- __base__=BaseIndexParams,
- progress_step=(Optional[int], Field(default=None, ge=0, le=100,
- description="Optional step size for progress reporting during indexing")),
- clean_index=(Optional[bool], Field(default=False,
- description="Optional flag to enforce clean existing index before indexing new data")),
- )
-

  class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  site_url: str
@@ -56,13 +45,6 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  token: SecretStr = None
  _client: Optional[ClientContext] = PrivateAttr() # Private attribute for the office365 client

- llm: Any = None
- connection_string: Optional[SecretStr] = None
- collection_name: Optional[str] = None
- embedding_model: Optional[str] = "HuggingFaceEmbeddings"
- embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
- vectorstore_type: Optional[str] = "PGVector"
-
  @model_validator(mode='before')
  @classmethod
  def validate_toolkit(cls, values):
@@ -143,7 +125,11 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  logging.error(f"Failed to load files from sharepoint: {e}")
  return ToolException("Can not get files. Please, double check folder name and read permissions.")

- def read_file(self, path, is_capture_image: bool = False, page_number: int = None, sheet_name: str=None):
+ def read_file(self, path,
+ is_capture_image: bool = False,
+ page_number: int = None,
+ sheet_name: str = None,
+ excel_by_sheets: bool = False):
  """ Reads file located at the specified server-relative path. """
  try:
  file = self._client.web.get_file_by_server_relative_path(path)
@@ -159,9 +145,10 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  is_capture_image=is_capture_image,
  page_number=page_number,
  sheet_name=sheet_name,
+ excel_by_sheets=excel_by_sheets,
  llm=self.llm)

- def _base_loader(self) -> List[Document]:
+ def _base_loader(self, **kwargs) -> List[Document]:
  try:
  all_files = self.get_files_list()
  except Exception as e:
@@ -170,35 +157,24 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  docs: List[Document] = []
  for file in all_files:
  metadata = {
- ("updated_at" if k == "Modified" else k): str(v)
+ ("updated_on" if k == "Modified" else k): str(v)
  for k, v in file.items()
  }
  docs.append(Document(page_content="", metadata=metadata))
  return docs

- def index_data(self,
- collection_suffix: str = '',
- progress_step: int = None,
- clean_index: bool = False):
- docs = self._base_loader()
- embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
- vs = self._init_vector_store(collection_suffix, embeddings=embedding)
- return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
-
  def _process_document(self, document: Document) -> Generator[Document, None, None]:
- config = {
- "max_tokens": self.llm.model_config.get('max_tokens', 512),
- "token_overlap": self.llm.model_config.get('token_overlap',
- int(self.llm.model_config.get('max_tokens', 512) * 0.05))
- }
- chunks = markdown_chunker(file_content_generator=self._generate_file_content(document), config=config)
- yield from chunks
-
- def _generate_file_content(self, document: Document) -> Generator[Document, None, None]:
- page_content = self.read_file(document.metadata['Path'], is_capture_image=True)
- document.page_content = json.dumps(str(page_content))
- yield document
-
+ page_content = self.read_file(document.metadata['Path'], is_capture_image=True, excel_by_sheets=True)
+ if isinstance(page_content, dict):
+ for key, value in page_content.items():
+ metadata = document.metadata
+ metadata['page'] = key
+ yield Document(page_content=str(value), metadata=metadata)
+ else:
+ document.page_content = json.dumps(str(page_content))
+ yield document
+
+ @extend_with_vector_tools
  def get_available_tools(self):
  return [
  {
@@ -218,11 +194,5 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  "description": self.read_file.__doc__,
  "args_schema": ReadDocument,
  "ref": self.read_file
- },
- {
- "name": "index_data",
- "ref": self.index_data,
- "description": self.index_data.__doc__,
- "args_schema": indexData,
  }
  ]
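The reworked _process_document no longer runs the markdown chunker; it calls read_file(..., excel_by_sheets=True) and, when that call returns a dict of sheet name to content, yields one Document per sheet with the sheet name stored under the 'page' metadata key. A small illustration of that fan-out, with the dict contents and metadata invented for the example:

```python
from langchain_core.documents import Document

# Illustration only: the per-sheet mapping read_file is expected to return for Excel files.
page_content = {"Sheet1": "col_a,col_b\n1,2", "Totals": "sum,3"}      # invented example data
base_metadata = {"Path": "/sites/demo/Shared Documents/report.xlsx"}  # invented metadata

docs = [
    Document(page_content=str(value), metadata={**base_metadata, "page": key})
    for key, value in page_content.items()
]
print(len(docs))  # 2 documents, one per worksheet
```

Note that the loop in the diff assigns metadata = document.metadata and mutates it in place across iterations; the copies made above are only for the illustration.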
alita_sdk/tools/testrail/__init__.py
@@ -51,6 +51,10 @@ class TestrailToolkit(BaseToolkit):
  ),
  email=(str, Field(description="User's email", json_schema_extra={'configuration': True})),
  password=(SecretStr, Field(description="User's password", json_schema_extra={'secret': True, 'configuration': True})),
+ # indexer settings
+ connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
+ default=None,
+ json_schema_extra={'secret': True})),
  selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
  __config__=ConfigDict(json_schema_extra={'metadata':
  {"label": "Testrail", "icon_url": "testrail-icon.svg",
alita_sdk/tools/testrail/api_wrapper.py
@@ -4,10 +4,11 @@ from typing import Dict, List, Optional, Union, Any, Generator

  import pandas as pd
  from langchain_core.tools import ToolException
+ from openai import BadRequestError
  from pydantic import SecretStr, create_model, model_validator
  from pydantic.fields import Field, PrivateAttr
  from testrail_api import StatusCodeError, TestRailAPI
- from ..elitea_base import BaseVectorStoreToolApiWrapper, BaseIndexParams
+ from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
  from langchain_core.documents import Document

  from ...runtime.utils.utils import IndexerKeywords
@@ -289,20 +290,6 @@ updateCase = create_model(
  ),
  )

- # Schema for indexing TestRail data into vector store
- indexData = create_model(
- "indexData",
- __base__=BaseIndexParams,
- project_id=(str, Field(description="TestRail project ID to index data from")),
- suite_id=(Optional[str], Field(default=None, description="Optional TestRail suite ID to filter test cases")),
- section_id=(Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
- title_keyword=(Optional[str], Field(default=None, description="Optional keyword to filter test cases by title")),
- progress_step=(Optional[int],
- Field(default=None, ge=0, le=100, description="Optional step size for progress reporting during indexing")),
- clean_index=(Optional[bool],
- Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
- )
-
  SUPPORTED_KEYS = {
  "id", "title", "section_id", "template_id", "type_id", "priority_id", "milestone_id",
  "refs", "created_by", "created_on", "updated_by", "updated_on", "estimate",
@@ -317,14 +304,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  password: Optional[SecretStr] = None,
  email: Optional[str] = None,
  _client: Optional[TestRailAPI] = PrivateAttr() # Private attribute for the TestRail client
- llm: Any = None
-
- connection_string: Optional[SecretStr] = None
- collection_name: Optional[str] = None
- embedding_model: Optional[str] = "HuggingFaceEmbeddings"
- embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
- vectorstore_type: Optional[str] = "PGVector"
-

  @model_validator(mode="before")
  @classmethod
@@ -490,7 +469,8 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  project_id=project_id, **params
  )

- cases = extracted_cases.get("cases")
+ # support old versions of testrail_api
+ cases = extracted_cases.get("cases") if isinstance(extracted_cases, dict) else extracted_cases

  if cases is None:
  return ToolException("No test cases found in the extracted data.")
@@ -554,7 +534,8 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  def _base_loader(self, project_id: str,
  suite_id: Optional[str] = None,
  section_id: Optional[int] = None,
- title_keyword: Optional[str] = None
+ title_keyword: Optional[str] = None,
+ **kwargs: Any
  ) -> Generator[Document, None, None]:
  try:
  if suite_id:
@@ -577,7 +558,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  'title': case.get('title', ''),
  'suite_id': suite_id or case.get('suite_id', ''),
  'id': str(case.get('id', '')),
- 'updated_on': case.get('updated_on') or -1,
+ IndexerKeywords.UPDATED_ON.value: case.get('updated_on') or -1,
  'labels': [lbl['title'] for lbl in case.get('labels', [])],
  'type': case.get('type_id') or -1,
  'priority': case.get('priority_id') or -1,
@@ -588,22 +569,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  'entity_type': 'test_case',
  })

- def index_data(
- self,
- project_id: str,
- suite_id: Optional[str] = None,
- collection_suffix: str = "",
- section_id: Optional[int] = None,
- title_keyword: Optional[str] = None,
- progress_step: Optional[int] = None,
- clean_index: Optional[bool] = False
- ):
- """Load TestRail test cases into the vector store."""
- docs = self._base_loader(project_id, suite_id, section_id, title_keyword)
- embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
- vs = self._init_vector_store(collection_suffix, embeddings=embedding)
- return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
-
  def _process_document(self, document: Document) -> Generator[Document, None, None]:
  """
  Process an existing base document to extract relevant metadata for full document preparation.
@@ -626,16 +591,15 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):

  # process each attachment to extract its content
  for attachment in attachments:
- attachment_id = attachment['id']
+ attachment_id = f"attach_{attachment['id']}"
  # add attachment id to metadata of parent
  document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
-
  # TODO: pass it to chunkers
  yield Document(page_content=self._process_attachment(attachment),
  metadata={
  'project_id': base_data.get('project_id', ''),
- IndexerKeywords.PARENT.value: case_id,
- 'id': attachment_id,
+ 'id': str(attachment_id),
+ IndexerKeywords.PARENT.value: str(case_id),
  'filename': attachment['filename'],
  'filetype': attachment['filetype'],
  'created_on': attachment['created_on'],
@@ -663,10 +627,20 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  try:
  attachment_path = self._client.attachments.get_attachment(attachment_id=attachment['id'], path=f"./{attachment['filename']}")
  page_content = parse_file_content(file_name=attachment['filename'], file_content=attachment_path.read_bytes(), llm=self.llm, is_capture_image=True)
+ except BadRequestError as ai_e:
+ logger.error(f"Unable to parse page's content with type: {attachment['filetype']} due to AI service issues: {ai_e}")
  except Exception as e:
  logger.error(f"Unable to parse page's content with type: {attachment['filetype']}: {e}")
  return page_content

+ def _index_tool_params(self):
+ return {
+ 'project_id': (str, Field(description="TestRail project ID to index data from")),
+ 'suite_id': (Optional[str],
+ Field(default=None, description="Optional TestRail suite ID to filter test cases")),
+ 'section_id': (Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
+ }
+
  def _to_markup(self, data: List[Dict], output_format: str) -> str:
  """
  Converts the given data into the specified format: 'json', 'csv', or 'markdown'.
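With the per-toolkit indexData schema removed, _index_tool_params only declares the extra indexing arguments, each as a (type, Field) tuple in exactly the form pydantic.create_model accepts; the shared base class in elitea_base presumably assembles the final indexing schema from them. A hedged sketch of that shape under that assumption:

```python
from typing import Optional
from pydantic import Field, create_model

# Hedged sketch: the tuples returned by _index_tool_params can be passed straight to
# create_model to rebuild an args schema similar to the removed indexData model.
IndexArgs = create_model(
    "indexData",
    project_id=(str, Field(description="TestRail project ID to index data from")),
    suite_id=(Optional[str], Field(default=None, description="Optional TestRail suite ID to filter test cases")),
    section_id=(Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
)
print(IndexArgs(project_id="12").model_dump())
```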
@@ -694,6 +668,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  if output_format == "markdown":
  return df.to_markdown(index=False)

+ @extend_with_vector_tools
  def get_available_tools(self):
  tools = [
  {
@@ -731,14 +706,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  "ref": self.update_case,
  "description": self.update_case.__doc__,
  "args_schema": updateCase,
- },
- {
- "name": "index_data",
- "ref": self.index_data,
- "description": self.index_data.__doc__,
- "args_schema": indexData,
  }
  ]
- # Add vector search from base
- tools.extend(self._get_vector_search_tools())
  return tools
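Across the Jira, SharePoint and TestRail wrappers, the hand-written index_data tools and the manual tools.extend(self._get_vector_search_tools()) call are replaced by the extend_with_vector_tools decorator imported from elitea_base. Its implementation is not part of this diff; the sketch below is an inference from the code it replaces and may differ from the real decorator, which likely also registers a generic index_data tool built from _index_tool_params.

```python
# Inferred sketch only; the actual decorator lives in alita_sdk/tools/elitea_base.py.
def extend_with_vector_tools(get_available_tools):
    def wrapper(self):
        tools = get_available_tools(self)
        tools.extend(self._get_vector_search_tools())  # append the shared vector-store tools
        return tools
    return wrapper
```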