alita-sdk 0.3.209__py3-none-any.whl → 0.3.210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. alita_sdk/runtime/clients/artifact.py +18 -4
  2. alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py +2 -1
  3. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +3 -3
  4. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +8 -4
  5. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
  6. alita_sdk/runtime/langchain/langraph_agent.py +1 -1
  7. alita_sdk/runtime/toolkits/artifact.py +7 -3
  8. alita_sdk/runtime/toolkits/tools.py +8 -1
  9. alita_sdk/runtime/tools/application.py +2 -0
  10. alita_sdk/runtime/tools/artifact.py +65 -8
  11. alita_sdk/runtime/tools/vectorstore.py +125 -41
  12. alita_sdk/runtime/utils/utils.py +3 -0
  13. alita_sdk/tools/ado/__init__.py +8 -0
  14. alita_sdk/tools/ado/repos/repos_wrapper.py +37 -0
  15. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +0 -7
  16. alita_sdk/tools/ado/work_item/__init__.py +4 -0
  17. alita_sdk/tools/ado/work_item/ado_wrapper.py +37 -4
  18. alita_sdk/tools/aws/delta_lake/__init__.py +1 -1
  19. alita_sdk/tools/bitbucket/__init__.py +13 -1
  20. alita_sdk/tools/bitbucket/api_wrapper.py +31 -4
  21. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +31 -0
  22. alita_sdk/tools/chunkers/code/codeparser.py +18 -10
  23. alita_sdk/tools/confluence/api_wrapper.py +35 -134
  24. alita_sdk/tools/confluence/loader.py +30 -28
  25. alita_sdk/tools/elitea_base.py +112 -11
  26. alita_sdk/tools/figma/__init__.py +13 -1
  27. alita_sdk/tools/figma/api_wrapper.py +47 -3
  28. alita_sdk/tools/github/api_wrapper.py +8 -0
  29. alita_sdk/tools/github/github_client.py +18 -0
  30. alita_sdk/tools/gitlab/__init__.py +4 -0
  31. alita_sdk/tools/gitlab/api_wrapper.py +10 -0
  32. alita_sdk/tools/google/bigquery/__init__.py +1 -1
  33. alita_sdk/tools/jira/__init__.py +21 -13
  34. alita_sdk/tools/jira/api_wrapper.py +285 -5
  35. alita_sdk/tools/sharepoint/__init__.py +11 -1
  36. alita_sdk/tools/sharepoint/api_wrapper.py +23 -53
  37. alita_sdk/tools/testrail/__init__.py +4 -0
  38. alita_sdk/tools/testrail/api_wrapper.py +21 -54
  39. alita_sdk/tools/utils/content_parser.py +72 -8
  40. alita_sdk/tools/xray/__init__.py +8 -1
  41. alita_sdk/tools/xray/api_wrapper.py +505 -14
  42. alita_sdk/tools/zephyr_scale/api_wrapper.py +5 -5
  43. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/METADATA +1 -1
  44. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/RECORD +47 -47
  45. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/WHEEL +0 -0
  46. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/licenses/LICENSE +0 -0
  47. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/top_level.txt +0 -0
alita_sdk/tools/ado/work_item/ado_wrapper.py
@@ -1,19 +1,24 @@
  import json
  import logging
  import urllib.parse
- from typing import Optional, Dict, List
+ from typing import Dict, List, Generator, Optional

+ from alita_sdk.tools.elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
  from azure.devops.connection import Connection
  from azure.devops.v7_1.core import CoreClient
  from azure.devops.v7_1.wiki import WikiClient
  from azure.devops.v7_1.work_item_tracking import TeamContext, Wiql, WorkItemTrackingClient
+ from langchain_core.documents import Document
  from langchain_core.tools import ToolException
  from msrest.authentication import BasicAuthentication
  from pydantic import create_model, PrivateAttr, SecretStr
  from pydantic import model_validator
  from pydantic.fields import Field

- from ...elitea_base import BaseToolApiWrapper
+ try:
+     from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
+ except ImportError:
+     from alita_sdk.langchain.interfaces.llm_processor import get_embeddings

  logger = logging.getLogger(__name__)

@@ -89,8 +94,7 @@ ADOUnlinkWorkItemsFromWikiPage = create_model(
      page_name=(str, Field(description="Wiki page path to unlink the work items from", examples=["/TargetPage"]))
  )

-
- class AzureDevOpsApiWrapper(BaseToolApiWrapper):
+ class AzureDevOpsApiWrapper(BaseVectorStoreToolApiWrapper):
      organization_url: str
      project: str
      token: SecretStr
@@ -504,6 +508,35 @@ class AzureDevOpsApiWrapper(BaseToolApiWrapper):
              logger.error(f"Error unlinking work items from wiki page '{page_name}': {str(e)}")
              return ToolException(f"An unexpected error occurred while unlinking work items from wiki page '{page_name}': {str(e)}")

+     def _base_loader(self, wiql: str, **kwargs) -> Generator[Document, None, None]:
+         ref_items = self._client.query_by_wiql(Wiql(query=wiql)).work_items
+         for ref in ref_items:
+             wi = self._client.get_work_item(id=ref.id, project=self.project, expand='all')
+             yield Document(page_content=json.dumps(wi.fields), metadata={
+                 'id': str(wi.id),
+                 'type': wi.fields.get('System.WorkItemType', ''),
+                 'title': wi.fields.get('System.Title', ''),
+                 'state': wi.fields.get('System.State', ''),
+                 'area': wi.fields.get('System.AreaPath', ''),
+                 'reason': wi.fields.get('System.Reason', ''),
+                 'iteration': wi.fields.get('System.IterationPath', ''),
+                 'updated_on': wi.fields.get('System.ChangedDate', ''),
+                 'attachment_ids': [rel.url.split('/')[-1] for rel in wi.relations or [] if rel.rel == 'AttachedFile']
+             })
+
+     def _process_document(self, document: Document) -> Generator[Document, None, None]:
+         for attachment_id in document.metadata.get('attachment_ids', []):
+             content_generator = self._client.get_attachment_content(id=attachment_id, download=True)
+             content = ''.join(str(item) for item in content_generator)
+             yield Document(page_content=content, metadata={'id': attachment_id})
+
+     def _index_tool_params(self):
+         """Return the parameters for indexing data."""
+         return {
+             "wiql": (str, Field(description="WIQL (Work Item Query Language) query string to select and filter Azure DevOps work items."))
+         }
+
+     @extend_with_vector_tools
      def get_available_tools(self):
          """Return a list of available tools."""
          return [
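Note: with these hooks, AzureDevOpsApiWrapper plugs into the shared vector-store indexing flow from elitea_base: _base_loader turns a WIQL result set into Document objects, _process_document expands 'AttachedFile' relations into additional documents, and _index_tool_params declares the wiql argument, while get_available_tools is now decorated with @extend_with_vector_tools. A minimal sketch of driving the two loader hooks directly; the wrapper instance and the WIQL text are illustrative, not taken from this diff:

# Sketch only: assumes `wrapper` is an already-configured AzureDevOpsApiWrapper.
wiql = "SELECT [System.Id] FROM WorkItems WHERE [System.State] = 'Active'"  # illustrative query
for work_item_doc in wrapper._base_loader(wiql=wiql):
    # page_content is the JSON-dumped work item fields; metadata carries id, title, state, etc.
    print(work_item_doc.metadata['id'], work_item_doc.metadata['title'])
    for attachment_doc in wrapper._process_document(work_item_doc):
        # each attachment referenced by the work item becomes its own Document
        print('  attachment:', attachment_doc.metadata['id'])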
alita_sdk/tools/aws/delta_lake/__init__.py
@@ -57,7 +57,7 @@ class DeltaLakeToolkitConfig(BaseModel):
      aws_secret_access_key: Optional[SecretStr] = Field(default=None, description="AWS secret access key", json_schema_extra={"secret": True, "configuration": True})
      aws_session_token: Optional[SecretStr] = Field(default=None, description="AWS session token (optional)", json_schema_extra={"secret": True, "configuration": True})
      aws_region: Optional[str] = Field(default=None, description="AWS region for Delta Lake storage", json_schema_extra={"configuration": True})
-     s3_path: Optional[str] = Field(default=None, description="S3 path to Delta Lake data (e.g., s3://bucket/path)", json_schema_extra={"configuration": True})
+     s3_path: Optional[str] = Field(default=None, description="S3 path to Delta Lake data (e.g., s3://bucket/path)", json_schema_extra={"configuration": True, "configuration_title": True})
      table_path: Optional[str] = Field(default=None, description="Delta Lake table path (if not using s3_path)", json_schema_extra={"configuration": True})
      selected_tools: List[str] = Field(default=[], description="Selected tools", json_schema_extra={"args_schemas": get_available_tools()})

alita_sdk/tools/bitbucket/__init__.py
@@ -24,7 +24,15 @@ def get_tools(tool):
          password=tool['settings']['password'],
          branch=tool['settings']['branch'],
          cloud=tool['settings'].get('cloud'),
-         toolkit_name=tool.get('toolkit_name'),
+         llm=tool['settings'].get('llm', None),
+         alita=tool['settings'].get('alita', None),
+         connection_string=tool['settings'].get('connection_string', None),
+         collection_name=str(tool['id']),
+         doctype='code',
+         embedding_model="HuggingFaceEmbeddings",
+         embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+         vectorstore_type="PGVector",
+         toolkit_name=tool.get('toolkit_name')
      ).get_tools()

@@ -48,6 +56,10 @@ class AlitaBitbucketToolkit(BaseToolkit):
              username=(str, Field(description="Username", json_schema_extra={'configuration': True})),
              password=(SecretStr, Field(description="GitLab private token", json_schema_extra={'secret': True, 'configuration': True})),
              cloud=(Optional[bool], Field(description="Hosting Option", default=None)),
+             # indexer settings
+             connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
+                                                           default=None,
+                                                           json_schema_extra={'secret': True})),
              selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
              __config__=ConfigDict(json_schema_extra=
              {
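Note: get_tools now forwards vector-store indexer settings into BitbucketAPIWrapper, and the toolkit schema gains a connection_string field. A sketch of the settings dict get_tools appears to expect; only the keys visible in these hunks (password, branch, cloud, llm, alita, connection_string, id, toolkit_name) are confirmed, the others are assumed placeholders:

# Sketch only: illustrative toolkit entry for get_tools(); all values are placeholders.
tool = {
    "id": 42,                        # becomes the vector-store collection_name
    "toolkit_name": "my_bitbucket",
    "settings": {
        "url": "https://bitbucket.example.com",  # assumed key, not shown in this hunk
        "project": "PROJ",                       # assumed key
        "repository": "repo",                    # assumed key
        "username": "user",                      # assumed key
        "password": "app-password",
        "branch": "main",
        "cloud": False,
        # new in 0.3.210: optional indexer wiring
        "llm": None,
        "alita": None,
        "connection_string": "postgresql+psycopg://user:pass@host:5432/vectordb",
    },
}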
alita_sdk/tools/bitbucket/api_wrapper.py
@@ -41,6 +41,18 @@ class BitbucketAPIWrapper(BaseCodeToolApiWrapper):
      """Bitbucket installation type: true for cloud, false for server.
      """

+     llm: Optional[Any] = None
+     # Alita instance
+     alita: Optional[Any] = None
+
+     # Vector store configuration
+     connection_string: Optional[SecretStr] = None
+     collection_name: Optional[str] = None
+     doctype: Optional[str] = 'code'
+     embedding_model: Optional[str] = "HuggingFaceEmbeddings"
+     embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
+     vectorstore_type: Optional[str] = "PGVector"
+
      @model_validator(mode='before')
      @classmethod
      def validate_env(cls, values: Dict) -> Dict:
@@ -59,7 +71,7 @@ class BitbucketAPIWrapper(BaseCodeToolApiWrapper):
              password=values['password'],
              workspace=values['project'],
              repository=values['repository']
-         ) if values['cloud'] else BitbucketServerApi(
+         ) if values.get('cloud') else BitbucketServerApi(
              url=values['url'],
              username=values['username'],
              password=values['password'],
@@ -213,16 +225,31 @@ class BitbucketAPIWrapper(BaseCodeToolApiWrapper):
          except Exception as e:
              return ToolException(f"Can't add comment to pull request `{pr_id}` due to error:\n{str(e)}")

-     def _get_files(self, file_path: str, branch: str) -> str:
+     def _get_files(self, path: str, branch: str) -> str:
          """
          Get files from the bitbucket repo
          Parameters:
-             file_path(str): the file path
+             path(str): the file path
              branch(str): branch name (by default: active_branch)
          Returns:
              str: List of the files
          """
-         return str(self._bitbucket.get_files_list(file_path=file_path if file_path else '', branch=branch if branch else self._active_branch))
+         return str(self._bitbucket.get_files_list(file_path=path if path else '', branch=branch if branch else self._active_branch))
+
+     # TODO: review this method, it may not work as expected
+     # def _file_commit_hash(self, file_path: str, branch: str):
+     #     """
+     #     Get the commit hash of a file in the gitlab repo
+     #     Parameters:
+     #         file_path(str): the file path
+     #         branch(str): branch name (by default: active_branch)
+     #     Returns:
+     #         str: The commit hash of the file
+     #     """
+     #     try:
+     #         return self._bitbucket.get_file_commit_hash(file_path=file_path, branch=branch)
+     #     except Exception as e:
+     #         raise ToolException(f"Can't extract file commit hash (`{file_path}`) due to error:\n{str(e)}")

      def _read_file(self, file_path: str, branch: str) -> str:
          """
alita_sdk/tools/bitbucket/cloud_api_wrapper.py
@@ -104,6 +104,22 @@ class BitbucketServerApi(BitbucketApiAbstract):
              data=json.loads(pr_json_data)
          )

+     # TODO: review this method, it may not work as expected
+     def get_file_commit_hash(self, file_path: str, branch: str):
+         """
+         Get the commit hash of a file in a specific branch.
+         Parameters:
+             file_path (str): The path to the file.
+             branch (str): The branch name.
+         Returns:
+             str: The commit hash of the file.
+         """
+         commits = self.api_client.get_commits(project_key=self.project, repository_slug=self.repository,
+                                               filename=file_path, at=branch, limit=1)
+         if commits:
+             return commits[0]['id']
+         return None
+
      def get_file(self, file_path: str, branch: str) -> str:
          return self.api_client.get_content_of_file(project_key=self.project, repository_slug=self.repository, at=branch,
                                                     filename=file_path).decode('utf-8')
@@ -262,6 +278,21 @@ class BitbucketCloudApi(BitbucketApiAbstract):
          response = self.repository.pullrequests.post(None, data=json.loads(pr_json_data))
          return response['links']['self']['href']

+     # TODO: review this method, it may not work as expected
+     def get_file_commit_hash(self, file_path: str, branch: str):
+         """
+         Get the commit hash of a file in a specific branch.
+         Parameters:
+             file_path (str): The path to the file.
+             branch (str): The branch name.
+         Returns:
+             str: The commit hash of the file.
+         """
+         commits = self.repository.commits.get(path=file_path, branch=branch, pagelen=1)
+         if commits['values']:
+             return commits['values'][0]['hash']
+         return None
+
      def get_file(self, file_path: str, branch: str) -> str:
          return self.repository.get(path=f'src/{branch}/{file_path}')

alita_sdk/tools/chunkers/code/codeparser.py
@@ -39,13 +39,17 @@ def parse_code_files_for_db(file_content_generator: Generator[str, None, None],
      if programming_language == Language.UNKNOWN:
          documents = TokenTextSplitter(encoding_name="gpt2", chunk_size=256, chunk_overlap=30).split_text(file_content)
          for document in documents:
+             metadata = {
+                 "filename": file_name,
+                 "method_name": node.name,
+                 "language": programming_language.value,
+             }
+             commit_hash = data.get("commit_hash")
+             if commit_hash is not None:
+                 metadata["commit_hash"] = commit_hash
              document = Document(
                  page_content=document,
-                 metadata={
-                     "filename": file_name,
-                     "method_name": 'text',
-                     "language": programming_language.value,
-                 },
+                 metadata=metadata,
              )
              yield document
      else:
@@ -73,13 +77,17 @@ def parse_code_files_for_db(file_content_generator: Generator[str, None, None],
              splitted_documents = code_splitter.split_text(method_source_code)

              for splitted_document in splitted_documents:
+                 metadata = {
+                     "filename": file_name,
+                     "method_name": node.name,
+                     "language": programming_language.value,
+                 }
+                 commit_hash = data.get("commit_hash")
+                 if commit_hash is not None:
+                     metadata["commit_hash"] = commit_hash
                  document = Document(
                      page_content=splitted_document,
-                     metadata={
-                         "filename": file_name,
-                         "method_name": node.name,
-                         "language": programming_language.value,
-                     },
+                     metadata=metadata,
                  )
                  yield document
          except Exception as e:
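Note: the chunker now copies an optional commit_hash from the incoming file record into every chunk's metadata. A sketch of what one item consumed by parse_code_files_for_db might look like; only the commit_hash lookup is visible in this diff, the other keys are assumed for illustration:

# Sketch only: hypothetical file record; commit_hash is optional and, when present,
# is propagated into the metadata of every generated chunk.
data = {
    "file_name": "src/utils/helpers.py",
    "file_content": "def add(a, b):\n    return a + b\n",
    "commit_hash": "9fceb02d0ae598e95dc970b74767f19372d61af8",
}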
alita_sdk/tools/confluence/api_wrapper.py
@@ -1,4 +1,3 @@
- import shortuuid
  import re
  import logging
  import requests
@@ -17,7 +16,7 @@ from langchain_core.messages import HumanMessage
  from markdownify import markdownify
  from langchain_community.document_loaders.confluence import ContentFormat

- from ..elitea_base import BaseVectorStoreToolApiWrapper, BaseIndexParams
+ from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
  from ..llm.img_utils import ImageDescriptionCache
  from ..utils import is_cookie_token, parse_cookie_string

@@ -141,50 +140,6 @@ pageId = create_model(
          description="Optional JSON of parameters to be sent in request body or query params. MUST be string with valid JSON. For search/read operations, you MUST always get minimum fields and set max results, until users ask explicitly for more fields. For search/read operations you must generate CQL query string and pass it as params."))
  )

- # loaderParams = create_model(
- #     "LoaderParams",
- #     content_format=(str, Field(description="The format of the content to be retrieved.")),
- #     page_ids=(Optional[List[str]], Field(description="List of page IDs to retrieve.", default=None)),
- #     label=(Optional[str], Field(description="Label to filter pages.", default=None)),
- #     cql=(Optional[str], Field(description="CQL query to filter pages.", default=None)),
- #     include_restricted_content=(Optional[bool], Field(description="Include restricted content.", default=False)),
- #     include_archived_content=(Optional[bool], Field(description="Include archived content.", default=False)),
- #     include_attachments=(Optional[bool], Field(description="Include attachments.", default=False)),
- #     include_comments=(Optional[bool], Field(description="Include comments.", default=False)),
- #     include_labels=(Optional[bool], Field(description="Include labels.", default=False)),
- #     limit=(Optional[int], Field(description="Limit the number of results.", default=10)),
- #     max_pages=(Optional[int], Field(description="Maximum number of pages to retrieve.", default=1000)),
- #     ocr_languages=(Optional[str], Field(description="OCR languages for processing attachments.", default=None)),
- #     keep_markdown_format=(Optional[bool], Field(description="Keep the markdown format.", default=True)),
- #     keep_newlines=(Optional[bool], Field(description="Keep newlines in the content.", default=True)),
- #     bins_with_llm=(Optional[bool], Field(description="Use LLM for processing binary files.", default=False)),
- # )
-
- indexPagesParams = create_model(
-     "indexPagesParams",
-     __base__=BaseIndexParams,
-     content_format=(Literal['view', 'storage', 'export_view', 'editor', 'anonymous'],
-                     Field(description="The format of the content to be retrieved.")),
-     ### Loader Parameters
-     page_ids=(Optional[List[str]], Field(description="List of page IDs to retrieve.", default=None)),
-     label=(Optional[str], Field(description="Label to filter pages.", default=None)),
-     cql=(Optional[str], Field(description="CQL query to filter pages.", default=None)),
-     limit=(Optional[int], Field(description="Limit the number of results.", default=10)),
-     max_pages=(Optional[int], Field(description="Maximum number of pages to retrieve.", default=1000)),
-     include_restricted_content=(Optional[bool], Field(description="Include restricted content.", default=False)),
-     include_archived_content=(Optional[bool], Field(description="Include archived content.", default=False)),
-     include_attachments=(Optional[bool], Field(description="Include attachments.", default=False)),
-     include_comments=(Optional[bool], Field(description="Include comments.", default=False)),
-     include_labels=(Optional[bool], Field(description="Include labels.", default=True)),
-     ocr_languages=(Optional[str], Field(description="OCR languages for processing attachments.", default='eng')),
-     keep_markdown_format=(Optional[bool], Field(description="Keep the markdown format.", default=True)),
-     keep_newlines=(Optional[bool], Field(description="Keep newlines in the content.", default=True)),
-     bins_with_llm=(Optional[bool], Field(description="Use LLM for processing binary files.", default=False)),
-     ### Chunking Parameters
-     chunking_tool=(Literal['markdown', 'statistical', 'proposal'], Field(description="Name of chunking tool", default="markdown")),
-     chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default_factory=dict)),
- )
-
  GetPageWithImageDescriptions = create_model(
      "GetPageWithImageDescriptionsModel",
      page_id=(str, Field(description="Confluence page ID from which content with images will be extracted")),
@@ -849,7 +804,7 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
              docs.extend(batch)
          return docs[:max_pages]

-     def _loader(self, **kwargs) -> Generator[str, None, None]:
+     def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
          """
          Loads content from Confluence based on parameters.
          Returns:
@@ -858,7 +813,15 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
          from .loader import AlitaConfluenceLoader
          from copy import copy
          content_format = kwargs.get('content_format', 'view').lower()
+         base_params = {
+             'url': self.base_url,
+             'space_key': self.space,
+             'min_retry_seconds': self.min_retry_seconds,
+             'max_retry_seconds': self.max_retry_seconds,
+             'number_of_retries': self.number_of_retries
+         }
          confluence_loader_params = copy(kwargs)
+         confluence_loader_params.update(base_params)
          mapping = {
              'view': ContentFormat.VIEW,
              'storage': ContentFormat.STORAGE,
@@ -878,86 +841,9 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
          for document in loader._lazy_load(kwargs={}):
              yield document

-     def index_data(self, content_format: str,
-                    collection_suffix: str = "",
-                    page_ids: Optional[List[str]] = None,
-                    label: Optional[str] = None,
-                    cql: Optional[str] = None,
-                    include_restricted_content: Optional[bool] = False,
-                    include_archived_content: Optional[bool] = False,
-                    include_attachments: Optional[bool] = False,
-                    include_comments: Optional[bool] = False,
-                    include_labels: Optional[bool] = False,
-                    limit: Optional[int] = 10,
-                    max_pages: Optional[int] = 10,
-                    keep_markdown_format: Optional[bool] = True,
-                    keep_newlines: Optional[bool] = True,
-                    bins_with_llm: bool = False,
-                    chunking_tool: str = "markdown",
-                    chunking_config: Optional[Dict[str, Any]] = None,
-                    **kwargs) -> Generator[str, None, None]:
-         """Load Confluence pages and index them in the vector store."""
-
-         from alita_sdk.tools.chunkers import __confluence_chunkers__ as chunkers, __confluence_models__ as models
-         try:
-             from alita_sdk.langchain.interfaces.llm_processor import get_embeddings
-         except ImportError:
-             from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
-
-         loader_params = {
-             'url': self.base_url,
-             'space_key': self.space,
-             'content_format': content_format,
-             'page_ids': page_ids,
-             'label': label,
-             'cql': cql,
-             'include_restricted_content': include_restricted_content,
-             'include_archived_content': include_archived_content,
-             'include_attachments': include_attachments,
-             'include_comments': include_comments,
-             'include_labels': include_labels,
-             'limit': limit,
-             'max_pages': max_pages,
-             'keep_markdown_format': keep_markdown_format,
-             'keep_newlines': keep_newlines,
-             'bins_with_llm': bins_with_llm,
-             'min_retry_seconds': self.min_retry_seconds,
-             'max_retry_seconds': self.max_retry_seconds,
-             'number_of_retries': self.number_of_retries
-         }
-         documents = self._loader(**loader_params)
-         embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
-
-         chunker = chunkers.get(chunking_tool)
-
-         chunking_config = chunking_config or {}
-
-         if chunker:
-             # Validate and prepare chunking configuration using Pydantic models
-             config_model = models.get(chunking_tool)
-             if config_model:
-                 # Set required fields that should come from the instance
-                 chunking_config['embedding'] = embedding
-                 chunking_config['llm'] = self.llm
-
-                 try:
-                     # Validate the configuration using the appropriate Pydantic model
-                     validated_config = config_model(**chunking_config)
-                     chunking_config = validated_config.model_dump()
-                 except Exception as e:
-                     logger.error(f"Invalid chunking configuration for {chunking_tool}: {e}")
-                     raise ToolException(f"Invalid chunking configuration: {e}")
-             else:
-                 # Fallback for chunkers without models
-                 chunking_config['embedding'] = embedding
-                 chunking_config['llm'] = self.llm
-
-             documents = chunker(documents, chunking_config)
-
-         # passing embedding to avoid re-initialization
-         vectorstore = self._init_vector_store(collection_suffix, embeddings=embedding)
-         return vectorstore.index_documents(documents)
-
+     def _process_document(self, document: Document) -> Generator[Document, None, None]:
+         for attachment in self.get_page_attachments(document.metadata.get('id')):
+             yield Document(page_content=attachment.get('content', ''), metadata=attachment.get('metadata', {}))

      def _download_image(self, image_url):
          """
@@ -1685,6 +1571,28 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
              logger.error(f"Error retrieving attachments for page {page_id}: {str(e)}")
              return f"Error retrieving attachments: {str(e)}"

+     def _index_tool_params(self):
+         """Return the parameters for indexing data."""
+         return {
+             "content_format": (Literal['view', 'storage', 'export_view', 'editor', 'anonymous'],
+                                Field(description="The format of the content to be retrieved.")),
+             "page_ids": (Optional[List[str]], Field(description="List of page IDs to retrieve.", default=None)),
+             "label": (Optional[str], Field(description="Label to filter pages.", default=None)),
+             "cql": (Optional[str], Field(description="CQL query to filter pages.", default=None)),
+             "limit": (Optional[int], Field(description="Limit the number of results.", default=10)),
+             "max_pages": (Optional[int], Field(description="Maximum number of pages to retrieve.", default=1000)),
+             "include_restricted_content": (Optional[bool], Field(description="Include restricted content.", default=False)),
+             "include_archived_content": (Optional[bool], Field(description="Include archived content.", default=False)),
+             "include_attachments": (Optional[bool], Field(description="Include attachments.", default=False)),
+             "include_comments": (Optional[bool], Field(description="Include comments.", default=False)),
+             "include_labels": (Optional[bool], Field(description="Include labels.", default=True)),
+             "ocr_languages": (Optional[str], Field(description="OCR languages for processing attachments.", default='eng')),
+             "keep_markdown_format": (Optional[bool], Field(description="Keep the markdown format.", default=True)),
+             "keep_newlines": (Optional[bool], Field(description="Keep newlines in the content.", default=True)),
+             "bins_with_llm": (Optional[bool], Field(description="Use LLM for processing binary files.", default=False)),
+         }
+
+     @extend_with_vector_tools
      def get_available_tools(self):
          # Confluence-specific tools
          confluence_tools = [
@@ -1796,13 +1704,6 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
                  "description": self.get_page_id_by_title.__doc__,
                  "args_schema": getPageIdByTitleInput,
              },
-             # Confluence-specific vector store indexing
-             {
-                 "name": "index_data",
-                 "ref": self.index_data,
-                 "description": self.index_data.__doc__,
-                 "args_schema": indexPagesParams,
-             },
              {
                  "name": "get_page_attachments",
                  "ref": self.get_page_attachments,
alita_sdk/tools/confluence/loader.py
@@ -10,8 +10,8 @@ from langchain_community.document_loaders import ConfluenceLoader
  from langchain_community.document_loaders.confluence import ContentFormat
  from langchain_core.messages import HumanMessage
  from pdf2image import convert_from_bytes
- from reportlab.graphics import renderPM
- from svglib.svglib import svg2rlg
+ # from reportlab.graphics import renderPM
+ # from svglib.svglib import svg2rlg

  from .utils import image_to_byte_array, bytes_to_base64

@@ -125,6 +125,7 @@ class AlitaConfluenceLoader(ConfluenceLoader):
              text = title + self.process_doc(absolute_url)
          elif media_type == "application/vnd.ms-excel":
              text = title + self.process_xls(absolute_url)
+         # TODO review usage
          # elif media_type == "image/svg+xml":
          #     text = title + self.process_svg(absolute_url, ocr_languages)
          else:
@@ -192,29 +193,30 @@ class AlitaConfluenceLoader(ConfluenceLoader):
          else:
              return super().process_image(link, ocr_languages)

-     def process_svg(
-         self,
-         link: str,
-         ocr_languages: Optional[str] = None,
-     ) -> str:
-         if self.bins_with_llm and self.llm:
-             response = self.confluence.request(path=link, absolute=True)
-             text = ""
-
-             if (
-                 response.status_code != 200
-                 or response.content == b""
-                 or response.content is None
-             ):
-                 return text
-
-             drawing = svg2rlg(BytesIO(response.content))
-
-             img_data = BytesIO()
-             renderPM.drawToFile(drawing, img_data, fmt="PNG")
-             img_data.seek(0)
-             image = Image.open(img_data)
-             result = self.__perform_llm_prediction_for_image(image)
-             return result
-         else:
-             return super().process_svg(link, ocr_languages)
+     # TODO review usage
+     # def process_svg(
+     #     self,
+     #     link: str,
+     #     ocr_languages: Optional[str] = None,
+     # ) -> str:
+     #     if self.bins_with_llm and self.llm:
+     #         response = self.confluence.request(path=link, absolute=True)
+     #         text = ""
+     #
+     #         if (
+     #             response.status_code != 200
+     #             or response.content == b""
+     #             or response.content is None
+     #         ):
+     #             return text
+     #
+     #         drawing = svg2rlg(BytesIO(response.content))
+     #
+     #         img_data = BytesIO()
+     #         renderPM.drawToFile(drawing, img_data, fmt="PNG")
+     #         img_data.seek(0)
+     #         image = Image.open(img_data)
+     #         result = self.__perform_llm_prediction_for_image(image)
+     #         return result
+     #     else:
+     #         return super().process_svg(link, ocr_languages)