PyPI - alita-sdk - Versions diffs - 0.3.369__py3-none-any.whl → 0.3.370__py3-none-any.whl - Mend

alita-sdk 0.3.369__py3-none-any.whl → 0.3.370__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of alita-sdk might be problematic. Click here for more details.

Files changed (7) hide show

alita_sdk/runtime/tools/vectorstore_base.py CHANGED Viewed

@@ -6,6 +6,8 @@ from typing import Any, Optional, List, Dict, Generator
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
+from langchain_core.tools import ToolException
+from psycopg.errors import DataException
 from pydantic import BaseModel, model_validator, Field
 from alita_sdk.tools.elitea_base import BaseToolApiWrapper
@@ -316,6 +318,15 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                     if doc_id not in unique_docs or score > chunk_type_scores.get(doc_id, 0):
                         unique_docs[doc_id] = doc
                         chunk_type_scores[doc_id] = score
+            except DataException as dimException:
+                exception_str = str(dimException)
+                if 'different vector dimensions' in exception_str:
+                    logger.error(f"Data exception: {exception_str}")
+                    raise ToolException(f"Global search cannot be completed since collections were indexed using "
+                                        f"different embedding models. Use search within a single collection."
+                                        f"\nDetails: {exception_str}")
+                raise ToolException(f"Data exception during search. Possibly invalid filter: {exception_str}")
             except Exception as e:
                 logger.warning(f"Error searching for document chunks: {str(e)}")

alita_sdk/tools/confluence/api_wrapper.py CHANGED Viewed

@@ -815,6 +815,10 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         from .loader import AlitaConfluenceLoader
         from copy import copy
         content_format = kwargs.get('content_format', 'view').lower()
+        self._index_include_attachments = kwargs.get('include_attachments', False)
+        self._include_extensions = kwargs.get('include_extensions', [])
+        self._skip_extensions = kwargs.get('skip_extensions', [])
         base_params = {
             'url': self.base_url,
             'space_key': self.space,
@@ -847,65 +851,79 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
         try:
-            page_id = document.metadata.get('id')
-            attachments = self.client.get_attachments_from_content(page_id)
-            if not attachments or not attachments.get('results'):
-                return f"No attachments found for page ID {page_id}."
-            # Get attachment history for created/updated info
-            history_map = {}
-            for attachment in attachments['results']:
-                try:
-                    hist = self.client.history(attachment['id'])
-                    history_map[attachment['id']] = hist
-                except Exception as e:
-                    logger.warning(f"Failed to fetch history for attachment {attachment.get('title', '')}: {str(e)}")
-                    history_map[attachment['id']] = None
-            import re
-            for attachment in attachments['results']:
-                title = attachment.get('title', '')
-                file_ext = title.lower().split('.')[-1] if '.' in title else ''
-                media_type = attachment.get('metadata', {}).get('mediaType', '')
-                # Core metadata extraction with history
-                hist = history_map.get(attachment['id']) or {}
-                created_by = hist.get('createdBy', {}).get('displayName', '') if hist else attachment.get('creator', {}).get('displayName', '')
-                created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
-                last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
+            if self._index_include_attachments:
+                page_id = document.metadata.get('id')
+                attachments = self.client.get_attachments_from_content(page_id)
+                if not attachments or not attachments.get('results'):
+                    return f"No attachments found for page ID {page_id}."
+                # Get attachment history for created/updated info
+                history_map = {}
+                for attachment in attachments['results']:
+                    try:
+                        hist = self.client.history(attachment['id'])
+                        history_map[attachment['id']] = hist
+                    except Exception as e:
+                        logger.warning(f"Failed to fetch history for attachment {attachment.get('title', '')}: {str(e)}")
+                        history_map[attachment['id']] = None
+                import re
+                for attachment in attachments['results']:
+                    title = attachment.get('title', '')
+                    file_ext = title.lower().split('.')[-1] if '.' in title else ''
+                    # Re-verify extension filters
+                    # Check if file should be skipped based on skip_extensions
+                    if any(re.match(pattern.replace('*', '.*') + '$', title, re.IGNORECASE)
+                           for pattern in self._skip_extensions):
+                        continue
+                    # Check if file should be included based on include_extensions
+                    # If include_extensions is empty, process all files (that weren't skipped)
+                    if self._include_extensions and not (
+                    any(re.match(pattern.replace('*', '.*') + '$', title, re.IGNORECASE)
+                        for pattern in self._include_extensions)):
+                        continue
+                    media_type = attachment.get('metadata', {}).get('mediaType', '')
+                    # Core metadata extraction with history
+                    hist = history_map.get(attachment['id']) or {}
+                    created_by = hist.get('createdBy', {}).get('displayName', '') if hist else attachment.get('creator', {}).get('displayName', '')
+                    created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
+                    last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
+                    metadata = {
+                        'name': title,
+                        'size': attachment.get('extensions', {}).get('fileSize', None),
+                        'creator': created_by,
+                        'created': created_date,
+                        'updated': last_updated,
+                        'media_type': media_type,
+                        'labels': [label['name'] for label in
+                                   attachment.get('metadata', {}).get('labels', {}).get('results', [])],
+                        'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
+                            '_links', {}).get('download') else None
+                    }
-                metadata = {
-                    'name': title,
-                    'size': attachment.get('extensions', {}).get('fileSize', None),
-                    'creator': created_by,
-                    'created': created_date,
-                    'updated': last_updated,
-                    'media_type': media_type,
-                    'labels': [label['name'] for label in
-                               attachment.get('metadata', {}).get('labels', {}).get('results', [])],
-                    'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
-                        '_links', {}).get('download') else None
-                }
+                    download_url = self.base_url.rstrip('/') + attachment['_links']['download']
-                download_url = self.base_url.rstrip('/') + attachment['_links']['download']
+                    try:
+                        resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
+                        if resp.status_code == 200:
+                            content = resp.content
+                        else:
+                            content = f"[Failed to download {download_url}: HTTP status code {resp.status_code}]"
+                    except Exception as e:
+                        content = f"[Error downloading content: {str(e)}]"
-                try:
-                    resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
-                    if resp.status_code == 200:
-                        content = resp.content
+                    if isinstance(content, str):
+                        yield Document(page_content=content, metadata=metadata)
                     else:
-                        content = f"[Failed to download {download_url}: HTTP status code {resp.status_code}]"
-                except Exception as e:
-                    content = f"[Error downloading content: {str(e)}]"
-                if isinstance(content, str):
-                    yield Document(page_content=content, metadata=metadata)
-                else:
-                    yield Document(page_content="", metadata={
-                        **metadata,
-                        IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
-                        IndexerKeywords.CONTENT_IN_BYTES.value: content
-                    })
+                        yield Document(page_content="", metadata={
+                            **metadata,
+                            IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
+                            IndexerKeywords.CONTENT_IN_BYTES.value: content
+                        })
         except Exception as e:
             yield from ()
@@ -1648,6 +1666,13 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
             "include_restricted_content": (Optional[bool], Field(description="Include restricted content.", default=False)),
             "include_archived_content": (Optional[bool], Field(description="Include archived content.", default=False)),
             "include_attachments": (Optional[bool], Field(description="Include attachments.", default=False)),
+            'include_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to include when processing attachments: i.e. ['*.png', '*.jpg']. "
+                            "If empty, all files will be processed (except skip_extensions).",
+                default=[])),
+            'skip_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to skip when processing attachments: i.e. ['*.png', '*.jpg']",
+                default=[])),
             "include_comments": (Optional[bool], Field(description="Include comments.", default=False)),
             "include_labels": (Optional[bool], Field(description="Include labels.", default=True)),
             "ocr_languages": (Optional[str], Field(description="OCR languages for processing attachments.", default='eng')),

{alita_sdk-0.3.369.dist-info → alita_sdk-0.3.370.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.369
+Version: 0.3.370
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0

{alita_sdk-0.3.369.dist-info → alita_sdk-0.3.370.dist-info}/RECORD RENAMED Viewed

@@ -123,7 +123,7 @@ alita_sdk/runtime/tools/router.py,sha256=p7e0tX6YAWw2M2Nq0A_xqw1E2P-Xz1DaJvhUstf
 alita_sdk/runtime/tools/sandbox.py,sha256=WNz-aUMtkGCPg84dDy_0BPkyp-6YjoYB-xjIEFFrtKw,11601
 alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
 alita_sdk/runtime/tools/vectorstore.py,sha256=8vRhi1lGFEs3unvnflEi2p59U2MfV32lStpEizpDms0,34467
-alita_sdk/runtime/tools/vectorstore_base.py,sha256=4POq0NZ8FnMANop2JweeRNK9ViWcrpBM1y4Jl22E46E,26801
+alita_sdk/runtime/tools/vectorstore_base.py,sha256=1DYmMQEBMLetxQgi6D9Wd_vM_xVCa9qGTAfLOo2kNC0,27533
 alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
 alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -230,7 +230,7 @@ alita_sdk/tools/code/loaders/codesearcher.py,sha256=XoXXZtIQZhvjIwZlnl_4wVGHC-3s
 alita_sdk/tools/code/sonar/__init__.py,sha256=iPqj2PnUY4-btJjaDeWIPdn-c9L_uCr_qOoP_uwRoXw,3360
 alita_sdk/tools/code/sonar/api_wrapper.py,sha256=nNqxcWN_6W8c0ckj-Er9HkNuAdgQLoWBXh5UyzNutis,2653
 alita_sdk/tools/confluence/__init__.py,sha256=zRnPBM1c7VTRTS955HNc7AEGV5t8ACc2f9wBXmmeXao,6845
-alita_sdk/tools/confluence/api_wrapper.py,sha256=lUhGzcvYgTXx1bYr2lgK5t2lZFrnTWF4PJ_CWT8q-Ao,87805
+alita_sdk/tools/confluence/api_wrapper.py,sha256=cHIr0EnXZVGQMepcaIcFgMfyTKjlkKGbAd0z79pf-bo,89544
 alita_sdk/tools/confluence/loader.py,sha256=4bf5qrJMEiJzuZp2NlxO2XObLD1w7fxss_WyMUpe8sg,9290
 alita_sdk/tools/confluence/utils.py,sha256=Lxo6dBD0OlvM4o0JuK6qeB_4LV9BptiwJA9e1vqNcDw,435
 alita_sdk/tools/custom_open_api/__init__.py,sha256=9aT5SPNPWcJC6jMZEM-3rUCXVULj_3-qJLQKmnreKNo,2537
@@ -352,8 +352,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.369.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-alita_sdk-0.3.369.dist-info/METADATA,sha256=1fgFT8CTai8He4mId8cLCZhlVTYl6-pkFpBGUDh5Hds,19071
-alita_sdk-0.3.369.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-alita_sdk-0.3.369.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
-alita_sdk-0.3.369.dist-info/RECORD,,
+alita_sdk-0.3.370.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.370.dist-info/METADATA,sha256=7o5P_ba4fUU5FVQU9htx-olWpTUnrpVOcfl2o3DwSEs,19071
+alita_sdk-0.3.370.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.370.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.370.dist-info/RECORD,,

{alita_sdk-0.3.369.dist-info → alita_sdk-0.3.370.dist-info}/WHEEL RENAMED Viewed

File without changes

{alita_sdk-0.3.369.dist-info → alita_sdk-0.3.370.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{alita_sdk-0.3.369.dist-info → alita_sdk-0.3.370.dist-info}/top_level.txt RENAMED Viewed

File without changes

alita-sdk 0.3.369__py3-none-any.whl → 0.3.370__py3-none-any.whl

Potentially problematic release.

alita-sdk 0.3.369__py3-none-any.whl → 0.3.370__py3-none-any.whl