alita-sdk 0.3.369__py3-none-any.whl → 0.3.370__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic. Click here for more details.
- alita_sdk/runtime/tools/vectorstore_base.py +11 -0
- alita_sdk/tools/confluence/api_wrapper.py +80 -55
- {alita_sdk-0.3.369.dist-info → alita_sdk-0.3.370.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.369.dist-info → alita_sdk-0.3.370.dist-info}/RECORD +7 -7
- {alita_sdk-0.3.369.dist-info → alita_sdk-0.3.370.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.369.dist-info → alita_sdk-0.3.370.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.369.dist-info → alita_sdk-0.3.370.dist-info}/top_level.txt +0 -0
|
@@ -6,6 +6,8 @@ from typing import Any, Optional, List, Dict, Generator
|
|
|
6
6
|
|
|
7
7
|
from langchain_core.documents import Document
|
|
8
8
|
from langchain_core.messages import HumanMessage
|
|
9
|
+
from langchain_core.tools import ToolException
|
|
10
|
+
from psycopg.errors import DataException
|
|
9
11
|
from pydantic import BaseModel, model_validator, Field
|
|
10
12
|
|
|
11
13
|
from alita_sdk.tools.elitea_base import BaseToolApiWrapper
|
|
@@ -316,6 +318,15 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
316
318
|
if doc_id not in unique_docs or score > chunk_type_scores.get(doc_id, 0):
|
|
317
319
|
unique_docs[doc_id] = doc
|
|
318
320
|
chunk_type_scores[doc_id] = score
|
|
321
|
+
except DataException as dimException:
|
|
322
|
+
exception_str = str(dimException)
|
|
323
|
+
if 'different vector dimensions' in exception_str:
|
|
324
|
+
logger.error(f"Data exception: {exception_str}")
|
|
325
|
+
raise ToolException(f"Global search cannot be completed since collections were indexed using "
|
|
326
|
+
f"different embedding models. Use search within a single collection."
|
|
327
|
+
f"\nDetails: {exception_str}")
|
|
328
|
+
raise ToolException(f"Data exception during search. Possibly invalid filter: {exception_str}")
|
|
329
|
+
|
|
319
330
|
except Exception as e:
|
|
320
331
|
logger.warning(f"Error searching for document chunks: {str(e)}")
|
|
321
332
|
|
|
@@ -815,6 +815,10 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
|
|
|
815
815
|
from .loader import AlitaConfluenceLoader
|
|
816
816
|
from copy import copy
|
|
817
817
|
content_format = kwargs.get('content_format', 'view').lower()
|
|
818
|
+
|
|
819
|
+
self._index_include_attachments = kwargs.get('include_attachments', False)
|
|
820
|
+
self._include_extensions = kwargs.get('include_extensions', [])
|
|
821
|
+
self._skip_extensions = kwargs.get('skip_extensions', [])
|
|
818
822
|
base_params = {
|
|
819
823
|
'url': self.base_url,
|
|
820
824
|
'space_key': self.space,
|
|
@@ -847,65 +851,79 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
|
|
|
847
851
|
|
|
848
852
|
def _process_document(self, document: Document) -> Generator[Document, None, None]:
|
|
849
853
|
try:
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
854
|
+
if self._index_include_attachments:
|
|
855
|
+
page_id = document.metadata.get('id')
|
|
856
|
+
attachments = self.client.get_attachments_from_content(page_id)
|
|
857
|
+
if not attachments or not attachments.get('results'):
|
|
858
|
+
return f"No attachments found for page ID {page_id}."
|
|
859
|
+
|
|
860
|
+
# Get attachment history for created/updated info
|
|
861
|
+
history_map = {}
|
|
862
|
+
for attachment in attachments['results']:
|
|
863
|
+
try:
|
|
864
|
+
hist = self.client.history(attachment['id'])
|
|
865
|
+
history_map[attachment['id']] = hist
|
|
866
|
+
except Exception as e:
|
|
867
|
+
logger.warning(f"Failed to fetch history for attachment {attachment.get('title', '')}: {str(e)}")
|
|
868
|
+
history_map[attachment['id']] = None
|
|
869
|
+
|
|
870
|
+
import re
|
|
871
|
+
for attachment in attachments['results']:
|
|
872
|
+
title = attachment.get('title', '')
|
|
873
|
+
file_ext = title.lower().split('.')[-1] if '.' in title else ''
|
|
874
|
+
|
|
875
|
+
# Re-verify extension filters
|
|
876
|
+
# Check if file should be skipped based on skip_extensions
|
|
877
|
+
if any(re.match(pattern.replace('*', '.*') + '$', title, re.IGNORECASE)
|
|
878
|
+
for pattern in self._skip_extensions):
|
|
879
|
+
continue
|
|
880
|
+
|
|
881
|
+
# Check if file should be included based on include_extensions
|
|
882
|
+
# If include_extensions is empty, process all files (that weren't skipped)
|
|
883
|
+
if self._include_extensions and not (
|
|
884
|
+
any(re.match(pattern.replace('*', '.*') + '$', title, re.IGNORECASE)
|
|
885
|
+
for pattern in self._include_extensions)):
|
|
886
|
+
continue
|
|
887
|
+
|
|
888
|
+
media_type = attachment.get('metadata', {}).get('mediaType', '')
|
|
889
|
+
# Core metadata extraction with history
|
|
890
|
+
hist = history_map.get(attachment['id']) or {}
|
|
891
|
+
created_by = hist.get('createdBy', {}).get('displayName', '') if hist else attachment.get('creator', {}).get('displayName', '')
|
|
892
|
+
created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
|
|
893
|
+
last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
|
|
894
|
+
|
|
895
|
+
metadata = {
|
|
896
|
+
'name': title,
|
|
897
|
+
'size': attachment.get('extensions', {}).get('fileSize', None),
|
|
898
|
+
'creator': created_by,
|
|
899
|
+
'created': created_date,
|
|
900
|
+
'updated': last_updated,
|
|
901
|
+
'media_type': media_type,
|
|
902
|
+
'labels': [label['name'] for label in
|
|
903
|
+
attachment.get('metadata', {}).get('labels', {}).get('results', [])],
|
|
904
|
+
'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
|
|
905
|
+
'_links', {}).get('download') else None
|
|
906
|
+
}
|
|
876
907
|
|
|
877
|
-
|
|
878
|
-
'name': title,
|
|
879
|
-
'size': attachment.get('extensions', {}).get('fileSize', None),
|
|
880
|
-
'creator': created_by,
|
|
881
|
-
'created': created_date,
|
|
882
|
-
'updated': last_updated,
|
|
883
|
-
'media_type': media_type,
|
|
884
|
-
'labels': [label['name'] for label in
|
|
885
|
-
attachment.get('metadata', {}).get('labels', {}).get('results', [])],
|
|
886
|
-
'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
|
|
887
|
-
'_links', {}).get('download') else None
|
|
888
|
-
}
|
|
908
|
+
download_url = self.base_url.rstrip('/') + attachment['_links']['download']
|
|
889
909
|
|
|
890
|
-
|
|
910
|
+
try:
|
|
911
|
+
resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
|
|
912
|
+
if resp.status_code == 200:
|
|
913
|
+
content = resp.content
|
|
914
|
+
else:
|
|
915
|
+
content = f"[Failed to download {download_url}: HTTP status code {resp.status_code}]"
|
|
916
|
+
except Exception as e:
|
|
917
|
+
content = f"[Error downloading content: {str(e)}]"
|
|
891
918
|
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
if resp.status_code == 200:
|
|
895
|
-
content = resp.content
|
|
919
|
+
if isinstance(content, str):
|
|
920
|
+
yield Document(page_content=content, metadata=metadata)
|
|
896
921
|
else:
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
yield Document(page_content=content, metadata=metadata)
|
|
903
|
-
else:
|
|
904
|
-
yield Document(page_content="", metadata={
|
|
905
|
-
**metadata,
|
|
906
|
-
IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
|
|
907
|
-
IndexerKeywords.CONTENT_IN_BYTES.value: content
|
|
908
|
-
})
|
|
922
|
+
yield Document(page_content="", metadata={
|
|
923
|
+
**metadata,
|
|
924
|
+
IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
|
|
925
|
+
IndexerKeywords.CONTENT_IN_BYTES.value: content
|
|
926
|
+
})
|
|
909
927
|
except Exception as e:
|
|
910
928
|
yield from ()
|
|
911
929
|
|
|
@@ -1648,6 +1666,13 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
|
|
|
1648
1666
|
"include_restricted_content": (Optional[bool], Field(description="Include restricted content.", default=False)),
|
|
1649
1667
|
"include_archived_content": (Optional[bool], Field(description="Include archived content.", default=False)),
|
|
1650
1668
|
"include_attachments": (Optional[bool], Field(description="Include attachments.", default=False)),
|
|
1669
|
+
'include_extensions': (Optional[List[str]], Field(
|
|
1670
|
+
description="List of file extensions to include when processing attachments: i.e. ['*.png', '*.jpg']. "
|
|
1671
|
+
"If empty, all files will be processed (except skip_extensions).",
|
|
1672
|
+
default=[])),
|
|
1673
|
+
'skip_extensions': (Optional[List[str]], Field(
|
|
1674
|
+
description="List of file extensions to skip when processing attachments: i.e. ['*.png', '*.jpg']",
|
|
1675
|
+
default=[])),
|
|
1651
1676
|
"include_comments": (Optional[bool], Field(description="Include comments.", default=False)),
|
|
1652
1677
|
"include_labels": (Optional[bool], Field(description="Include labels.", default=True)),
|
|
1653
1678
|
"ocr_languages": (Optional[str], Field(description="OCR languages for processing attachments.", default='eng')),
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: alita_sdk
|
|
3
|
-
Version: 0.3.369
|
|
3
|
+
Version: 0.3.370
|
|
4
4
|
Summary: SDK for building langchain agents using resources from Alita
|
|
5
5
|
Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -123,7 +123,7 @@ alita_sdk/runtime/tools/router.py,sha256=p7e0tX6YAWw2M2Nq0A_xqw1E2P-Xz1DaJvhUstf
|
|
|
123
123
|
alita_sdk/runtime/tools/sandbox.py,sha256=WNz-aUMtkGCPg84dDy_0BPkyp-6YjoYB-xjIEFFrtKw,11601
|
|
124
124
|
alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
|
|
125
125
|
alita_sdk/runtime/tools/vectorstore.py,sha256=8vRhi1lGFEs3unvnflEi2p59U2MfV32lStpEizpDms0,34467
|
|
126
|
-
alita_sdk/runtime/tools/vectorstore_base.py,sha256=
|
|
126
|
+
alita_sdk/runtime/tools/vectorstore_base.py,sha256=1DYmMQEBMLetxQgi6D9Wd_vM_xVCa9qGTAfLOo2kNC0,27533
|
|
127
127
|
alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
|
|
128
128
|
alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
129
129
|
alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
|
|
@@ -230,7 +230,7 @@ alita_sdk/tools/code/loaders/codesearcher.py,sha256=XoXXZtIQZhvjIwZlnl_4wVGHC-3s
|
|
|
230
230
|
alita_sdk/tools/code/sonar/__init__.py,sha256=iPqj2PnUY4-btJjaDeWIPdn-c9L_uCr_qOoP_uwRoXw,3360
|
|
231
231
|
alita_sdk/tools/code/sonar/api_wrapper.py,sha256=nNqxcWN_6W8c0ckj-Er9HkNuAdgQLoWBXh5UyzNutis,2653
|
|
232
232
|
alita_sdk/tools/confluence/__init__.py,sha256=zRnPBM1c7VTRTS955HNc7AEGV5t8ACc2f9wBXmmeXao,6845
|
|
233
|
-
alita_sdk/tools/confluence/api_wrapper.py,sha256=
|
|
233
|
+
alita_sdk/tools/confluence/api_wrapper.py,sha256=cHIr0EnXZVGQMepcaIcFgMfyTKjlkKGbAd0z79pf-bo,89544
|
|
234
234
|
alita_sdk/tools/confluence/loader.py,sha256=4bf5qrJMEiJzuZp2NlxO2XObLD1w7fxss_WyMUpe8sg,9290
|
|
235
235
|
alita_sdk/tools/confluence/utils.py,sha256=Lxo6dBD0OlvM4o0JuK6qeB_4LV9BptiwJA9e1vqNcDw,435
|
|
236
236
|
alita_sdk/tools/custom_open_api/__init__.py,sha256=9aT5SPNPWcJC6jMZEM-3rUCXVULj_3-qJLQKmnreKNo,2537
|
|
@@ -352,8 +352,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
|
|
|
352
352
|
alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
|
|
353
353
|
alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
|
|
354
354
|
alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
|
|
355
|
-
alita_sdk-0.3.369.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
356
|
-
alita_sdk-0.3.
|
|
357
|
-
alita_sdk-0.3.369.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
358
|
-
alita_sdk-0.3.369.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
|
|
359
|
-
alita_sdk-0.3.369.dist-info/RECORD,,
|
|
355
|
+
alita_sdk-0.3.370.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
356
|
+
alita_sdk-0.3.370.dist-info/METADATA,sha256=7o5P_ba4fUU5FVQU9htx-olWpTUnrpVOcfl2o3DwSEs,19071
|
|
357
|
+
alita_sdk-0.3.370.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
358
|
+
alita_sdk-0.3.370.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
|
|
359
|
+
alita_sdk-0.3.370.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|