alita-sdk 0.3.365__py3-none-any.whl → 0.3.462__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent_executor.py +144 -0
- alita_sdk/cli/agent_loader.py +197 -0
- alita_sdk/cli/agent_ui.py +166 -0
- alita_sdk/cli/agents.py +1069 -0
- alita_sdk/cli/callbacks.py +576 -0
- alita_sdk/cli/cli.py +159 -0
- alita_sdk/cli/config.py +153 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +330 -0
- alita_sdk/cli/toolkit_loader.py +55 -0
- alita_sdk/cli/tools/__init__.py +9 -0
- alita_sdk/cli/tools/filesystem.py +905 -0
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +1 -1
- alita_sdk/runtime/clients/client.py +47 -10
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +373 -0
- alita_sdk/runtime/langchain/assistant.py +70 -41
- alita_sdk/runtime/langchain/constants.py +6 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +73 -100
- alita_sdk/runtime/langchain/langraph_agent.py +164 -38
- alita_sdk/runtime/langchain/utils.py +43 -7
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +24 -0
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/artifact.py +5 -6
- alita_sdk/runtime/toolkits/mcp.py +895 -0
- alita_sdk/runtime/toolkits/tools.py +140 -50
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +94 -5
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +204 -114
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/sandbox.py +180 -79
- alita_sdk/runtime/tools/vectorstore.py +22 -21
- alita_sdk/runtime/tools/vectorstore_base.py +79 -26
- alita_sdk/runtime/utils/mcp_oauth.py +164 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/streamlit.py +34 -3
- alita_sdk/runtime/utils/toolkit_utils.py +14 -4
- alita_sdk/runtime/utils/utils.py +1 -0
- alita_sdk/tools/__init__.py +48 -31
- alita_sdk/tools/ado/repos/__init__.py +1 -0
- alita_sdk/tools/ado/test_plan/__init__.py +1 -1
- alita_sdk/tools/ado/wiki/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +194 -112
- alita_sdk/tools/bitbucket/__init__.py +1 -0
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/code/sonar/__init__.py +1 -1
- alita_sdk/tools/code_indexer_toolkit.py +15 -5
- alita_sdk/tools/confluence/__init__.py +2 -2
- alita_sdk/tools/confluence/api_wrapper.py +110 -63
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/elitea_base.py +22 -22
- alita_sdk/tools/github/__init__.py +2 -2
- alita_sdk/tools/gitlab/__init__.py +2 -1
- alita_sdk/tools/gitlab/api_wrapper.py +11 -7
- alita_sdk/tools/gitlab_org/__init__.py +1 -2
- alita_sdk/tools/google_places/__init__.py +2 -1
- alita_sdk/tools/jira/__init__.py +1 -0
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +1 -1
- alita_sdk/tools/postman/__init__.py +2 -1
- alita_sdk/tools/postman/api_wrapper.py +18 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +2 -2
- alita_sdk/tools/qtest/__init__.py +3 -3
- alita_sdk/tools/qtest/api_wrapper.py +1708 -76
- alita_sdk/tools/rally/__init__.py +1 -2
- alita_sdk/tools/report_portal/__init__.py +1 -0
- alita_sdk/tools/salesforce/__init__.py +1 -0
- alita_sdk/tools/servicenow/__init__.py +2 -3
- alita_sdk/tools/sharepoint/__init__.py +1 -0
- alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +1 -0
- alita_sdk/tools/sql/__init__.py +2 -1
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +1 -0
- alita_sdk/tools/testrail/__init__.py +1 -3
- alita_sdk/tools/utils/__init__.py +17 -0
- alita_sdk/tools/utils/content_parser.py +35 -24
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +67 -21
- alita_sdk/tools/xray/__init__.py +2 -1
- alita_sdk/tools/zephyr/__init__.py +2 -1
- alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
- alita_sdk/tools/zephyr_essential/__init__.py +1 -0
- alita_sdk/tools/zephyr_scale/__init__.py +1 -0
- alita_sdk/tools/zephyr_squad/__init__.py +1 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +118 -93
- alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0

alita_sdk/tools/code_indexer_toolkit.py
CHANGED

@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator

@@ -14,14 +15,14 @@ logger = logging.getLogger(__name__)


 class CodeIndexerToolkit(BaseIndexerToolkit):
-    def _get_indexed_data(self,
+    def _get_indexed_data(self, index_name: str):
         if not self.vector_adapter:
             raise ToolException("Vector adapter is not initialized. "
                                 "Check your configuration: embedding_model and vectorstore_type.")
-        return self.vector_adapter.get_code_indexed_data(self,
+        return self.vector_adapter.get_code_indexed_data(self, index_name)

     def key_fn(self, document: Document):
-        return document.metadata.get(
+        return document.metadata.get("filename")

     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and

@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )

     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from
+        yield from documents

     def _index_tool_params(self):
         """Return the parameters for indexing data."""

@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()

@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
             self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
             self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")

-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())

     def __handle_get_files(self, path: str, branch: str):
         """
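The normalization block added above coerces whatever the repository client returns into a string before hashing. A minimal standalone sketch of that logic, assuming the same file/file_content shapes as in the diff (the helper name is illustrative, not part of the SDK):

import hashlib
import json


def normalize_content(file: str, file_content) -> str:
    # bytes blobs are decoded leniently instead of raising
    if isinstance(file_content, bytes):
        return file_content.decode("utf-8", errors="ignore")
    # some clients pre-parse .json files into dicts
    if isinstance(file_content, dict) and file.endswith(".json"):
        return json.dumps(file_content)
    # anything else is stringified so the sha256 hashing step cannot fail
    return file_content if isinstance(file_content, str) else str(file_content)


content = normalize_content("config.json", {"a": 1})
print(hashlib.sha256(content.encode("utf-8")).hexdigest()[:12])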

alita_sdk/tools/confluence/__init__.py
CHANGED

@@ -67,8 +67,7 @@ class ConfluenceToolkit(BaseToolkit):

         model = create_model(
             name,
-            space=(str, Field(description="Space",
-                              'max_toolkit_length': ConfluenceToolkit.toolkit_max_length})),
+            space=(str, Field(description="Space")),
             cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
             limit=(int, Field(description="Pages limit per request", default=5)),
             labels=(Optional[str], Field(

@@ -95,6 +94,7 @@ class ConfluenceToolkit(BaseToolkit):
             'metadata': {
                 "label": "Confluence",
                 "icon_url": None,
+                "max_length": ConfluenceToolkit.toolkit_max_length,
                 "categories": ["documentation"],
                 "extra_categories": ["confluence", "wiki", "knowledge base", "documentation", "atlassian"]
             }

alita_sdk/tools/confluence/api_wrapper.py
CHANGED

@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal

 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log

 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit

@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)

     @model_validator(mode='before')

@@ -498,7 +501,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         restrictions = self.client.get_all_restrictions_for_content(page["id"])

         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
             and not restrictions["read"]["restrictions"]["user"]["results"]
             and not restrictions["read"]["restrictions"]["group"]["results"]
         )

@@ -518,18 +523,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
             ),
             before_sleep=before_sleep_log(logger, logging.WARNING),
         )(self.client.get_page_by_id)
-        page = get_page(
-            page_id=page_id, expand=f"{self.content_format.value},version"
-        )
-        if not self.include_restricted_content and not self.is_public_page(page):
-            continue
+        try:
+            page = get_page(
+                page_id=page_id, expand=f"{self.content_format.value},version"
+            )
+        except (ApiError, HTTPError) as e:
+            logger.error(f"Error fetching page with ID {page_id}: {e}")
+            page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+            # store errors
+            if self._errors is None:
+                self._errors = []
+            self._errors.append(page_content_temp)
+            return Document(page_content=page_content_temp,
+                            metadata={})
+        # TODO: update on toolkit advanced settings level as a separate feature
+        # if not self.include_restricted_content and not self.is_public_page(page):
+        #     continue
         yield self.process_page(page, skip_images)

+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content

@@ -815,6 +837,10 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         from .loader import AlitaConfluenceLoader
         from copy import copy
         content_format = kwargs.get('content_format', 'view').lower()
+
+        self._index_include_attachments = kwargs.get('include_attachments', False)
+        self._include_extensions = kwargs.get('include_extensions', [])
+        self._skip_extensions = kwargs.get('skip_extensions', [])
         base_params = {
             'url': self.base_url,
             'space_key': self.space,

@@ -847,65 +873,79 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):

     def _process_document(self, document: Document) -> Generator[Document, None, None]:
         try:
-            ... (old lines 850-875 not captured)
+            if self._index_include_attachments:
+                page_id = document.metadata.get('id')
+                attachments = self.client.get_attachments_from_content(page_id)
+                if not attachments or not attachments.get('results'):
+                    return f"No attachments found for page ID {page_id}."
+
+                # Get attachment history for created/updated info
+                history_map = {}
+                for attachment in attachments['results']:
+                    try:
+                        hist = self.client.history(attachment['id'])
+                        history_map[attachment['id']] = hist
+                    except Exception as e:
+                        logger.warning(f"Failed to fetch history for attachment {attachment.get('title', '')}: {str(e)}")
+                        history_map[attachment['id']] = None
+
+                import re
+                for attachment in attachments['results']:
+                    title = attachment.get('title', '')
+                    file_ext = title.lower().split('.')[-1] if '.' in title else ''
+
+                    # Re-verify extension filters
+                    # Check if file should be skipped based on skip_extensions
+                    if any(re.match(pattern.replace('*', '.*') + '$', title, re.IGNORECASE)
+                           for pattern in self._skip_extensions):
+                        continue
+
+                    # Check if file should be included based on include_extensions
+                    # If include_extensions is empty, process all files (that weren't skipped)
+                    if self._include_extensions and not (
+                            any(re.match(pattern.replace('*', '.*') + '$', title, re.IGNORECASE)
+                                for pattern in self._include_extensions)):
+                        continue
+
+                    media_type = attachment.get('metadata', {}).get('mediaType', '')
+                    # Core metadata extraction with history
+                    hist = history_map.get(attachment['id']) or {}
+                    created_by = hist.get('createdBy', {}).get('displayName', '') if hist else attachment.get('creator', {}).get('displayName', '')
+                    created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
+                    last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
+
+                    metadata = {
+                        'name': title,
+                        'size': attachment.get('extensions', {}).get('fileSize', None),
+                        'creator': created_by,
+                        'created': created_date,
+                        'updated': last_updated,
+                        'media_type': media_type,
+                        'labels': [label['name'] for label in
+                                   attachment.get('metadata', {}).get('labels', {}).get('results', [])],
+                        'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
+                            '_links', {}).get('download') else None
+                    }

-
-                'name': title,
-                'size': attachment.get('extensions', {}).get('fileSize', None),
-                'creator': created_by,
-                'created': created_date,
-                'updated': last_updated,
-                'media_type': media_type,
-                'labels': [label['name'] for label in
-                           attachment.get('metadata', {}).get('labels', {}).get('results', [])],
-                'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
-                    '_links', {}).get('download') else None
-            }
+                    download_url = self.base_url.rstrip('/') + attachment['_links']['download']

-
+                    try:
+                        resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
+                        if resp.status_code == 200:
+                            content = resp.content
+                        else:
+                            content = f"[Failed to download {download_url}: HTTP status code {resp.status_code}]"
+                    except Exception as e:
+                        content = f"[Error downloading content: {str(e)}]"

-
-
-                if resp.status_code == 200:
-                    content = resp.content
+                    if isinstance(content, str):
+                        yield Document(page_content=content, metadata=metadata)
                     else:
-
-
-
-
-
-                    yield Document(page_content=content, metadata=metadata)
-                else:
-                    yield Document(page_content="", metadata={
-                        **metadata,
-                        IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
-                        IndexerKeywords.CONTENT_IN_BYTES.value: content
-                    })
+                        yield Document(page_content="", metadata={
+                            **metadata,
+                            IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
+                            IndexerKeywords.CONTENT_IN_BYTES.value: content
+                        })
         except Exception as e:
             yield from ()

@@ -1648,8 +1688,15 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         "include_restricted_content": (Optional[bool], Field(description="Include restricted content.", default=False)),
         "include_archived_content": (Optional[bool], Field(description="Include archived content.", default=False)),
         "include_attachments": (Optional[bool], Field(description="Include attachments.", default=False)),
+        'include_extensions': (Optional[List[str]], Field(
+            description="List of file extensions to include when processing attachments: i.e. ['*.png', '*.jpg']. "
+                        "If empty, all files will be processed (except skip_extensions).",
+            default=[])),
+        'skip_extensions': (Optional[List[str]], Field(
+            description="List of file extensions to skip when processing attachments: i.e. ['*.png', '*.jpg']",
+            default=[])),
         "include_comments": (Optional[bool], Field(description="Include comments.", default=False)),
-        "include_labels": (Optional[bool], Field(description="Include labels.", default=
+        "include_labels": (Optional[bool], Field(description="Include labels.", default=False)),
         "ocr_languages": (Optional[str], Field(description="OCR languages for processing attachments.", default='eng')),
         "keep_markdown_format": (Optional[bool], Field(description="Keep the markdown format.", default=True)),
         "keep_newlines": (Optional[bool], Field(description="Keep newlines in the content.", default=True)),
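The attachment filters above translate glob-style patterns into regular expressions by replacing * with .* and anchoring the end of the title. A small sketch of that check, assuming the same include/skip semantics as the include_extensions and skip_extensions fields (the function name is illustrative):

import re
from typing import List


def attachment_passes(title: str, include: List[str], skip: List[str]) -> bool:
    def matches(pattern: str) -> bool:
        # '*.png' -> '.*.png$', matched case-insensitively against the title
        return re.match(pattern.replace("*", ".*") + "$", title, re.IGNORECASE) is not None

    if any(matches(p) for p in skip):
        return False
    # an empty include list means "process everything that was not skipped"
    return not include or any(matches(p) for p in include)


print(attachment_passes("diagram.PNG", include=["*.png", "*.jpg"], skip=[]))  # True
print(attachment_passes("notes.txt", include=["*.png"], skip=[]))             # False
print(attachment_passes("secret.png", include=[], skip=["secret.*"]))         # False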

alita_sdk/tools/confluence/loader.py
CHANGED

@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger

 import requests
+from langchain_core.documents import Document

 logger = getLogger(__name__)
 from PIL import Image

@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)

+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
alita_sdk/tools/elitea_base.py
CHANGED

@@ -33,12 +33,12 @@ LoaderSchema = create_model(
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
-
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
 )

 BaseCodeIndexParams = create_model(
     "BaseCodeIndexParams",
-
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
     clean_index=(Optional[bool], Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
     progress_step=(Optional[int], Field(default=5, ge=0, le=100,
                                         description="Optional step size for progress reporting during indexing")),

@@ -50,14 +50,14 @@ BaseCodeIndexParams = create_model(
 RemoveIndexParams = create_model(
     "RemoveIndexParams",
-
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
 )

 BaseSearchParams = create_model(
     "BaseSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-
-        description="Optional
+    index_name=(Optional[str], Field(
+        description="Optional index name (max 7 characters). Leave empty to search across all datasets",
         default="", max_length=7)),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",

@@ -87,7 +87,7 @@ BaseSearchParams = create_model(
 BaseStepbackSearchParams = create_model(
     "BaseStepbackSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
     messages=(Optional[List], Field(description="Chat messages for stepback search context", default=[])),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",

@@ -324,12 +324,12 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         #
         docs = base_chunker(file_content_generator=docs, config=base_chunking_config)
         #
-
+        index_name = kwargs.get("index_name")
         progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         vs = self._init_vector_store()
         #
-        return vs.index_documents(docs,
+        return vs.index_documents(docs, index_name=index_name, progress_step=progress_step, clean_index=clean_index)

     def _process_documents(self, documents: List[Document]) -> Generator[Document, None, None]:
         """

@@ -399,10 +399,10 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         )
         return self._vector_store

-    def remove_index(self,
+    def remove_index(self, index_name: str = ""):
         """Cleans the indexed data in the collection."""
-        self._init_vector_store()._clean_collection(
-        return (f"Collection '{
+        self._init_vector_store()._clean_collection(index_name=index_name)
+        return (f"Collection '{index_name}' has been removed from the vector store.\n"
                 f"Available collections: {self.list_collections()}")

     def list_collections(self):

@@ -410,19 +410,19 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         vectorstore_wrapper = self._init_vector_store()
         return vectorstore_wrapper.list_collections()

-    def _build_collection_filter(self, filter: dict | str,
+    def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
         """Builds a filter for the collection based on the provided suffix."""

         filter = filter if isinstance(filter, dict) else json.loads(filter)
-        if
+        if index_name:
             filter.update({"collection": {
-                "$eq":
+                "$eq": index_name.strip()
             }})
         return filter

     def search_index(self,
                      query: str,
-
+                     index_name: str = "",
                      filter: dict | str = {}, cut_off: float = 0.5,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,

@@ -431,7 +431,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                      **kwargs):
         """ Searches indexed documents in the vector store."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter,
+        filter = self._build_collection_filter(filter, index_name)
         found_docs = vectorstore.search_documents(
             query,
             doctype=self.doctype,

@@ -448,7 +448,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def stepback_search_index(self,
                               query: str,
                               messages: List[Dict[str, Any]] = [],
-
+                              index_name: str = "",
                               filter: dict | str = {}, cut_off: float = 0.5,
                               search_top: int = 10, reranker: dict = {},
                               full_text_search: Optional[Dict[str, Any]] = None,

@@ -457,7 +457,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                               **kwargs):
         """ Searches indexed documents in the vector store."""

-        filter = self._build_collection_filter(filter,
+        filter = self._build_collection_filter(filter, index_name)
         vectorstore = self._init_vector_store()
         found_docs = vectorstore.stepback_search(
             query,

@@ -475,7 +475,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def stepback_summary_index(self,
                                query: str,
                                messages: List[Dict[str, Any]] = [],
-
+                               index_name: str = "",
                                filter: dict | str = {}, cut_off: float = 0.5,
                                search_top: int = 10, reranker: dict = {},
                                full_text_search: Optional[Dict[str, Any]] = None,

@@ -484,7 +484,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                                **kwargs):
         """ Generates a summary of indexed documents using stepback technique."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter,
+        filter = self._build_collection_filter(filter, index_name)

         found_docs = vectorstore.stepback_summary(
             query,

@@ -655,7 +655,7 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
         return parse_code_files_for_db(file_content_generator())

     def index_data(self,
-
+                   index_name: str,
                    branch: Optional[str] = None,
                    whitelist: Optional[List[str]] = None,
                    blacklist: Optional[List[str]] = None,

@@ -669,7 +669,7 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
         )
         vectorstore = self._init_vector_store()
         clean_index = kwargs.get('clean_index', False)
-        return vectorstore.index_documents(documents,
+        return vectorstore.index_documents(documents, index_name=index_name,
                                            clean_index=clean_index, is_code=True,
                                            progress_step=kwargs.get('progress_step', 5))
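Throughout elitea_base.py the index and search tools now take an index_name (capped at 7 characters) that is folded into the vector-store metadata filter. A sketch of the _build_collection_filter behavior shown in the diff, assuming the same $eq filter shape:

import json
from typing import Union


def build_collection_filter(filter: Union[dict, str], index_name: str = "") -> dict:
    # the wrapper accepts either a dict or a JSON string
    filter = filter if isinstance(filter, dict) else json.loads(filter)
    if index_name:
        # restrict the search to a single named index (collection)
        filter.update({"collection": {"$eq": index_name.strip()}})
    return filter


print(build_collection_filter('{"author": "me"}', "docs1"))
# {'author': 'me', 'collection': {'$eq': 'docs1'}}
print(build_collection_filter({}, ""))  # {} -- an empty name searches across all datasets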
alita_sdk/tools/github/__init__.py
CHANGED

@@ -53,6 +53,7 @@ class AlitaGitHubToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitHub",
                 "icon_url": None,
+                "max_length": AlitaGitHubToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["github", "git", "repository", "code", "version control"],
             },

@@ -62,8 +63,7 @@ class AlitaGitHubToolkit(BaseToolkit):
                                                   json_schema_extra={'configuration_types': ['github']})),
             pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector configuration", default=None,
                                                   json_schema_extra={'configuration_types': ['pgvector']})),
-            repository=(str, Field(description="Github repository",
-                                   'max_toolkit_length': AlitaGitHubToolkit.toolkit_max_length})),
+            repository=(str, Field(description="Github repository")),
             active_branch=(Optional[str], Field(description="Active branch", default="main")),
             base_branch=(Optional[str], Field(description="Github Base branch", default="main")),
             # embedder settings
alita_sdk/tools/gitlab/__init__.py
CHANGED

@@ -43,7 +43,7 @@ class AlitaGitlabToolkit(BaseToolkit):
         AlitaGitlabToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            repository=(str, Field(description="GitLab repository"
+            repository=(str, Field(description="GitLab repository")),
             gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration", json_schema_extra={'configuration_types': ['gitlab']})),
             branch=(str, Field(description="Main branch", default="main")),
             # indexer settings

@@ -57,6 +57,7 @@ class AlitaGitlabToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitLab",
                 "icon_url": None,
+                "max_length": AlitaGitlabToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
             }
alita_sdk/tools/gitlab/api_wrapper.py
CHANGED

@@ -117,7 +117,11 @@ class GitLabAPIWrapper(CodeIndexerToolkit):

     @model_validator(mode='before')
     @classmethod
-    def
+    def validate_toolkit_before(cls, values: Dict) -> Dict:
+        return super().validate_toolkit(values)
+
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:

@@ -125,17 +129,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=
-            private_token=
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )

         g.auth()
-
-
-        return
+        self._git = g
+        self._active_branch = self.branch
+        return self

     @property
     def repo_instance(self):
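GitLabAPIWrapper now splits validation into a mode='before' classmethod that delegates to the parent and a mode='after' instance validator that opens the GitLab connection. A generic sketch of that pydantic v2 pattern under assumed, simplified fields (the names here are illustrative, not the wrapper's real ones):

from typing import Dict

from pydantic import BaseModel, model_validator


class Wrapper(BaseModel):
    url: str
    repository: str

    @model_validator(mode="before")
    @classmethod
    def validate_before(cls, values: Dict) -> Dict:
        # raw-input fixups run before field parsing
        values.setdefault("repository", "group/project")
        return values

    @model_validator(mode="after")
    def validate_after(self) -> "Wrapper":
        # side effects (e.g. opening a client) run on the fully built model
        self.url = self.url.rstrip("/")
        return self


print(Wrapper(url="https://gitlab.example.com/"))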
alita_sdk/tools/gitlab_org/__init__.py
CHANGED

@@ -30,8 +30,6 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
         AlitaGitlabSpaceToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            name=(str, Field(description="Toolkit name", json_schema_extra={'toolkit_name': True,
-                                                                            'max_toolkit_length': AlitaGitlabSpaceToolkit.toolkit_max_length})),
             gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration",
                                                              json_schema_extra={
                                                                  'configuration_types': ['gitlab']})),

@@ -46,6 +44,7 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitLab Org",
                 "icon_url": None,
+                "max_length": AlitaGitlabSpaceToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
             }
alita_sdk/tools/google_places/__init__.py
CHANGED

@@ -30,7 +30,7 @@ class GooglePlacesToolkit(BaseToolkit):
         GooglePlacesToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            results_count=(Optional[int], Field(description="Results number to show", default=None
+            results_count=(Optional[int], Field(description="Results number to show", default=None)),
             google_places_configuration=(GooglePlacesConfiguration, Field(description="Google Places Configuration", json_schema_extra={'configuration_types': ['google_places']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=

@@ -38,6 +38,7 @@ class GooglePlacesToolkit(BaseToolkit):
             'metadata':
             {
                 "label": "Google Places", "icon_url": "gplaces-icon.svg",
+                "max_length": GooglePlacesToolkit.toolkit_max_length,
                 "categories": ["other"],
                 "extra_categories": ["google", "places", "maps", "location",
                                      "geolocation"],
alita_sdk/tools/jira/__init__.py
CHANGED

@@ -89,6 +89,7 @@ class JiraToolkit(BaseToolkit):
             'metadata': {
                 "label": "Jira",
                 "icon_url": "jira-icon.svg",
+                "max_length": JiraToolkit.toolkit_max_length,
                 "categories": ["project management"],
                 "extra_categories": ["jira", "atlassian", "issue tracking", "project management", "task management"],
             }
alita_sdk/tools/jira/api_wrapper.py
CHANGED

@@ -563,7 +563,7 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         Use the appropriate issue link type (e.g., "Test", "Relates", "Blocks").
         If we use "Test" linktype, the test is inward issue, the story/other issue is outward issue.."""

-        comment = "
+        comment = f"Issue {inward_issue_key} was linked to {outward_issue_key}."
         comment_body = {"content": [{"content": [{"text": comment,"type": "text"}],"type": "paragraph"}],"type": "doc","version": 1} if self.api_version == "3" else comment
         link_data = {
             "type": {"name": f"{linktype}"},
alita_sdk/tools/memory/__init__.py
CHANGED

@@ -61,7 +61,7 @@ class MemoryToolkit(BaseToolkit):

         return create_model(
             'memory',
-            namespace=(str, Field(description="Memory namespace"
+            namespace=(str, Field(description="Memory namespace")),
             pgvector_configuration=(PgVectorConfiguration, Field(description="PgVector Configuration",
                                                                  json_schema_extra={
                                                                      'configuration_types': ['pgvector']})),