alita-sdk 0.3.376__py3-none-any.whl → 0.3.435__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of alita-sdk might be problematic.
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/client.py +9 -4
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +8 -0
- alita_sdk/runtime/langchain/assistant.py +41 -38
- alita_sdk/runtime/langchain/constants.py +5 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
- alita_sdk/runtime/langchain/langraph_agent.py +91 -27
- alita_sdk/runtime/langchain/utils.py +24 -4
- alita_sdk/runtime/models/mcp_models.py +57 -0
- alita_sdk/runtime/toolkits/__init__.py +24 -0
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/mcp.py +787 -0
- alita_sdk/runtime/toolkits/tools.py +98 -50
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +20 -28
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +146 -114
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +79 -10
- alita_sdk/runtime/tools/sandbox.py +166 -63
- alita_sdk/runtime/tools/vectorstore.py +3 -2
- alita_sdk/runtime/tools/vectorstore_base.py +4 -3
- alita_sdk/runtime/utils/streamlit.py +34 -3
- alita_sdk/runtime/utils/toolkit_utils.py +5 -2
- alita_sdk/runtime/utils/utils.py +1 -0
- alita_sdk/tools/__init__.py +48 -31
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +75 -66
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/code_indexer_toolkit.py +13 -3
- alita_sdk/tools/confluence/api_wrapper.py +29 -7
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/elitea_base.py +7 -7
- alita_sdk/tools/gitlab/api_wrapper.py +11 -7
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/qtest/api_wrapper.py +522 -74
- alita_sdk/tools/sharepoint/api_wrapper.py +104 -33
- alita_sdk/tools/sharepoint/authorization_helper.py +175 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/utils/content_parser.py +27 -16
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +19 -6
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/RECORD +60 -55
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/top_level.txt +0 -0
alita_sdk/tools/sharepoint/api_wrapper.py

@@ -8,6 +8,7 @@ from office365.runtime.auth.client_credential import ClientCredential
 from office365.sharepoint.client_context import ClientContext
 from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr
 
+from .utils import decode_sharepoint_string
 from ..non_code_indexer_toolkit import NonCodeIndexerToolkit
 from ..utils.content_parser import parse_file_content
 from ...runtime.utils.utils import IndexerKeywords
@@ -91,44 +92,85 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
             target_list = self._client.web.lists.get_by_title(list_title)
             self._client.load(target_list)
             self._client.execute_query()
-            items = target_list.items.
-            logging.info("{0} items from sharepoint loaded successfully.".format(len(items)))
+            items = target_list.items.top(limit).get().execute_query()
+            logging.info("{0} items from sharepoint loaded successfully via SharePoint REST API.".format(len(items)))
             result = []
             for item in items:
                 result.append(item.properties)
             return result
-        except Exception as
-            logging.
-            […]
+        except Exception as base_e:
+            logging.warning(f"Primary SharePoint REST list read failed: {base_e}. Attempting Graph API fallback.")
+            # Attempt Graph API fallback
+            try:
+                from .authorization_helper import SharepointAuthorizationHelper
+                auth_helper = SharepointAuthorizationHelper(
+                    client_id=self.client_id,
+                    client_secret=self.client_secret.get_secret_value() if self.client_secret else None,
+                    tenant="",  # optional for graph api (derived inside helper)
+                    scope="",  # optional for graph api
+                    token_json="",  # not needed for client credentials flow here
+                )
+                graph_items = auth_helper.get_list_items(self.site_url, list_title, limit)
+                if graph_items:
+                    logging.info(f"{len(graph_items)} items from sharepoint loaded successfully via Graph API fallback.")
+                    return graph_items
+                else:
+                    return ToolException("List appears empty or inaccessible via both REST and Graph APIs.")
+            except Exception as graph_e:
+                logging.error(f"Graph API fallback failed: {graph_e}")
+                return ToolException(f"Cannot read list '{list_title}'. Check list name and permissions: {base_e} | {graph_e}")
 
 
     def get_files_list(self, folder_name: str = None, limit_files: int = 100):
         """ If folder name is specified, lists all files in this folder under Shared Documents path. If folder name is empty, lists all files under root catalog (Shared Documents). Number of files is limited by limit_files (default is 100)."""
         try:
+            # exclude default system libraries like 'Form Templates', 'Site Assets', 'Style Library'
+            all_libraries = self._client.web.lists.filter("BaseTemplate eq 101 and Title ne 'Form Templates' and Title ne 'Site Assets' and Title ne 'Style Library'").get().execute_query()
             result = []
             if not limit_files:
                 limit_files = 100
-            […17 lines…]
+            #
+            for lib in all_libraries:
+                library_type = decode_sharepoint_string(lib.properties["EntityTypeName"])
+                target_folder_url = f"{library_type}/{folder_name}" if folder_name else library_type
+                files = (self._client.web.get_folder_by_server_relative_path(target_folder_url)
+                         .get_files(True)
+                         .execute_query())
+                #
+                for file in files:
+                    if f"{library_type}/Forms" in file.properties['ServerRelativeUrl']:
+                        # skip files from system folder "Forms"
+                        continue
+                    if len(result) >= limit_files:
+                        break
+                    temp_props = {
+                        'Name': file.properties['Name'],
+                        'Path': file.properties['ServerRelativeUrl'],
+                        'Created': file.properties['TimeCreated'],
+                        'Modified': file.properties['TimeLastModified'],
+                        'Link': file.properties['LinkingUrl'],
+                        'id': file.properties['UniqueId']
+                    }
+                    result.append(temp_props)
             return result if result else ToolException("Can not get files or folder is empty. Please, double check folder name and read permissions.")
         except Exception as e:
-            […2 lines…]
+            # attempt to get via graph api
+            try:
+                # attempt to get files via graph api
+                from .authorization_helper import SharepointAuthorizationHelper
+                auth_helper = SharepointAuthorizationHelper(
+                    client_id=self.client_id,
+                    client_secret=self.client_secret.get_secret_value(),
+                    tenant="",  # optional for graph api
+                    scope="",  # optional for graph api
+                    token_json="",  # optional for graph api
+                )
+                files = auth_helper.get_files_list(self.site_url, folder_name, limit_files)
+                return files
+            except Exception as graph_e:
+                logging.error(f"Failed to load files from sharepoint via base api: {e}")
+                logging.error(f"Failed to load files from sharepoint via graph api: {graph_e}")
+                return ToolException(f"Can not get files. Please, double check folder name and read permissions: {e} and {graph_e}")
 
     def read_file(self, path,
                   is_capture_image: bool = False,
@@ -141,11 +183,28 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
             self._client.load(file).execute_query()
 
             file_content = file.read()
+            file_name = file.name
             self._client.execute_query()
         except Exception as e:
-            […3 lines…]
+            # attempt to get via graph api
+            try:
+                # attempt to get files via graph api
+                from .authorization_helper import SharepointAuthorizationHelper
+                auth_helper = SharepointAuthorizationHelper(
+                    client_id=self.client_id,
+                    client_secret=self.client_secret.get_secret_value(),
+                    tenant="",  # optional for graph api
+                    scope="",  # optional for graph api
+                    token_json="",  # optional for graph api
+                )
+                file_content = auth_helper.get_file_content(self.site_url, path)
+                file_name = path.split('/')[-1]
+            except Exception as graph_e:
+                logging.error(f"Failed to load file from SharePoint via base api: {e}. Path: {path}. Please, double check file name and path.")
+                logging.error(f"Failed to load file from SharePoint via graph api: {graph_e}. Path: {path}. Please, double check file name and path.")
+                return ToolException(f"File not found. Please, check file name and path: {e} and {graph_e}")
+        #
+        return parse_file_content(file_name=file_name,
                                   file_content=file_content,
                                   is_capture_image=is_capture_image,
                                   page_number=page_number,
@@ -219,12 +278,24 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
             yield document
 
     def _load_file_content_in_bytes(self, path):
-        […6 lines…]
+        try:
+            file = self._client.web.get_file_by_server_relative_path(path)
+            self._client.load(file).execute_query()
+            file_content = file.read()
+            self._client.execute_query()
+            #
+            return file_content
+        except Exception as e:
+            # attempt to get via graph api
+            from .authorization_helper import SharepointAuthorizationHelper
+            auth_helper = SharepointAuthorizationHelper(
+                client_id=self.client_id,
+                client_secret=self.client_secret.get_secret_value(),
+                tenant="",  # optional for graph api
+                scope="",  # optional for graph api
+                token_json="",  # optional for graph api
+            )
+            return auth_helper.get_file_content(self.site_url, path)
 
     def get_available_tools(self):
         return super().get_available_tools() + [
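
All three api_wrapper.py hunks above follow the same shape: try the office365 REST client first, and on any exception retry through Microsoft Graph via the new SharepointAuthorizationHelper. A minimal sketch of that pattern, written as a standalone function rather than the SDK's bound methods (the import path and constructor keywords are taken from the diff; everything else here is illustrative):

    from alita_sdk.tools.sharepoint.authorization_helper import SharepointAuthorizationHelper

    def read_file_with_fallback(client, site_url, client_id, client_secret, path):
        """Try the SharePoint REST client first, then fall back to Graph."""
        try:
            # primary path: office365 REST client
            file = client.web.get_file_by_server_relative_path(path)
            client.load(file).execute_query()
            return file.read()
        except Exception:
            # fallback: Graph API with client-credentials auth; tenant, scope
            # and token_json are passed as empty strings, mirroring the diff
            helper = SharepointAuthorizationHelper(
                client_id=client_id, client_secret=client_secret,
                tenant="", scope="", token_json="",
            )
            return helper.get_file_content(site_url, path)
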
alita_sdk/tools/sharepoint/authorization_helper.py

@@ -1,7 +1,10 @@
 from datetime import datetime, timezone
+from urllib.parse import unquote, urlparse, quote
 
 import jwt
 import requests
+from botocore.response import get_response
+
 
 class SharepointAuthorizationHelper:
 
@@ -54,4 +57,175 @@
         except jwt.ExpiredSignatureError:
             return False
         except jwt.InvalidTokenError:
-            return False
+            return False
+
+    def _validate_response(self, response, required_field, error_prefix=None):
+        if response.status_code != 200:
+            raise RuntimeError(f"{error_prefix or 'Request'} failed: {response.status_code} {response.text}")
+        json_data = response.json()
+        if required_field not in json_data:
+            raise KeyError(f"'{required_field}' missing in response")
+        return json_data[required_field]
+
+    def generate_token_and_site_id(self, site_url: str) -> tuple[str, str]:
+        try:
+            parsed = urlparse(site_url)
+            domain = parsed.hostname
+            site_path = parsed.path.strip('/')
+            if not domain or not site_path:
+                raise ValueError(f"site_url missing domain or site path: {site_url}")
+            app_name = domain.split('.')[0]
+            openid_config_url = f"https://login.microsoftonline.com/{app_name}.onmicrosoft.com/v2.0/.well-known/openid-configuration"
+            response = requests.get(openid_config_url)
+            token_url = self._validate_response(response, required_field="token_endpoint", error_prefix="OpenID config")
+            token_data = {
+                "grant_type": "client_credentials",
+                "client_id": self.client_id,
+                "client_secret": self.client_secret,
+                "scope": "https://graph.microsoft.com/.default"
+            }
+            token_response = requests.post(token_url, data=token_data)
+            access_token = self._validate_response(token_response, required_field="access_token", error_prefix="Token request")
+            graph_site_url = f"https://graph.microsoft.com/v1.0/sites/{domain}:/{site_path}"
+            headers = {"Authorization": f"Bearer {access_token}"}
+            site_response = requests.get(graph_site_url, headers=headers)
+            site_id = self._validate_response(site_response, required_field="id", error_prefix="Site info")
+            return access_token, site_id
+        except Exception as e:
+            raise RuntimeError(f"Error while obtaining access_token and site_id: {e}")
+
+    def get_files_list(self, site_url: str, folder_name: str = None, limit_files: int = 100):
+        if not site_url or not site_url.startswith("https://"):
+            raise ValueError(f"Invalid site_url format: {site_url}")
+        if limit_files is not None and (not isinstance(limit_files, int) or limit_files <= 0):
+            raise ValueError(f"limit_files must be a positive integer, got: {limit_files}")
+        try:
+            access_token, site_id = self.generate_token_and_site_id(site_url)
+            headers = {"Authorization": f"Bearer {access_token}"}
+            drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
+            drives_response = requests.get(drives_url, headers=headers)
+            drives = self._validate_response(drives_response, required_field="value", error_prefix="Drives request")
+            result = []
+            def _recurse_drive(drive_id, drive_path, parent_folder, limit_files):
+                # Escape folder_name for URL safety if present
+                if parent_folder:
+                    safe_folder_name = quote(parent_folder.strip('/'), safe="/")
+                    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root:/{safe_folder_name}:/children?$top={limit_files}"
+                else:
+                    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root/children?$top={limit_files}"
+                response = requests.get(url, headers=headers)
+                if response.status_code != 200:
+                    return []
+                files_json = response.json()
+                if "value" not in files_json:
+                    return []
+                files = []
+                for file in files_json["value"]:
+                    file_name = file.get('name', '')
+                    # Build full path reflecting nested folders
+                    if parent_folder:
+                        full_path = '/' + '/'.join([drive_path.strip('/'), parent_folder.strip('/'), file_name.strip('/')])
+                    else:
+                        full_path = '/' + '/'.join([drive_path.strip('/'), file_name.strip('/')])
+                    temp_props = {
+                        'Name': file_name,
+                        'Path': full_path,
+                        'Created': file.get('createdDateTime'),
+                        'Modified': file.get('lastModifiedDateTime'),
+                        'Link': file.get('webUrl'),
+                        'id': file.get('id')
+                    }
+                    if not all([temp_props['Name'], temp_props['Path'], temp_props['id']]):
+                        continue  # skip files with missing required fields
+                    if 'folder' in file:
+                        # Recursively extract files from this folder
+                        inner_folder = parent_folder + '/' + file_name if parent_folder else file_name
+                        inner_files = _recurse_drive(drive_id, drive_path, inner_folder, limit_files)
+                        files.extend(inner_files)
+                    else:
+                        files.append(temp_props)
+                if limit_files is not None and len(result) + len(files) >= limit_files:
+                    return files[:limit_files - len(result)]
+                return files
+            for drive in drives:
+                drive_id = drive.get("id")
+                drive_path = unquote(urlparse(drive.get("webUrl")).path) if drive.get("webUrl") else ""
+                if not drive_id:
+                    continue  # skip drives without id
+                files = _recurse_drive(drive_id, drive_path, folder_name, limit_files)
+                result.extend(files)
+                if limit_files is not None and len(result) >= limit_files:
+                    return result[:limit_files]
+            return result
+        except Exception as e:
+            raise RuntimeError(f"Error in get_files_list: {e}")
+
+    def get_file_content(self, site_url: str, path: str):
+        try:
+            access_token, site_id = self.generate_token_and_site_id(site_url)
+            headers = {"Authorization": f"Bearer {access_token}"}
+            drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
+            drives_response = requests.get(drives_url, headers=headers)
+            drives = self._validate_response(drives_response, required_field="value", error_prefix="Drives request")
+            path = path.strip('/')
+            #
+            for drive in drives:
+                drive_path = unquote(urlparse(drive.get("webUrl")).path).strip('/')
+                if not drive_path or not path.startswith(drive_path):
+                    continue
+                drive_id = drive.get("id")
+                if not drive_id:
+                    continue
+                path = path.replace(drive_path, '').strip('/')
+                safe_path = quote(path, safe="")
+                url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{safe_path}:/content"
+                response = requests.get(url, headers=headers)
+                if response.status_code == 200:
+                    return response.content
+            raise RuntimeError(f"File '{path}' not found in any private or shared documents.")
+        except Exception as e:
+            raise RuntimeError(f"Error in get_file_content: {e}")
+
+    def get_list_items(self, site_url: str, list_title: str, limit: int = 1000):
+        """Fallback Graph API method to read SharePoint list items by list title.
+
+        Returns a list of dictionaries representing list item fields.
+        """
+        if not site_url or not site_url.startswith("https://"):
+            raise ValueError(f"Invalid site_url format: {site_url}")
+        try:
+            access_token, site_id = self.generate_token_and_site_id(site_url)
+            headers = {"Authorization": f"Bearer {access_token}"}
+            lists_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists"
+            response = requests.get(lists_url, headers=headers)
+            if response.status_code != 200:
+                raise RuntimeError(f"Lists request failed: {response.status_code} {response.text}")
+            lists_json = response.json()
+            lists = lists_json.get("value", [])
+            target_list = None
+            normalized_title = list_title.strip().lower()
+            for lst in lists:
+                # displayName is the user-visible title. name can differ (internal name)
+                display_name = (lst.get("displayName") or lst.get("name") or '').strip().lower()
+                if display_name == normalized_title:
+                    target_list = lst
+                    break
+            if not target_list:
+                raise RuntimeError(f"List '{list_title}' not found via Graph API.")
+            list_id = target_list.get('id')
+            if not list_id:
+                raise RuntimeError(f"List '{list_title}' missing id field.")
+            items_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items?expand=fields&$top={limit}"
+            items_response = requests.get(items_url, headers=headers)
+            if items_response.status_code != 200:
+                raise RuntimeError(f"List items request failed: {items_response.status_code} {items_response.text}")
+            items_json = items_response.json()
+            values = items_json.get('value', [])
+            result = []
+            for item in values:
+                fields = item.get('fields', {})
+                if fields:
+                    result.append(fields)
+            return result
+        except Exception as e:
+            raise RuntimeError(f"Error in get_list_items: {e}")
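
The helper implements a plain client-credentials flow: it derives the tenant from the SharePoint host name, discovers the token endpoint from that tenant's OpenID configuration, requests a token for the https://graph.microsoft.com/.default scope, then resolves the site id with GET /v1.0/sites/{hostname}:/{site-path}. A hedged usage sketch (placeholder credentials and site URL; every call performs live HTTP against login.microsoftonline.com and graph.microsoft.com):

    from alita_sdk.tools.sharepoint.authorization_helper import SharepointAuthorizationHelper

    helper = SharepointAuthorizationHelper(
        client_id="<app-client-id>",          # hypothetical values
        client_secret="<app-client-secret>",
        tenant="", scope="", token_json="",   # unused by the Graph methods
    )
    site = "https://contoso.sharepoint.com/sites/team"
    token, site_id = helper.generate_token_and_site_id(site)
    files = helper.get_files_list(site, folder_name=None, limit_files=10)
    rows = helper.get_list_items(site, "Tasks", limit=100)
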
alita_sdk/tools/sharepoint/utils.py

@@ -1,5 +1,7 @@
-[…]
+import re
 from io import BytesIO
+from docx import Document
+
 
 def read_docx_from_bytes(file_content):
     """Read and return content from a .docx file using a byte stream."""
@@ -11,4 +13,8 @@ def read_docx_from_bytes(file_content):
         return '\n'.join(text)
     except Exception as e:
         print(f"Error reading .docx from bytes: {e}")
-        return ""
+        return ""
+
+
+def decode_sharepoint_string(s):
+    return re.sub(r'_x([0-9A-Fa-f]{4})_', lambda m: chr(int(m.group(1), 16)), s)
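
SharePoint escapes characters that are not valid in internal entity names as `_xHHHH_`, where HHHH is the character's hex code point; the new decode_sharepoint_string reverses that, which is what api_wrapper.py now uses to turn EntityTypeName values back into folder names. For example (0x0020 is a space):

    >>> decode_sharepoint_string("My_x0020_Library")
    'My Library'
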
alita_sdk/tools/utils/content_parser.py

@@ -92,21 +92,24 @@ def parse_file_content(file_name=None, file_content=None, is_capture_image: bool
         return ToolException(
             "Not supported type of files entered. Supported types are TXT, DOCX, PDF, PPTX, XLSX and XLS only.")
 
-    […3 lines…]
-    extension = Path(file_path if file_path else file_name).suffix
-    loader_kwargs = get_loader_kwargs(loaders_map.get(extension), file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets)
-    if file_content:
-        return load_content_from_bytes(file_content=file_content,
-                                       extension=extension,
-                                       loader_extra_config=loader_kwargs,
-                                       llm=llm)
+    try:
+        if hasattr(loader, 'get_content'):
+            return loader.get_content()
         else:
-            […4 lines…]
+            extension = Path(file_path if file_path else file_name).suffix
+            loader_kwargs = get_loader_kwargs(loaders_map.get(extension), file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets)
+            if file_content:
+                return load_content_from_bytes(file_content=file_content,
+                                               extension=extension,
+                                               loader_extra_config=loader_kwargs,
+                                               llm=llm)
+            else:
+                return load_content(file_path=file_path,
+                                    extension=extension,
+                                    loader_extra_config=loader_kwargs,
+                                    llm=llm)
+    except Exception as e:
+        return ToolException(f"Error reading file ({file_name or file_path}) content. Make sure these types are supported: {str(e)}")
 
 def load_file_docs(file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
                    sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False) -> List[Document] | ToolException:
@@ -153,7 +156,7 @@ def prepare_loader(file_name=None, file_content=None, is_capture_image: bool = F
 
     loader_object = loaders_map.get(extension)
     if not loader_object:
-        […]
+        loader_object = loaders_map.get('.txt')  # Default to text loader if no specific loader found
     loader_kwargs = get_loader_kwargs(loader_object, file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets, prompt)
     loader = loader_object['class'](**loader_kwargs)
     return loader
@@ -222,10 +225,18 @@ def process_document_by_type(content, extension_source: str, document: Document
             metadata={**document.metadata, 'chunk_id': 1}
         )
         return
+    #
+    chunks_counter = 0
    for chunk in chunks:
+        chunks_counter += 1
+        metadata = {**document.metadata, **chunk.metadata}
+        #
+        # ensure each chunk has a unique chunk_id
+        metadata['chunk_id'] = chunks_counter
+        #
         yield Document(
             page_content=sanitize_for_postgres(chunk.page_content),
-            metadata=
+            metadata=metadata
         )
 
 
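
The process_document_by_type change merges the splitter's per-chunk metadata into the parent document's metadata and numbers chunks from 1, so every yielded Document carries a unique chunk_id. Behaviourally this loop is equivalent to the following sketch (not the SDK's literal code; chunks, document and sanitize_for_postgres come from the surrounding function):

    for chunk_id, chunk in enumerate(chunks, start=1):
        yield Document(
            page_content=sanitize_for_postgres(chunk.page_content),
            metadata={**document.metadata, **chunk.metadata, 'chunk_id': chunk_id},
        )
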
alita_sdk/tools/vector_adapters/VectorStoreAdapter.py

@@ -109,7 +109,7 @@ class PGVectorAdapter(VectorStoreAdapter):
     def get_indexed_ids(self, vectorstore_wrapper, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from PGVector"""
         from sqlalchemy.orm import Session
-        from sqlalchemy import func
+        from sqlalchemy import func, or_
 
         store = vectorstore_wrapper.vectorstore
         try:
@@ -119,7 +119,12 @@
             # Apply filter only if index_name is provided
             if index_name:
                 query = query.filter(
-                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name,
+                    or_(
+                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type').is_(None),
+                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata,
+                                                     'type') != IndexerKeywords.INDEX_META_TYPE.value
+                    )
                 )
             ids = query.all()
             return [str(id_tuple[0]) for id_tuple in ids]
@@ -131,7 +136,15 @@
         """Clean the vectorstore collection by deleting all indexed data."""
         # This logic deletes all data from the vectorstore collection without removal of collection.
         # Collection itself remains available for future indexing.
-        […]
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func
+
+        store = vectorstore_wrapper.vectorstore
+        with Session(store.session_maker.bind) as session:
+            session.query(store.EmbeddingStore).filter(
+                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
+            ).delete(synchronize_session=False)
+            session.commit()
 
     def is_vectorstore_type(self, vectorstore) -> bool:
         """Check if the vectorstore is a PGVector store."""
@@ -145,7 +158,7 @@
 
         result = {}
         try:
-            vectorstore_wrapper.
+            vectorstore_wrapper._log_tool_event("Retrieving already indexed data from PGVector vectorstore",
                                                 tool_name="get_indexed_data")
             store = vectorstore_wrapper.vectorstore
             with Session(store.session_maker.bind) as session:
@@ -194,7 +207,7 @@
 
         result = {}
         try:
-            vectorstore_wrapper.
+            vectorstore_wrapper._log_tool_event(message="Retrieving already indexed code data from PGVector vectorstore",
                                                 tool_name="index_code_data")
             store = vectorstore_wrapper.vectorstore
             with (Session(store.session_maker.bind) as session):
@@ -307,7 +320,7 @@ class ChromaAdapter(VectorStoreAdapter):
 
     def list_collections(self, vectorstore_wrapper) -> str:
         vector_client = vectorstore_wrapper.vectorstore._client
-        return ','.join([collection.name for collection in vector_client.
+        return ','.join([collection.name for collection in vector_client.list_collections()])
 
     def remove_collection(self, vectorstore_wrapper, collection_name: str):
         vectorstore_wrapper.vectorstore.delete_collection()
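
In get_indexed_ids, Query.filter() conjoins the criteria it is given, so the new filter reads: the collection matches the index name AND (the row has no 'type' metadata OR its 'type' is not the index-meta marker); index bookkeeping records are therefore no longer returned as document IDs, while untyped rows are kept. The same predicate in isolation, as a sketch: the table name below is PGVector's default and the marker string is assumed, since IndexerKeywords.INDEX_META_TYPE is defined elsewhere in the SDK:

    from sqlalchemy import JSON, column, func, or_, select, table

    cmetadata = column("cmetadata", JSON)
    embedding = table("langchain_pg_embedding", column("id"), cmetadata)

    # select().where(a, b) conjoins its criteria, i.e. a AND (b)
    stmt = select(embedding.c.id).where(
        func.jsonb_extract_path_text(cmetadata, "collection") == "my_index",
        or_(
            func.jsonb_extract_path_text(cmetadata, "type").is_(None),
            func.jsonb_extract_path_text(cmetadata, "type") != "index_meta",  # assumed marker value
        ),
    )
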
{alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.376
+Version: 0.3.435
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0