alita-sdk 0.3.405__py3-none-any.whl → 0.3.407__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Potentially problematic release.


This version of alita-sdk might be problematic. See the associated advisory for more details.

@@ -156,11 +156,28 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
156
156
  self._client.load(file).execute_query()
157
157
 
158
158
  file_content = file.read()
159
+ file_name = file.name
159
160
  self._client.execute_query()
160
161
  except Exception as e:
161
- logging.error(f"Failed to load file from SharePoint: {e}. Path: {path}. Please, double check file name and path.")
162
- return ToolException("File not found. Please, check file name and path.")
163
- return parse_file_content(file_name=file.name,
162
+ # attempt to get via graph api
163
+ try:
164
+ # attempt to get files via graph api
165
+ from .authorization_helper import SharepointAuthorizationHelper
166
+ auth_helper = SharepointAuthorizationHelper(
167
+ client_id=self.client_id,
168
+ client_secret=self.client_secret.get_secret_value(),
169
+ tenant="", # optional for graph api
170
+ scope="", # optional for graph api
171
+ token_json="", # optional for graph api
172
+ )
173
+ file_content = auth_helper.get_file_content(self.site_url, path)
174
+ file_name = path.split('/')[-1]
175
+ except Exception as graph_e:
176
+ logging.error(f"Failed to load file from SharePoint via base api: {e}. Path: {path}. Please, double check file name and path.")
177
+ logging.error(f"Failed to load file from SharePoint via graph api: {graph_e}. Path: {path}. Please, double check file name and path.")
178
+ return ToolException(f"File not found. Please, check file name and path: {e} and {graph_e}")
179
+ #
180
+ return parse_file_content(file_name=file_name,
164
181
  file_content=file_content,
165
182
  is_capture_image=is_capture_image,
166
183
  page_number=page_number,
@@ -1,8 +1,10 @@
1
1
  from datetime import datetime, timezone
2
- from urllib.parse import urlparse
2
+ from urllib.parse import unquote, urlparse, quote
3
3
 
4
4
  import jwt
5
5
  import requests
6
+ from botocore.response import get_response
7
+
6
8
 
7
9
  class SharepointAuthorizationHelper:
8
10
 
@@ -57,6 +59,13 @@ class SharepointAuthorizationHelper:
57
59
  except jwt.InvalidTokenError:
58
60
  return False
59
61
 
62
+ def _validate_response(self, response, required_field, error_prefix=None):
63
+ if response.status_code != 200:
64
+ raise RuntimeError(f"{error_prefix or 'Request'} failed: {response.status_code} {response.text}")
65
+ json_data = response.json()
66
+ if required_field not in json_data:
67
+ raise KeyError(f"'{required_field}' missing in response")
68
+ return json_data[required_field]
60
69
 
61
70
  def generate_token_and_site_id(self, site_url: str) -> tuple[str, str]:
62
71
  try:
@@ -65,16 +74,10 @@ class SharepointAuthorizationHelper:
65
74
  site_path = parsed.path.strip('/')
66
75
  if not domain or not site_path:
67
76
  raise ValueError(f"site_url missing domain or site path: {site_url}")
68
- #
69
77
  app_name = domain.split('.')[0]
70
78
  openid_config_url = f"https://login.microsoftonline.com/{app_name}.onmicrosoft.com/v2.0/.well-known/openid-configuration"
71
79
  response = requests.get(openid_config_url)
72
- if response.status_code != 200:
73
- raise RuntimeError(f"Failed to get OpenID config: {response.status_code} {response.text}")
74
- token_url = response.json().get("token_endpoint")
75
- if not token_url:
76
- raise KeyError("'token_endpoint' missing in OpenID config response")
77
- #
80
+ token_url = self._validate_response(response, required_field="token_endpoint", error_prefix="OpenID config")
78
81
  token_data = {
79
82
  "grant_type": "client_credentials",
80
83
  "client_id": self.client_id,
@@ -82,21 +85,11 @@ class SharepointAuthorizationHelper:
82
85
  "scope": "https://graph.microsoft.com/.default"
83
86
  }
84
87
  token_response = requests.post(token_url, data=token_data)
85
- if token_response.status_code != 200:
86
- raise RuntimeError(f"Failed to get access token: {token_response.status_code} {token_response.text}")
87
- access_token = token_response.json().get("access_token")
88
- if not access_token:
89
- raise KeyError("'access_token' missing in token response")
90
- #
88
+ access_token = self._validate_response(token_response, required_field="access_token", error_prefix="Token request")
91
89
  graph_site_url = f"https://graph.microsoft.com/v1.0/sites/{domain}:/{site_path}"
92
90
  headers = {"Authorization": f"Bearer {access_token}"}
93
91
  site_response = requests.get(graph_site_url, headers=headers)
94
- if site_response.status_code != 200:
95
- raise RuntimeError(f"Failed to get site info: {site_response.status_code} {site_response.text}")
96
- site_id = site_response.json().get("id")
97
- if not site_id:
98
- raise KeyError("'id' missing in site response")
99
- #
92
+ site_id = self._validate_response(site_response, required_field="id", error_prefix="Site info")
100
93
  return access_token, site_id
101
94
  except Exception as e:
102
95
  raise RuntimeError(f"Error while obtaining access_token and site_id: {e}")
@@ -111,46 +104,84 @@ class SharepointAuthorizationHelper:
111
104
  headers = {"Authorization": f"Bearer {access_token}"}
112
105
  drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
113
106
  drives_response = requests.get(drives_url, headers=headers)
114
- if drives_response.status_code != 200:
115
- raise RuntimeError(f"Failed to get drives: {drives_response.status_code} {drives_response.text}")
116
- drives_json = drives_response.json()
117
- if "value" not in drives_json or not drives_json["value"]:
118
- raise KeyError("'value' missing or empty in drives response")
119
- drive_id = drives_json["value"][0].get("id")
120
- if not drive_id:
121
- raise KeyError("'id' missing in drive object")
122
- #
123
- # Build the correct endpoint for folder or root
124
- if folder_name:
125
- # Validate folder_name for safe URL usage
126
- if any(c in folder_name for c in ['..', '//', '\\']):
127
- raise ValueError(f"Unsafe folder_name: {folder_name}")
128
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root:/{folder_name}:/children?$top={limit_files}"
129
- else:
130
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root/children?$top={limit_files}"
131
- response = requests.get(url, headers=headers)
132
- if response.status_code != 200:
133
- raise RuntimeError(f"Failed to get files list: {response.status_code} {response.text}")
134
- files_json = response.json()
135
- if "value" not in files_json:
136
- raise KeyError("'value' missing in files response")
137
- #
107
+ drives = self._validate_response(drives_response, required_field="value", error_prefix="Drives request")
138
108
  result = []
139
- for file in files_json["value"]:
140
- temp_props = {
141
- 'Name': file.get('name'),
142
- 'Path': file.get('webUrl'),
143
- 'Created': file.get('createdDateTime'),
144
- 'Modified': file.get('lastModifiedDateTime'),
145
- 'Link': file.get('webUrl'),
146
- 'id': file.get('id')
147
- }
148
- if not all([temp_props['Name'], temp_props['Path'], temp_props['id']]):
149
- raise KeyError(f"Missing required file fields in: {file}")
150
- result.append(temp_props)
151
- # If API doesn't respect $top, slice in Python
152
- if limit_files is not None:
153
- result = result[:limit_files]
109
+ def _recurse_drive(drive_id, drive_path, parent_folder, limit_files):
110
+ # Escape folder_name for URL safety if present
111
+ if parent_folder:
112
+ safe_folder_name = quote(parent_folder.strip('/'), safe="/")
113
+ url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root:/{safe_folder_name}:/children?$top={limit_files}"
114
+ else:
115
+ url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root/children?$top={limit_files}"
116
+ response = requests.get(url, headers=headers)
117
+ if response.status_code != 200:
118
+ return []
119
+ files_json = response.json()
120
+ if "value" not in files_json:
121
+ return []
122
+ files = []
123
+ for file in files_json["value"]:
124
+ file_name = file.get('name', '')
125
+ # Build full path reflecting nested folders
126
+ if parent_folder:
127
+ full_path = '/' + '/'.join([drive_path.strip('/'), parent_folder.strip('/'), file_name.strip('/')])
128
+ else:
129
+ full_path = '/' + '/'.join([drive_path.strip('/'), file_name.strip('/')])
130
+ temp_props = {
131
+ 'Name': file_name,
132
+ 'Path': full_path,
133
+ 'Created': file.get('createdDateTime'),
134
+ 'Modified': file.get('lastModifiedDateTime'),
135
+ 'Link': file.get('webUrl'),
136
+ 'id': file.get('id')
137
+ }
138
+ if not all([temp_props['Name'], temp_props['Path'], temp_props['id']]):
139
+ continue # skip files with missing required fields
140
+ if 'folder' in file:
141
+ # Recursively extract files from this folder
142
+ inner_folder = parent_folder + '/' + file_name if parent_folder else file_name
143
+ inner_files = _recurse_drive(drive_id, drive_path, inner_folder, limit_files)
144
+ files.extend(inner_files)
145
+ else:
146
+ files.append(temp_props)
147
+ if limit_files is not None and len(result) + len(files) >= limit_files:
148
+ return files[:limit_files - len(result)]
149
+ return files
150
+ for drive in drives:
151
+ drive_id = drive.get("id")
152
+ drive_path = unquote(urlparse(drive.get("webUrl")).path) if drive.get("webUrl") else ""
153
+ if not drive_id:
154
+ continue # skip drives without id
155
+ files = _recurse_drive(drive_id, drive_path, folder_name, limit_files)
156
+ result.extend(files)
157
+ if limit_files is not None and len(result) >= limit_files:
158
+ return result[:limit_files]
154
159
  return result
155
160
  except Exception as e:
156
161
  raise RuntimeError(f"Error in get_files_list: {e}")
162
+
163
+ def get_file_content(self, site_url: str, path: str):
164
+ try:
165
+ access_token, site_id = self.generate_token_and_site_id(site_url)
166
+ headers = {"Authorization": f"Bearer {access_token}"}
167
+ drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
168
+ drives_response = requests.get(drives_url, headers=headers)
169
+ drives = self._validate_response(drives_response, required_field="value", error_prefix="Drives request")
170
+ path = path.strip('/')
171
+ #
172
+ for drive in drives:
173
+ drive_path = unquote(urlparse(drive.get("webUrl")).path).strip('/')
174
+ if not drive_path or not path.startswith(drive_path):
175
+ continue
176
+ drive_id = drive.get("id")
177
+ if not drive_id:
178
+ continue
179
+ path = path.replace(drive_path, '').strip('/')
180
+ safe_path = quote(path, safe="")
181
+ url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{safe_path}:/content"
182
+ response = requests.get(url, headers=headers)
183
+ if response.status_code == 200:
184
+ return response.content
185
+ raise RuntimeError(f"File '{path}' not found in any private or shared documents.")
186
+ except Exception as e:
187
+ raise RuntimeError(f"Error in get_file_content: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.405
3
+ Version: 0.3.407
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -317,8 +317,8 @@ alita_sdk/tools/servicenow/__init__.py,sha256=ziEt2juPrGFyB98ZXbGf25v6gZo4UJTHsz
317
317
  alita_sdk/tools/servicenow/api_wrapper.py,sha256=WpH-bBLGFdhehs4g-K-WAkNuaD1CSrwsDpdgB3RG53s,6120
318
318
  alita_sdk/tools/servicenow/servicenow_client.py,sha256=Rdqfu-ll-qbnclMzChLZBsfXRDzgoX_FdeI2WLApWxc,3269
319
319
  alita_sdk/tools/sharepoint/__init__.py,sha256=5z2iSmm-0kbHKf70wN6OOgS4Px7tOzwkIpHXz0Vrbj4,4045
320
- alita_sdk/tools/sharepoint/api_wrapper.py,sha256=xUQOlJBHha0bmzkz00GVgzUnAsE28saGkRYHqXTj7Ac,13105
321
- alita_sdk/tools/sharepoint/authorization_helper.py,sha256=WfkSZh28gfB2aOlWk1T4mHjc0PBW5SPwLzVVTPr_dkM,7476
320
+ alita_sdk/tools/sharepoint/api_wrapper.py,sha256=d8B0I4C9x8qt1dvmLjsfYnas98T_gXueJAWrNykZP0U,14075
321
+ alita_sdk/tools/sharepoint/authorization_helper.py,sha256=QvxWFBjYZfhI1h_KkSrDbRh8D5BlFX8xWDLmlIoO4mo,9569
322
322
  alita_sdk/tools/sharepoint/utils.py,sha256=fZ1YzAu5CTjKSZeslowpOPH974902S8vCp1Wu7L44LM,446
323
323
  alita_sdk/tools/slack/__init__.py,sha256=YiPAoRc6y6uVpfHl0K1Qi-flcyPlWFIMVcVbhicGWXY,3990
324
324
  alita_sdk/tools/slack/api_wrapper.py,sha256=5VrV7iSGno8ZcDzEHdGPNhInhtODGPPvAzoZ9W9iQWE,14009
@@ -353,8 +353,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
353
353
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
354
354
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
355
355
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
356
- alita_sdk-0.3.405.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
357
- alita_sdk-0.3.405.dist-info/METADATA,sha256=zNo9z-qhfD29QynTObDSSd-1sgmJKEdTLqdM_NdmCCI,19071
358
- alita_sdk-0.3.405.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
359
- alita_sdk-0.3.405.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
360
- alita_sdk-0.3.405.dist-info/RECORD,,
356
+ alita_sdk-0.3.407.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
357
+ alita_sdk-0.3.407.dist-info/METADATA,sha256=5zQGxSdLNttnUzPpD95nDrhddTdPZmHyoLAtHQJJ0mo,19071
358
+ alita_sdk-0.3.407.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
359
+ alita_sdk-0.3.407.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
360
+ alita_sdk-0.3.407.dist-info/RECORD,,