alita-sdk 0.3.405__py3-none-any.whl → 0.3.407__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Potentially problematic release.


This version of alita-sdk might be problematic. See the associated advisory for more details.

@@ -156,11 +156,28 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
156
156
  self._client.load(file).execute_query()
157
157
 
158
158
  file_content = file.read()
159
+ file_name = file.name
159
160
  self._client.execute_query()
160
161
  except Exception as e:
161
- logging.error(f"Failed to load file from SharePoint: {e}. Path: {path}. Please, double check file name and path.")
162
- return ToolException("File not found. Please, check file name and path.")
163
- return parse_file_content(file_name=file.name,
162
+ # attempt to get via graph api
163
+ try:
164
+ # attempt to get files via graph api
165
+ from .authorization_helper import SharepointAuthorizationHelper
166
+ auth_helper = SharepointAuthorizationHelper(
167
+ client_id=self.client_id,
168
+ client_secret=self.client_secret.get_secret_value(),
169
+ tenant="", # optional for graph api
170
+ scope="", # optional for graph api
171
+ token_json="", # optional for graph api
172
+ )
173
+ file_content = auth_helper.get_file_content(self.site_url, path)
174
+ file_name = path.split('/')[-1]
175
+ except Exception as graph_e:
176
+ logging.error(f"Failed to load file from SharePoint via base api: {e}. Path: {path}. Please, double check file name and path.")
177
+ logging.error(f"Failed to load file from SharePoint via graph api: {graph_e}. Path: {path}. Please, double check file name and path.")
178
+ return ToolException(f"File not found. Please, check file name and path: {e} and {graph_e}")
179
+ #
180
+ return parse_file_content(file_name=file_name,
164
181
  file_content=file_content,
165
182
  is_capture_image=is_capture_image,
166
183
  page_number=page_number,
@@ -1,8 +1,10 @@
1
1
  from datetime import datetime, timezone
2
- from urllib.parse import urlparse
2
+ from urllib.parse import unquote, urlparse, quote
3
3
 
4
4
  import jwt
5
5
  import requests
6
+ from botocore.response import get_response
7
+
6
8
 
7
9
  class SharepointAuthorizationHelper:
8
10
 
@@ -57,6 +59,13 @@ class SharepointAuthorizationHelper:
57
59
  except jwt.InvalidTokenError:
58
60
  return False
59
61
 
62
+ def _validate_response(self, response, required_field, error_prefix=None):
63
+ if response.status_code != 200:
64
+ raise RuntimeError(f"{error_prefix or 'Request'} failed: {response.status_code} {response.text}")
65
+ json_data = response.json()
66
+ if required_field not in json_data:
67
+ raise KeyError(f"'{required_field}' missing in response")
68
+ return json_data[required_field]
60
69
 
61
70
  def generate_token_and_site_id(self, site_url: str) -> tuple[str, str]:
62
71
  try:
@@ -65,16 +74,10 @@ class SharepointAuthorizationHelper:
65
74
  site_path = parsed.path.strip('/')
66
75
  if not domain or not site_path:
67
76
  raise ValueError(f"site_url missing domain or site path: {site_url}")
68
- #
69
77
  app_name = domain.split('.')[0]
70
78
  openid_config_url = f"https://login.microsoftonline.com/{app_name}.onmicrosoft.com/v2.0/.well-known/openid-configuration"
71
79
  response = requests.get(openid_config_url)
72
- if response.status_code != 200:
73
- raise RuntimeError(f"Failed to get OpenID config: {response.status_code} {response.text}")
74
- token_url = response.json().get("token_endpoint")
75
- if not token_url:
76
- raise KeyError("'token_endpoint' missing in OpenID config response")
77
- #
80
+ token_url = self._validate_response(response, required_field="token_endpoint", error_prefix="OpenID config")
78
81
  token_data = {
79
82
  "grant_type": "client_credentials",
80
83
  "client_id": self.client_id,
@@ -82,21 +85,11 @@ class SharepointAuthorizationHelper:
82
85
  "scope": "https://graph.microsoft.com/.default"
83
86
  }
84
87
  token_response = requests.post(token_url, data=token_data)
85
- if token_response.status_code != 200:
86
- raise RuntimeError(f"Failed to get access token: {token_response.status_code} {token_response.text}")
87
- access_token = token_response.json().get("access_token")
88
- if not access_token:
89
- raise KeyError("'access_token' missing in token response")
90
- #
88
+ access_token = self._validate_response(token_response, required_field="access_token", error_prefix="Token request")
91
89
  graph_site_url = f"https://graph.microsoft.com/v1.0/sites/{domain}:/{site_path}"
92
90
  headers = {"Authorization": f"Bearer {access_token}"}
93
91
  site_response = requests.get(graph_site_url, headers=headers)
94
- if site_response.status_code != 200:
95
- raise RuntimeError(f"Failed to get site info: {site_response.status_code} {site_response.text}")
96
- site_id = site_response.json().get("id")
97
- if not site_id:
98
- raise KeyError("'id' missing in site response")
99
- #
92
+ site_id = self._validate_response(site_response, required_field="id", error_prefix="Site info")
100
93
  return access_token, site_id
101
94
  except Exception as e:
102
95
  raise RuntimeError(f"Error while obtaining access_token and site_id: {e}")
@@ -111,46 +104,84 @@ class SharepointAuthorizationHelper:
111
104
  headers = {"Authorization": f"Bearer {access_token}"}
112
105
  drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
113
106
  drives_response = requests.get(drives_url, headers=headers)
114
- if drives_response.status_code != 200:
115
- raise RuntimeError(f"Failed to get drives: {drives_response.status_code} {drives_response.text}")
116
- drives_json = drives_response.json()
117
- if "value" not in drives_json or not drives_json["value"]:
118
- raise KeyError("'value' missing or empty in drives response")
119
- drive_id = drives_json["value"][0].get("id")
120
- if not drive_id:
121
- raise KeyError("'id' missing in drive object")
122
- #
123
- # Build the correct endpoint for folder or root
124
- if folder_name:
125
- # Validate folder_name for safe URL usage
126
- if any(c in folder_name for c in ['..', '//', '\\']):
127
- raise ValueError(f"Unsafe folder_name: {folder_name}")
128
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root:/{folder_name}:/children?$top={limit_files}"
129
- else:
130
- url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root/children?$top={limit_files}"
131
- response = requests.get(url, headers=headers)
132
- if response.status_code != 200:
133
- raise RuntimeError(f"Failed to get files list: {response.status_code} {response.text}")
134
- files_json = response.json()
135
- if "value" not in files_json:
136
- raise KeyError("'value' missing in files response")
137
- #
107
+ drives = self._validate_response(drives_response, required_field="value", error_prefix="Drives request")
138
108
  result = []
139
- for file in files_json["value"]:
140
- temp_props = {
141
- 'Name': file.get('name'),
142
- 'Path': file.get('webUrl'),
143
- 'Created': file.get('createdDateTime'),
144
- 'Modified': file.get('lastModifiedDateTime'),
145
- 'Link': file.get('webUrl'),
146
- 'id': file.get('id')
147
- }
148
- if not all([temp_props['Name'], temp_props['Path'], temp_props['id']]):
149
- raise KeyError(f"Missing required file fields in: {file}")
150
- result.append(temp_props)
151
- # If API doesn't respect $top, slice in Python
152
- if limit_files is not None:
153
- result = result[:limit_files]
109
+ def _recurse_drive(drive_id, drive_path, parent_folder, limit_files):
110
+ # Escape folder_name for URL safety if present
111
+ if parent_folder:
112
+ safe_folder_name = quote(parent_folder.strip('/'), safe="/")
113
+ url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root:/{safe_folder_name}:/children?$top={limit_files}"
114
+ else:
115
+ url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root/children?$top={limit_files}"
116
+ response = requests.get(url, headers=headers)
117
+ if response.status_code != 200:
118
+ return []
119
+ files_json = response.json()
120
+ if "value" not in files_json:
121
+ return []
122
+ files = []
123
+ for file in files_json["value"]:
124
+ file_name = file.get('name', '')
125
+ # Build full path reflecting nested folders
126
+ if parent_folder:
127
+ full_path = '/' + '/'.join([drive_path.strip('/'), parent_folder.strip('/'), file_name.strip('/')])
128
+ else:
129
+ full_path = '/' + '/'.join([drive_path.strip('/'), file_name.strip('/')])
130
+ temp_props = {
131
+ 'Name': file_name,
132
+ 'Path': full_path,
133
+ 'Created': file.get('createdDateTime'),
134
+ 'Modified': file.get('lastModifiedDateTime'),
135
+ 'Link': file.get('webUrl'),
136
+ 'id': file.get('id')
137
+ }
138
+ if not all([temp_props['Name'], temp_props['Path'], temp_props['id']]):
139
+ continue # skip files with missing required fields
140
+ if 'folder' in file:
141
+ # Recursively extract files from this folder
142
+ inner_folder = parent_folder + '/' + file_name if parent_folder else file_name
143
+ inner_files = _recurse_drive(drive_id, drive_path, inner_folder, limit_files)
144
+ files.extend(inner_files)
145
+ else:
146
+ files.append(temp_props)
147
+ if limit_files is not None and len(result) + len(files) >= limit_files:
148
+ return files[:limit_files - len(result)]
149
+ return files
150
+ for drive in drives:
151
+ drive_id = drive.get("id")
152
+ drive_path = unquote(urlparse(drive.get("webUrl")).path) if drive.get("webUrl") else ""
153
+ if not drive_id:
154
+ continue # skip drives without id
155
+ files = _recurse_drive(drive_id, drive_path, folder_name, limit_files)
156
+ result.extend(files)
157
+ if limit_files is not None and len(result) >= limit_files:
158
+ return result[:limit_files]
154
159
  return result
155
160
  except Exception as e:
156
161
  raise RuntimeError(f"Error in get_files_list: {e}")
162
+
163
+ def get_file_content(self, site_url: str, path: str):
164
+ try:
165
+ access_token, site_id = self.generate_token_and_site_id(site_url)
166
+ headers = {"Authorization": f"Bearer {access_token}"}
167
+ drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
168
+ drives_response = requests.get(drives_url, headers=headers)
169
+ drives = self._validate_response(drives_response, required_field="value", error_prefix="Drives request")
170
+ path = path.strip('/')
171
+ #
172
+ for drive in drives:
173
+ drive_path = unquote(urlparse(drive.get("webUrl")).path).strip('/')
174
+ if not drive_path or not path.startswith(drive_path):
175
+ continue
176
+ drive_id = drive.get("id")
177
+ if not drive_id:
178
+ continue
179
+ path = path.replace(drive_path, '').strip('/')
180
+ safe_path = quote(path, safe="")
181
+ url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{safe_path}:/content"
182
+ response = requests.get(url, headers=headers)
183
+ if response.status_code == 200:
184
+ return response.content
185
+ raise RuntimeError(f"File '{path}' not found in any private or shared documents.")
186
+ except Exception as e:
187
+ raise RuntimeError(f"Error in get_file_content: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.405
3
+ Version: 0.3.407
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -317,8 +317,8 @@ alita_sdk/tools/servicenow/__init__.py,sha256=ziEt2juPrGFyB98ZXbGf25v6gZo4UJTHsz
317
317
  alita_sdk/tools/servicenow/api_wrapper.py,sha256=WpH-bBLGFdhehs4g-K-WAkNuaD1CSrwsDpdgB3RG53s,6120
318
318
  alita_sdk/tools/servicenow/servicenow_client.py,sha256=Rdqfu-ll-qbnclMzChLZBsfXRDzgoX_FdeI2WLApWxc,3269
319
319
  alita_sdk/tools/sharepoint/__init__.py,sha256=5z2iSmm-0kbHKf70wN6OOgS4Px7tOzwkIpHXz0Vrbj4,4045
320
- alita_sdk/tools/sharepoint/api_wrapper.py,sha256=xUQOlJBHha0bmzkz00GVgzUnAsE28saGkRYHqXTj7Ac,13105
321
- alita_sdk/tools/sharepoint/authorization_helper.py,sha256=WfkSZh28gfB2aOlWk1T4mHjc0PBW5SPwLzVVTPr_dkM,7476
320
+ alita_sdk/tools/sharepoint/api_wrapper.py,sha256=d8B0I4C9x8qt1dvmLjsfYnas98T_gXueJAWrNykZP0U,14075
321
+ alita_sdk/tools/sharepoint/authorization_helper.py,sha256=QvxWFBjYZfhI1h_KkSrDbRh8D5BlFX8xWDLmlIoO4mo,9569
322
322
  alita_sdk/tools/sharepoint/utils.py,sha256=fZ1YzAu5CTjKSZeslowpOPH974902S8vCp1Wu7L44LM,446
323
323
  alita_sdk/tools/slack/__init__.py,sha256=YiPAoRc6y6uVpfHl0K1Qi-flcyPlWFIMVcVbhicGWXY,3990
324
324
  alita_sdk/tools/slack/api_wrapper.py,sha256=5VrV7iSGno8ZcDzEHdGPNhInhtODGPPvAzoZ9W9iQWE,14009
@@ -353,8 +353,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
353
353
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
354
354
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
355
355
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
356
- alita_sdk-0.3.405.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
357
- alita_sdk-0.3.405.dist-info/METADATA,sha256=zNo9z-qhfD29QynTObDSSd-1sgmJKEdTLqdM_NdmCCI,19071
358
- alita_sdk-0.3.405.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
359
- alita_sdk-0.3.405.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
360
- alita_sdk-0.3.405.dist-info/RECORD,,
356
+ alita_sdk-0.3.407.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
357
+ alita_sdk-0.3.407.dist-info/METADATA,sha256=5zQGxSdLNttnUzPpD95nDrhddTdPZmHyoLAtHQJJ0mo,19071
358
+ alita_sdk-0.3.407.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
359
+ alita_sdk-0.3.407.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
360
+ alita_sdk-0.3.407.dist-info/RECORD,,