alita-sdk 0.3.375__py3-none-any.whl → 0.3.417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. alita_sdk/configurations/bitbucket.py +95 -0
  2. alita_sdk/configurations/confluence.py +96 -1
  3. alita_sdk/configurations/gitlab.py +79 -0
  4. alita_sdk/configurations/jira.py +103 -0
  5. alita_sdk/configurations/testrail.py +88 -0
  6. alita_sdk/configurations/xray.py +93 -0
  7. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  8. alita_sdk/configurations/zephyr_essential.py +75 -0
  9. alita_sdk/runtime/clients/client.py +3 -2
  10. alita_sdk/runtime/langchain/assistant.py +56 -40
  11. alita_sdk/runtime/langchain/constants.py +2 -0
  12. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  13. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  14. alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
  15. alita_sdk/runtime/langchain/langraph_agent.py +52 -27
  16. alita_sdk/runtime/langchain/utils.py +15 -4
  17. alita_sdk/runtime/toolkits/application.py +8 -1
  18. alita_sdk/runtime/toolkits/tools.py +79 -49
  19. alita_sdk/runtime/tools/__init__.py +7 -2
  20. alita_sdk/runtime/tools/application.py +7 -0
  21. alita_sdk/runtime/tools/function.py +28 -23
  22. alita_sdk/runtime/tools/graph.py +10 -4
  23. alita_sdk/runtime/tools/image_generation.py +104 -8
  24. alita_sdk/runtime/tools/llm.py +142 -114
  25. alita_sdk/runtime/tools/sandbox.py +166 -63
  26. alita_sdk/runtime/tools/vectorstore.py +2 -1
  27. alita_sdk/runtime/tools/vectorstore_base.py +2 -1
  28. alita_sdk/runtime/utils/utils.py +1 -0
  29. alita_sdk/tools/__init__.py +43 -31
  30. alita_sdk/tools/base_indexer_toolkit.py +54 -60
  31. alita_sdk/tools/code_indexer_toolkit.py +13 -3
  32. alita_sdk/tools/confluence/api_wrapper.py +29 -7
  33. alita_sdk/tools/confluence/loader.py +10 -0
  34. alita_sdk/tools/elitea_base.py +1 -1
  35. alita_sdk/tools/gitlab/api_wrapper.py +8 -9
  36. alita_sdk/tools/jira/api_wrapper.py +1 -1
  37. alita_sdk/tools/qtest/api_wrapper.py +7 -10
  38. alita_sdk/tools/sharepoint/api_wrapper.py +81 -28
  39. alita_sdk/tools/sharepoint/authorization_helper.py +131 -1
  40. alita_sdk/tools/sharepoint/utils.py +8 -2
  41. alita_sdk/tools/utils/content_parser.py +27 -16
  42. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +18 -5
  43. {alita_sdk-0.3.375.dist-info → alita_sdk-0.3.417.dist-info}/METADATA +1 -1
  44. {alita_sdk-0.3.375.dist-info → alita_sdk-0.3.417.dist-info}/RECORD +47 -47
  45. {alita_sdk-0.3.375.dist-info → alita_sdk-0.3.417.dist-info}/WHEEL +0 -0
  46. {alita_sdk-0.3.375.dist-info → alita_sdk-0.3.417.dist-info}/licenses/LICENSE +0 -0
  47. {alita_sdk-0.3.375.dist-info → alita_sdk-0.3.417.dist-info}/top_level.txt +0 -0
alita_sdk/tools/base_indexer_toolkit.py

@@ -7,7 +7,6 @@ from typing import Any, Optional, List, Dict, Generator
 from langchain_core.documents import Document
 from pydantic import create_model, Field, SecretStr
 
-from .utils import make_json_serializable
 from .utils.content_parser import file_extension_by_chunker, process_document_by_type
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.langchain.document_loaders.constants import loaders_allowed_to_override
@@ -111,7 +110,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('collection_name')
+        collection_name = kwargs.get('collection_schema')
 
         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
@@ -152,39 +151,45 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
 
     def index_data(self, **kwargs):
         index_name = kwargs.get("index_name")
-        progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         chunking_tool = kwargs.get("chunking_tool")
         chunking_config = kwargs.get("chunking_config")
+        result = {"count": 0}
         #
-        if clean_index:
-            self._clean_index(index_name)
-        #
-        self.index_meta_init(index_name, kwargs)
-        #
-        self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
-        self._log_tool_event(f"Loading the documents to index...{kwargs}")
-        documents = self._base_loader(**kwargs)
-        documents = list(documents) # consume/exhaust generator to count items
-        documents_count = len(documents)
-        documents = (doc for doc in documents)
-        self._log_tool_event(f"Base documents were pre-loaded. "
-                             f"Search for possible document duplicates and remove them from the indexing list...")
-        documents = self._reduce_duplicates(documents, index_name)
-        self._log_tool_event(f"Duplicates were removed. "
-                             f"Processing documents to collect dependencies and prepare them for indexing...")
-        result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, progress_step=progress_step)
-        #
-        self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, result)
-        #
-        return {"status": "ok", "message": f"successfully indexed {result} documents"}
-
-    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, index_name: Optional[str] = None, progress_step: int = 20):
+        try:
+            if clean_index:
+                self._clean_index(index_name)
+            #
+            self.index_meta_init(index_name, kwargs)
+            #
+            self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
+            self._log_tool_event(f"Loading the documents to index...{kwargs}")
+            documents = self._base_loader(**kwargs)
+            documents = list(documents) # consume/exhaust generator to count items
+            documents_count = len(documents)
+            documents = (doc for doc in documents)
+            self._log_tool_event(f"Base documents were pre-loaded. "
+                                 f"Search for possible document duplicates and remove them from the indexing list...")
+            documents = self._reduce_duplicates(documents, index_name)
+            self._log_tool_event(f"Duplicates were removed. "
+                                 f"Processing documents to collect dependencies and prepare them for indexing...")
+            self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
+            #
+            results_count = result["count"]
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            #
+            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
+                    else "no new documents to index"}
+        except Exception as e:
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
+            raise e
+
+
+    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
         self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
         from ..runtime.langchain.interfaces.llm_processor import add_documents
         #
         base_doc_counter = 0
-        total_counter = 0
         pg_vector_add_docs_chunk = []
         for base_doc in base_documents:
             base_doc_counter += 1
@@ -232,10 +237,9 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             msg = f"Indexed base document #{base_doc_counter} out of {base_total} (with {dependent_docs_counter} dependencies)."
             logger.debug(msg)
             self._log_tool_event(msg)
-            total_counter += dependent_docs_counter
+            result["count"] += dependent_docs_counter
         if pg_vector_add_docs_chunk:
             add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
-        return total_counter
 
     def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
         from ..tools.chunkers import __all__ as chunkers
@@ -454,37 +458,27 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         )
 
     def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
-        index_meta_raw = super().get_index_meta(index_name)
-        from ..runtime.langchain.interfaces.llm_processor import add_documents
-        created_on = time.time()
-        metadata = {
-            "collection": index_name,
-            "type": IndexerKeywords.INDEX_META_TYPE.value,
-            "indexed": 0,
-            "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
-            "index_configuration": index_configuration,
-            "created_on": created_on,
-            "updated_on": created_on,
-            "history": "[]",
-        }
-        index_meta_ids = None
-        #
-        if index_meta_raw:
-            history_raw = index_meta_raw.get("metadata", {}).get("history", "[]")
-            if isinstance(history_raw, str) and history_raw.strip():
-                try:
-                    history = json.loads(history_raw)
-                except (json.JSONDecodeError, TypeError):
-                    history = []
-            else:
-                history = []
-            new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
-            history.append(new_history_item)
-            metadata["history"] = json.dumps(history)
-            index_meta_ids = [index_meta_raw.get("id")]
-        #
-        index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
-        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "history": "[]",
+                "task_id": None,
+            }
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
 
     def index_meta_update(self, index_name: str, state: str, result: int):
         index_meta_raw = super().get_index_meta(index_name)
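
Note: index_data now threads a mutable result dict into _save_index_generator, so the running count survives an exception and can still be written to index_meta on failure. A minimal sketch of that pattern, with simplified, hypothetical names rather than the toolkit's real classes:

    def save_documents(documents, result):
        for doc in documents:
            # ... persist the document to the vector store ...
            result["count"] += 1

    def index_documents(documents):
        result = {"count": 0}
        try:
            save_documents(documents, result)
            return {"status": "ok", "message": f"successfully indexed {result['count']} documents"}
        except Exception:
            # the partial count is still available for failure bookkeeping
            print(f"indexed {result['count']} documents before the error")
            raise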
alita_sdk/tools/code_indexer_toolkit.py

@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator
 
@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def key_fn(self, document: Document):
-        return document.metadata.get('id')
+        return document.metadata.get("filename")
 
     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )
 
     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from parse_code_files_for_db(documents)
+        yield from documents
 
     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
             self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
 
-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())
 
     def __handle_get_files(self, path: str, branch: str):
         """
alita_sdk/tools/confluence/api_wrapper.py

@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal
 
 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log
 
 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)
 
     @model_validator(mode='before')
@@ -498,7 +501,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         restrictions = self.client.get_all_restrictions_for_content(page["id"])
 
         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
             and not restrictions["read"]["restrictions"]["user"]["results"]
             and not restrictions["read"]["restrictions"]["group"]["results"]
         )
@@ -518,18 +523,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
                 ),
                 before_sleep=before_sleep_log(logger, logging.WARNING),
             )(self.client.get_page_by_id)
-            page = get_page(
-                page_id=page_id, expand=f"{self.content_format.value},version"
-            )
-            if not self.include_restricted_content and not self.is_public_page(page):
-                continue
+            try:
+                page = get_page(
+                    page_id=page_id, expand=f"{self.content_format.value},version"
+                )
+            except (ApiError, HTTPError) as e:
+                logger.error(f"Error fetching page with ID {page_id}: {e}")
+                page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+                # store errors
+                if self._errors is None:
+                    self._errors = []
+                self._errors.append(page_content_temp)
+                return Document(page_content=page_content_temp,
+                                metadata={})
+            # TODO: update on toolkit advanced settings level as a separate feature
+            # if not self.include_restricted_content and not self.is_public_page(page):
+            #     continue
             yield self.process_page(page, skip_images)
 
+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "Page not found"
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content
 
alita_sdk/tools/confluence/loader.py

@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger
 
 import requests
+from langchain_core.documents import Document
 
 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)
 
+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
alita_sdk/tools/elitea_base.py

@@ -537,7 +537,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                 "args_schema": RemoveIndexParams
             },
             {
-                "name": "list_indexes",
+                "name": "list_collections",
                 "mode": "list_collections",
                 "ref": self.list_collections,
                 "description": self.list_collections.__doc__,
alita_sdk/tools/gitlab/api_wrapper.py

@@ -115,9 +115,8 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
         """Remove trailing slash from URL if present."""
         return url.rstrip('/') if url else url
 
-    @model_validator(mode='before')
-    @classmethod
-    def validate_toolkit(cls, values: Dict) -> Dict:
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:
@@ -125,17 +124,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-        values['repository'] = cls._sanitize_url(values['repository'])
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=cls._sanitize_url(values['url']),
-            private_token=values['private_token'],
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )
 
         g.auth()
-        cls._git = g
-        cls._active_branch = values.get('branch')
-        return super().validate_toolkit(values)
+        self._git = g
+        self._active_branch = self.branch
+        return self
 
     @property
     def repo_instance(self):
alita_sdk/tools/jira/api_wrapper.py

@@ -563,7 +563,7 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         Use the appropriate issue link type (e.g., "Test", "Relates", "Blocks").
         If we use "Test" linktype, the test is inward issue, the story/other issue is outward issue.."""
 
-        comment = "This test is linked to the story."
+        comment = f"Issue {inward_issue_key} was linked to {outward_issue_key}."
         comment_body = {"content": [{"content": [{"text": comment,"type": "text"}],"type": "paragraph"}],"type": "doc","version": 1} if self.api_version == "3" else comment
         link_data = {
             "type": {"name": f"{linktype}"},
alita_sdk/tools/qtest/api_wrapper.py

@@ -135,9 +135,8 @@ class QtestApiWrapper(BaseToolApiWrapper):
             values['qtest_project_id'] = values.pop('project_id')
         return values
 
-    @model_validator(mode='before')
-    @classmethod
-    def validate_toolkit(cls, values):
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import swagger_client # noqa: F401
         except ImportError:
@@ -146,15 +145,13 @@ class QtestApiWrapper(BaseToolApiWrapper):
                 "`pip install git+https://github.com/Roman-Mitusov/qtest-api.git`"
             )
 
-        url = values['base_url']
-        api_token = values.get('qtest_api_token')
-        if api_token:
+        if self.qtest_api_token:
             configuration = swagger_client.Configuration()
-            configuration.host = url
-            configuration.api_key['Authorization'] = api_token
+            configuration.host = self.base_url
+            configuration.api_key['Authorization'] = self.qtest_api_token.get_secret_value()
             configuration.api_key_prefix['Authorization'] = 'Bearer'
-            cls._client = swagger_client.ApiClient(configuration)
-        return values
+            self._client = swagger_client.ApiClient(configuration)
+        return self
 
     def __instantiate_test_api_instance(self) -> TestCaseApi:
         # Instantiate the TestCaseApi instance according to the qtest api documentation and swagger client
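
Note: both GitLabAPIWrapper (above) and QtestApiWrapper move from a mode='before' classmethod validator over raw values to a mode='after' instance validator, so typed fields (including SecretStr) are already parsed and private attributes can be set on self. A minimal generic sketch of the pattern, not the SDK's actual models:

    from typing import Any
    from pydantic import BaseModel, PrivateAttr, SecretStr, model_validator

    class ExampleWrapper(BaseModel):
        base_url: str
        api_token: SecretStr
        _client: Any = PrivateAttr(default=None)

        @model_validator(mode="after")
        def validate_toolkit(self):
            # fields are validated by this point, so the secret can be unwrapped safely
            token = self.api_token.get_secret_value()
            self._client = {"host": self.base_url, "token": token}  # stand-in for a real API client
            return self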
alita_sdk/tools/sharepoint/api_wrapper.py

@@ -8,6 +8,7 @@ from office365.runtime.auth.client_credential import ClientCredential
 from office365.sharepoint.client_context import ClientContext
 from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr
 
+from .utils import decode_sharepoint_string
 from ..non_code_indexer_toolkit import NonCodeIndexerToolkit
 from ..utils.content_parser import parse_file_content
 from ...runtime.utils.utils import IndexerKeywords
@@ -105,30 +106,53 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
     def get_files_list(self, folder_name: str = None, limit_files: int = 100):
         """ If folder name is specified, lists all files in this folder under Shared Documents path. If folder name is empty, lists all files under root catalog (Shared Documents). Number of files is limited by limit_files (default is 100)."""
         try:
+            # exclude default system libraries like 'Form Templates', 'Site Assets', 'Style Library'
+            all_libraries = self._client.web.lists.filter("BaseTemplate eq 101 and Title ne 'Form Templates' and Title ne 'Site Assets' and Title ne 'Style Library'").get().execute_query()
             result = []
             if not limit_files:
                 limit_files = 100
-            target_folder_url = f"Shared Documents/{folder_name}" if folder_name else "Shared Documents"
-            files = (self._client.web.get_folder_by_server_relative_path(target_folder_url)
-                     .get_files(True)
-                     .execute_query())
-
-            for file in files:
-                if len(result) >= limit_files:
-                    break
-                temp_props = {
-                    'Name': file.properties['Name'],
-                    'Path': file.properties['ServerRelativeUrl'],
-                    'Created': file.properties['TimeCreated'],
-                    'Modified': file.properties['TimeLastModified'],
-                    'Link': file.properties['LinkingUrl'],
-                    'id': file.properties['UniqueId']
-                }
-                result.append(temp_props)
+            #
+            for lib in all_libraries:
+                library_type = decode_sharepoint_string(lib.properties["EntityTypeName"])
+                target_folder_url = f"{library_type}/{folder_name}" if folder_name else library_type
+                files = (self._client.web.get_folder_by_server_relative_path(target_folder_url)
+                         .get_files(True)
+                         .execute_query())
+                #
+                for file in files:
+                    if f"{library_type}/Forms" in file.properties['ServerRelativeUrl']:
+                        # skip files from system folder "Forms"
+                        continue
+                    if len(result) >= limit_files:
+                        break
+                    temp_props = {
+                        'Name': file.properties['Name'],
+                        'Path': file.properties['ServerRelativeUrl'],
+                        'Created': file.properties['TimeCreated'],
+                        'Modified': file.properties['TimeLastModified'],
+                        'Link': file.properties['LinkingUrl'],
+                        'id': file.properties['UniqueId']
+                    }
+                    result.append(temp_props)
             return result if result else ToolException("Can not get files or folder is empty. Please, double check folder name and read permissions.")
         except Exception as e:
-            logging.error(f"Failed to load files from sharepoint: {e}")
-            return ToolException("Can not get files. Please, double check folder name and read permissions.")
+            # attempt to get via graph api
+            try:
+                # attempt to get files via graph api
+                from .authorization_helper import SharepointAuthorizationHelper
+                auth_helper = SharepointAuthorizationHelper(
+                    client_id=self.client_id,
+                    client_secret=self.client_secret.get_secret_value(),
+                    tenant="", # optional for graph api
+                    scope="", # optional for graph api
+                    token_json="", # optional for graph api
+                )
+                files = auth_helper.get_files_list(self.site_url, folder_name, limit_files)
+                return files
+            except Exception as graph_e:
+                logging.error(f"Failed to load files from sharepoint via base api: {e}")
+                logging.error(f"Failed to load files from sharepoint via graph api: {graph_e}")
+                return ToolException(f"Can not get files. Please, double check folder name and read permissions: {e} and {graph_e}")
 
     def read_file(self, path,
                   is_capture_image: bool = False,
@@ -141,11 +165,28 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
             self._client.load(file).execute_query()
 
             file_content = file.read()
+            file_name = file.name
             self._client.execute_query()
         except Exception as e:
-            logging.error(f"Failed to load file from SharePoint: {e}. Path: {path}. Please, double check file name and path.")
-            return ToolException("File not found. Please, check file name and path.")
-        return parse_file_content(file_name=file.name,
+            # attempt to get via graph api
+            try:
+                # attempt to get files via graph api
+                from .authorization_helper import SharepointAuthorizationHelper
+                auth_helper = SharepointAuthorizationHelper(
+                    client_id=self.client_id,
+                    client_secret=self.client_secret.get_secret_value(),
+                    tenant="", # optional for graph api
+                    scope="", # optional for graph api
+                    token_json="", # optional for graph api
+                )
+                file_content = auth_helper.get_file_content(self.site_url, path)
+                file_name = path.split('/')[-1]
+            except Exception as graph_e:
+                logging.error(f"Failed to load file from SharePoint via base api: {e}. Path: {path}. Please, double check file name and path.")
+                logging.error(f"Failed to load file from SharePoint via graph api: {graph_e}. Path: {path}. Please, double check file name and path.")
+                return ToolException(f"File not found. Please, check file name and path: {e} and {graph_e}")
+        #
+        return parse_file_content(file_name=file_name,
                                   file_content=file_content,
                                   is_capture_image=is_capture_image,
                                   page_number=page_number,
@@ -219,12 +260,24 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
             yield document
 
     def _load_file_content_in_bytes(self, path):
-        file = self._client.web.get_file_by_server_relative_path(path)
-        self._client.load(file).execute_query()
-        file_content = file.read()
-        self._client.execute_query()
-        #
-        return file_content
+        try:
+            file = self._client.web.get_file_by_server_relative_path(path)
+            self._client.load(file).execute_query()
+            file_content = file.read()
+            self._client.execute_query()
+            #
+            return file_content
+        except Exception as e:
+            # attempt to get via graph api
+            from .authorization_helper import SharepointAuthorizationHelper
+            auth_helper = SharepointAuthorizationHelper(
+                client_id=self.client_id,
+                client_secret=self.client_secret.get_secret_value(),
+                tenant="", # optional for graph api
+                scope="", # optional for graph api
+                token_json="", # optional for graph api
+            )
+            return auth_helper.get_file_content(self.site_url, path)
 
     def get_available_tools(self):
         return super().get_available_tools() + [
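
Note: the three SharePoint read paths above now fall back to SharepointAuthorizationHelper (Graph API) when the office365 client call fails. A minimal sketch of that fallback in isolation; the constructor arguments and get_file_content call are copied from the hunks above, while the wrapper argument and function name are hypothetical:

    from alita_sdk.tools.sharepoint.authorization_helper import SharepointAuthorizationHelper

    def load_file_via_graph(wrapper, path):
        # wrapper is assumed to expose client_id, client_secret (SecretStr) and site_url
        auth_helper = SharepointAuthorizationHelper(
            client_id=wrapper.client_id,
            client_secret=wrapper.client_secret.get_secret_value(),
            tenant="",      # optional for graph api, per the diff
            scope="",       # optional for graph api, per the diff
            token_json="",  # optional for graph api, per the diff
        )
        return auth_helper.get_file_content(wrapper.site_url, path)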