alita-sdk 0.3.375__py3-none-any.whl → 0.3.417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. alita_sdk/configurations/bitbucket.py +95 -0
  2. alita_sdk/configurations/confluence.py +96 -1
  3. alita_sdk/configurations/gitlab.py +79 -0
  4. alita_sdk/configurations/jira.py +103 -0
  5. alita_sdk/configurations/testrail.py +88 -0
  6. alita_sdk/configurations/xray.py +93 -0
  7. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  8. alita_sdk/configurations/zephyr_essential.py +75 -0
  9. alita_sdk/runtime/clients/client.py +3 -2
  10. alita_sdk/runtime/langchain/assistant.py +56 -40
  11. alita_sdk/runtime/langchain/constants.py +2 -0
  12. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  13. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  14. alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
  15. alita_sdk/runtime/langchain/langraph_agent.py +52 -27
  16. alita_sdk/runtime/langchain/utils.py +15 -4
  17. alita_sdk/runtime/toolkits/application.py +8 -1
  18. alita_sdk/runtime/toolkits/tools.py +79 -49
  19. alita_sdk/runtime/tools/__init__.py +7 -2
  20. alita_sdk/runtime/tools/application.py +7 -0
  21. alita_sdk/runtime/tools/function.py +28 -23
  22. alita_sdk/runtime/tools/graph.py +10 -4
  23. alita_sdk/runtime/tools/image_generation.py +104 -8
  24. alita_sdk/runtime/tools/llm.py +142 -114
  25. alita_sdk/runtime/tools/sandbox.py +166 -63
  26. alita_sdk/runtime/tools/vectorstore.py +2 -1
  27. alita_sdk/runtime/tools/vectorstore_base.py +2 -1
  28. alita_sdk/runtime/utils/utils.py +1 -0
  29. alita_sdk/tools/__init__.py +43 -31
  30. alita_sdk/tools/base_indexer_toolkit.py +54 -60
  31. alita_sdk/tools/code_indexer_toolkit.py +13 -3
  32. alita_sdk/tools/confluence/api_wrapper.py +29 -7
  33. alita_sdk/tools/confluence/loader.py +10 -0
  34. alita_sdk/tools/elitea_base.py +1 -1
  35. alita_sdk/tools/gitlab/api_wrapper.py +8 -9
  36. alita_sdk/tools/jira/api_wrapper.py +1 -1
  37. alita_sdk/tools/qtest/api_wrapper.py +7 -10
  38. alita_sdk/tools/sharepoint/api_wrapper.py +81 -28
  39. alita_sdk/tools/sharepoint/authorization_helper.py +131 -1
  40. alita_sdk/tools/sharepoint/utils.py +8 -2
  41. alita_sdk/tools/utils/content_parser.py +27 -16
  42. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +18 -5
  43. {alita_sdk-0.3.375.dist-info → alita_sdk-0.3.417.dist-info}/METADATA +1 -1
  44. {alita_sdk-0.3.375.dist-info → alita_sdk-0.3.417.dist-info}/RECORD +47 -47
  45. {alita_sdk-0.3.375.dist-info → alita_sdk-0.3.417.dist-info}/WHEEL +0 -0
  46. {alita_sdk-0.3.375.dist-info → alita_sdk-0.3.417.dist-info}/licenses/LICENSE +0 -0
  47. {alita_sdk-0.3.375.dist-info → alita_sdk-0.3.417.dist-info}/top_level.txt +0 -0
alita_sdk/tools/base_indexer_toolkit.py

@@ -7,7 +7,6 @@ from typing import Any, Optional, List, Dict, Generator
 from langchain_core.documents import Document
 from pydantic import create_model, Field, SecretStr
 
-from .utils import make_json_serializable
 from .utils.content_parser import file_extension_by_chunker, process_document_by_type
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.langchain.document_loaders.constants import loaders_allowed_to_override
@@ -111,7 +110,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('collection_name')
+        collection_name = kwargs.get('collection_schema')
 
         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
@@ -152,39 +151,45 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
 
     def index_data(self, **kwargs):
         index_name = kwargs.get("index_name")
-        progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         chunking_tool = kwargs.get("chunking_tool")
         chunking_config = kwargs.get("chunking_config")
+        result = {"count": 0}
         #
-        if clean_index:
-            self._clean_index(index_name)
-        #
-        self.index_meta_init(index_name, kwargs)
-        #
-        self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
-        self._log_tool_event(f"Loading the documents to index...{kwargs}")
-        documents = self._base_loader(**kwargs)
-        documents = list(documents) # consume/exhaust generator to count items
-        documents_count = len(documents)
-        documents = (doc for doc in documents)
-        self._log_tool_event(f"Base documents were pre-loaded. "
-                             f"Search for possible document duplicates and remove them from the indexing list...")
-        documents = self._reduce_duplicates(documents, index_name)
-        self._log_tool_event(f"Duplicates were removed. "
-                             f"Processing documents to collect dependencies and prepare them for indexing...")
-        result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, progress_step=progress_step)
-        #
-        self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, result)
-        #
-        return {"status": "ok", "message": f"successfully indexed {result} documents"}
-
-    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, index_name: Optional[str] = None, progress_step: int = 20):
+        try:
+            if clean_index:
+                self._clean_index(index_name)
+            #
+            self.index_meta_init(index_name, kwargs)
+            #
+            self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
+            self._log_tool_event(f"Loading the documents to index...{kwargs}")
+            documents = self._base_loader(**kwargs)
+            documents = list(documents) # consume/exhaust generator to count items
+            documents_count = len(documents)
+            documents = (doc for doc in documents)
+            self._log_tool_event(f"Base documents were pre-loaded. "
+                                 f"Search for possible document duplicates and remove them from the indexing list...")
+            documents = self._reduce_duplicates(documents, index_name)
+            self._log_tool_event(f"Duplicates were removed. "
+                                 f"Processing documents to collect dependencies and prepare them for indexing...")
+            self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
+            #
+            results_count = result["count"]
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            #
+            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
+                    else "no new documents to index"}
+        except Exception as e:
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
+            raise e
+
+
+    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
         self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
         from ..runtime.langchain.interfaces.llm_processor import add_documents
         #
         base_doc_counter = 0
-        total_counter = 0
         pg_vector_add_docs_chunk = []
         for base_doc in base_documents:
             base_doc_counter += 1
@@ -232,10 +237,9 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             msg = f"Indexed base document #{base_doc_counter} out of {base_total} (with {dependent_docs_counter} dependencies)."
             logger.debug(msg)
             self._log_tool_event(msg)
-            total_counter += dependent_docs_counter
+            result["count"] += dependent_docs_counter
         if pg_vector_add_docs_chunk:
             add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
-        return total_counter
 
     def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
         from ..tools.chunkers import __all__ as chunkers
@@ -454,37 +458,27 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         )
 
     def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
-        index_meta_raw = super().get_index_meta(index_name)
-        from ..runtime.langchain.interfaces.llm_processor import add_documents
-        created_on = time.time()
-        metadata = {
-            "collection": index_name,
-            "type": IndexerKeywords.INDEX_META_TYPE.value,
-            "indexed": 0,
-            "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
-            "index_configuration": index_configuration,
-            "created_on": created_on,
-            "updated_on": created_on,
-            "history": "[]",
-        }
-        index_meta_ids = None
-        #
-        if index_meta_raw:
-            history_raw = index_meta_raw.get("metadata", {}).get("history", "[]")
-            if isinstance(history_raw, str) and history_raw.strip():
-                try:
-                    history = json.loads(history_raw)
-                except (json.JSONDecodeError, TypeError):
-                    history = []
-            else:
-                history = []
-            new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
-            history.append(new_history_item)
-            metadata["history"] = json.dumps(history)
-            index_meta_ids = [index_meta_raw.get("id")]
-        #
-        index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
-        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "history": "[]",
+                "task_id": None,
+            }
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
 
     def index_meta_update(self, index_name: str, state: str, result: int):
         index_meta_raw = super().get_index_meta(index_name)
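
Note: index_data now threads a mutable result dict into _save_index_generator, so the running count survives an exception and can still be written to index_meta on failure. A minimal sketch of that pattern, with simplified, hypothetical names rather than the toolkit's real classes:

    def save_documents(documents, result):
        for doc in documents:
            # ... persist the document to the vector store ...
            result["count"] += 1

    def index_documents(documents):
        result = {"count": 0}
        try:
            save_documents(documents, result)
            return {"status": "ok", "message": f"successfully indexed {result['count']} documents"}
        except Exception:
            # the partial count is still available for failure bookkeeping
            print(f"indexed {result['count']} documents before the error")
            raise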
alita_sdk/tools/code_indexer_toolkit.py

@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator
 
@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def key_fn(self, document: Document):
-        return document.metadata.get('id')
+        return document.metadata.get("filename")
 
     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )
 
     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from parse_code_files_for_db(documents)
+        yield from documents
 
     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
             self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
 
-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())
 
     def __handle_get_files(self, path: str, branch: str):
         """
alita_sdk/tools/confluence/api_wrapper.py

@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal
 
 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log
 
 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)
 
     @model_validator(mode='before')
@@ -498,7 +501,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         restrictions = self.client.get_all_restrictions_for_content(page["id"])
 
         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
             and not restrictions["read"]["restrictions"]["user"]["results"]
             and not restrictions["read"]["restrictions"]["group"]["results"]
         )
@@ -518,18 +523,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
                 ),
                 before_sleep=before_sleep_log(logger, logging.WARNING),
             )(self.client.get_page_by_id)
-            page = get_page(
-                page_id=page_id, expand=f"{self.content_format.value},version"
-            )
-            if not self.include_restricted_content and not self.is_public_page(page):
-                continue
+            try:
+                page = get_page(
+                    page_id=page_id, expand=f"{self.content_format.value},version"
+                )
+            except (ApiError, HTTPError) as e:
+                logger.error(f"Error fetching page with ID {page_id}: {e}")
+                page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+                # store errors
+                if self._errors is None:
+                    self._errors = []
+                self._errors.append(page_content_temp)
+                return Document(page_content=page_content_temp,
+                                metadata={})
+            # TODO: update on toolkit advanced settings level as a separate feature
+            # if not self.include_restricted_content and not self.is_public_page(page):
+            #     continue
             yield self.process_page(page, skip_images)
 
+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "Page not found"
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content
 
alita_sdk/tools/confluence/loader.py

@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger
 
 import requests
+from langchain_core.documents import Document
 
 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)
 
+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
alita_sdk/tools/elitea_base.py

@@ -537,7 +537,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                 "args_schema": RemoveIndexParams
             },
             {
-                "name": "list_indexes",
+                "name": "list_collections",
                 "mode": "list_collections",
                 "ref": self.list_collections,
                 "description": self.list_collections.__doc__,
alita_sdk/tools/gitlab/api_wrapper.py

@@ -115,9 +115,8 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
         """Remove trailing slash from URL if present."""
         return url.rstrip('/') if url else url
 
-    @model_validator(mode='before')
-    @classmethod
-    def validate_toolkit(cls, values: Dict) -> Dict:
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:
@@ -125,17 +124,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-        values['repository'] = cls._sanitize_url(values['repository'])
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=cls._sanitize_url(values['url']),
-            private_token=values['private_token'],
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )
 
         g.auth()
-        cls._git = g
-        cls._active_branch = values.get('branch')
-        return super().validate_toolkit(values)
+        self._git = g
+        self._active_branch = self.branch
+        return self
 
     @property
     def repo_instance(self):
alita_sdk/tools/jira/api_wrapper.py

@@ -563,7 +563,7 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         Use the appropriate issue link type (e.g., "Test", "Relates", "Blocks").
         If we use "Test" linktype, the test is inward issue, the story/other issue is outward issue.."""
 
-        comment = "This test is linked to the story."
+        comment = f"Issue {inward_issue_key} was linked to {outward_issue_key}."
         comment_body = {"content": [{"content": [{"text": comment,"type": "text"}],"type": "paragraph"}],"type": "doc","version": 1} if self.api_version == "3" else comment
         link_data = {
             "type": {"name": f"{linktype}"},
alita_sdk/tools/qtest/api_wrapper.py

@@ -135,9 +135,8 @@ class QtestApiWrapper(BaseToolApiWrapper):
             values['qtest_project_id'] = values.pop('project_id')
         return values
 
-    @model_validator(mode='before')
-    @classmethod
-    def validate_toolkit(cls, values):
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import swagger_client # noqa: F401
         except ImportError:
@@ -146,15 +145,13 @@ class QtestApiWrapper(BaseToolApiWrapper):
                 "`pip install git+https://github.com/Roman-Mitusov/qtest-api.git`"
             )
 
-        url = values['base_url']
-        api_token = values.get('qtest_api_token')
-        if api_token:
+        if self.qtest_api_token:
             configuration = swagger_client.Configuration()
-            configuration.host = url
-            configuration.api_key['Authorization'] = api_token
+            configuration.host = self.base_url
+            configuration.api_key['Authorization'] = self.qtest_api_token.get_secret_value()
             configuration.api_key_prefix['Authorization'] = 'Bearer'
-            cls._client = swagger_client.ApiClient(configuration)
-        return values
+            self._client = swagger_client.ApiClient(configuration)
+        return self
 
     def __instantiate_test_api_instance(self) -> TestCaseApi:
         # Instantiate the TestCaseApi instance according to the qtest api documentation and swagger client
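
Note: both GitLabAPIWrapper (above) and QtestApiWrapper move from a mode='before' classmethod validator over raw values to a mode='after' instance validator, so typed fields (including SecretStr) are already parsed and private attributes can be set on self. A minimal generic sketch of the pattern, not the SDK's actual models:

    from typing import Any
    from pydantic import BaseModel, PrivateAttr, SecretStr, model_validator

    class ExampleWrapper(BaseModel):
        base_url: str
        api_token: SecretStr
        _client: Any = PrivateAttr(default=None)

        @model_validator(mode="after")
        def validate_toolkit(self):
            # fields are validated by this point, so the secret can be unwrapped safely
            token = self.api_token.get_secret_value()
            self._client = {"host": self.base_url, "token": token}  # stand-in for a real API client
            return self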
alita_sdk/tools/sharepoint/api_wrapper.py

@@ -8,6 +8,7 @@ from office365.runtime.auth.client_credential import ClientCredential
 from office365.sharepoint.client_context import ClientContext
 from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr
 
+from .utils import decode_sharepoint_string
 from ..non_code_indexer_toolkit import NonCodeIndexerToolkit
 from ..utils.content_parser import parse_file_content
 from ...runtime.utils.utils import IndexerKeywords
@@ -105,30 +106,53 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
     def get_files_list(self, folder_name: str = None, limit_files: int = 100):
         """ If folder name is specified, lists all files in this folder under Shared Documents path. If folder name is empty, lists all files under root catalog (Shared Documents). Number of files is limited by limit_files (default is 100)."""
         try:
+            # exclude default system libraries like 'Form Templates', 'Site Assets', 'Style Library'
+            all_libraries = self._client.web.lists.filter("BaseTemplate eq 101 and Title ne 'Form Templates' and Title ne 'Site Assets' and Title ne 'Style Library'").get().execute_query()
             result = []
             if not limit_files:
                 limit_files = 100
-            target_folder_url = f"Shared Documents/{folder_name}" if folder_name else "Shared Documents"
-            files = (self._client.web.get_folder_by_server_relative_path(target_folder_url)
-                     .get_files(True)
-                     .execute_query())
-
-            for file in files:
-                if len(result) >= limit_files:
-                    break
-                temp_props = {
-                    'Name': file.properties['Name'],
-                    'Path': file.properties['ServerRelativeUrl'],
-                    'Created': file.properties['TimeCreated'],
-                    'Modified': file.properties['TimeLastModified'],
-                    'Link': file.properties['LinkingUrl'],
-                    'id': file.properties['UniqueId']
-                }
-                result.append(temp_props)
+            #
+            for lib in all_libraries:
+                library_type = decode_sharepoint_string(lib.properties["EntityTypeName"])
+                target_folder_url = f"{library_type}/{folder_name}" if folder_name else library_type
+                files = (self._client.web.get_folder_by_server_relative_path(target_folder_url)
+                         .get_files(True)
+                         .execute_query())
+                #
+                for file in files:
+                    if f"{library_type}/Forms" in file.properties['ServerRelativeUrl']:
+                        # skip files from system folder "Forms"
+                        continue
+                    if len(result) >= limit_files:
+                        break
+                    temp_props = {
+                        'Name': file.properties['Name'],
+                        'Path': file.properties['ServerRelativeUrl'],
+                        'Created': file.properties['TimeCreated'],
+                        'Modified': file.properties['TimeLastModified'],
+                        'Link': file.properties['LinkingUrl'],
+                        'id': file.properties['UniqueId']
+                    }
+                    result.append(temp_props)
             return result if result else ToolException("Can not get files or folder is empty. Please, double check folder name and read permissions.")
         except Exception as e:
-            logging.error(f"Failed to load files from sharepoint: {e}")
-            return ToolException("Can not get files. Please, double check folder name and read permissions.")
+            # attempt to get via graph api
+            try:
+                # attempt to get files via graph api
+                from .authorization_helper import SharepointAuthorizationHelper
+                auth_helper = SharepointAuthorizationHelper(
+                    client_id=self.client_id,
+                    client_secret=self.client_secret.get_secret_value(),
+                    tenant="", # optional for graph api
+                    scope="", # optional for graph api
+                    token_json="", # optional for graph api
+                )
+                files = auth_helper.get_files_list(self.site_url, folder_name, limit_files)
+                return files
+            except Exception as graph_e:
+                logging.error(f"Failed to load files from sharepoint via base api: {e}")
+                logging.error(f"Failed to load files from sharepoint via graph api: {graph_e}")
+                return ToolException(f"Can not get files. Please, double check folder name and read permissions: {e} and {graph_e}")
 
     def read_file(self, path,
                   is_capture_image: bool = False,
@@ -141,11 +165,28 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
             self._client.load(file).execute_query()
 
             file_content = file.read()
+            file_name = file.name
             self._client.execute_query()
         except Exception as e:
-            logging.error(f"Failed to load file from SharePoint: {e}. Path: {path}. Please, double check file name and path.")
-            return ToolException("File not found. Please, check file name and path.")
-        return parse_file_content(file_name=file.name,
+            # attempt to get via graph api
+            try:
+                # attempt to get files via graph api
+                from .authorization_helper import SharepointAuthorizationHelper
+                auth_helper = SharepointAuthorizationHelper(
+                    client_id=self.client_id,
+                    client_secret=self.client_secret.get_secret_value(),
+                    tenant="", # optional for graph api
+                    scope="", # optional for graph api
+                    token_json="", # optional for graph api
+                )
+                file_content = auth_helper.get_file_content(self.site_url, path)
+                file_name = path.split('/')[-1]
+            except Exception as graph_e:
+                logging.error(f"Failed to load file from SharePoint via base api: {e}. Path: {path}. Please, double check file name and path.")
+                logging.error(f"Failed to load file from SharePoint via graph api: {graph_e}. Path: {path}. Please, double check file name and path.")
+                return ToolException(f"File not found. Please, check file name and path: {e} and {graph_e}")
+        #
+        return parse_file_content(file_name=file_name,
                                   file_content=file_content,
                                   is_capture_image=is_capture_image,
                                   page_number=page_number,
@@ -219,12 +260,24 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
             yield document
 
     def _load_file_content_in_bytes(self, path):
-        file = self._client.web.get_file_by_server_relative_path(path)
-        self._client.load(file).execute_query()
-        file_content = file.read()
-        self._client.execute_query()
-        #
-        return file_content
+        try:
+            file = self._client.web.get_file_by_server_relative_path(path)
+            self._client.load(file).execute_query()
+            file_content = file.read()
+            self._client.execute_query()
+            #
+            return file_content
+        except Exception as e:
+            # attempt to get via graph api
+            from .authorization_helper import SharepointAuthorizationHelper
+            auth_helper = SharepointAuthorizationHelper(
+                client_id=self.client_id,
+                client_secret=self.client_secret.get_secret_value(),
+                tenant="", # optional for graph api
+                scope="", # optional for graph api
+                token_json="", # optional for graph api
+            )
+            return auth_helper.get_file_content(self.site_url, path)
 
     def get_available_tools(self):
         return super().get_available_tools() + [
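
Note: the three SharePoint read paths above now fall back to SharepointAuthorizationHelper (Graph API) when the office365 client call fails. A minimal sketch of that fallback in isolation; the constructor arguments and get_file_content call are copied from the hunks above, while the wrapper argument and function name are hypothetical:

    from alita_sdk.tools.sharepoint.authorization_helper import SharepointAuthorizationHelper

    def load_file_via_graph(wrapper, path):
        # wrapper is assumed to expose client_id, client_secret (SecretStr) and site_url
        auth_helper = SharepointAuthorizationHelper(
            client_id=wrapper.client_id,
            client_secret=wrapper.client_secret.get_secret_value(),
            tenant="",      # optional for graph api, per the diff
            scope="",       # optional for graph api, per the diff
            token_json="",  # optional for graph api, per the diff
        )
        return auth_helper.get_file_content(wrapper.site_url, path)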