alita-sdk 0.3.208__py3-none-any.whl → 0.3.210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. alita_sdk/runtime/clients/artifact.py +18 -4
  2. alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py +2 -1
  3. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +3 -3
  4. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +8 -4
  5. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
  6. alita_sdk/runtime/langchain/langraph_agent.py +9 -6
  7. alita_sdk/runtime/toolkits/artifact.py +7 -3
  8. alita_sdk/runtime/toolkits/tools.py +8 -1
  9. alita_sdk/runtime/tools/application.py +2 -0
  10. alita_sdk/runtime/tools/artifact.py +65 -8
  11. alita_sdk/runtime/tools/vectorstore.py +125 -42
  12. alita_sdk/runtime/utils/utils.py +3 -0
  13. alita_sdk/tools/ado/__init__.py +8 -0
  14. alita_sdk/tools/ado/repos/repos_wrapper.py +37 -0
  15. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +0 -7
  16. alita_sdk/tools/ado/work_item/__init__.py +4 -0
  17. alita_sdk/tools/ado/work_item/ado_wrapper.py +37 -4
  18. alita_sdk/tools/aws/delta_lake/__init__.py +1 -1
  19. alita_sdk/tools/bitbucket/__init__.py +13 -1
  20. alita_sdk/tools/bitbucket/api_wrapper.py +31 -4
  21. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +31 -0
  22. alita_sdk/tools/chunkers/code/codeparser.py +18 -10
  23. alita_sdk/tools/confluence/api_wrapper.py +35 -134
  24. alita_sdk/tools/confluence/loader.py +30 -28
  25. alita_sdk/tools/elitea_base.py +112 -11
  26. alita_sdk/tools/figma/__init__.py +13 -1
  27. alita_sdk/tools/figma/api_wrapper.py +47 -3
  28. alita_sdk/tools/github/api_wrapper.py +8 -0
  29. alita_sdk/tools/github/github_client.py +18 -0
  30. alita_sdk/tools/gitlab/__init__.py +4 -0
  31. alita_sdk/tools/gitlab/api_wrapper.py +10 -0
  32. alita_sdk/tools/google/bigquery/__init__.py +1 -1
  33. alita_sdk/tools/jira/__init__.py +21 -13
  34. alita_sdk/tools/jira/api_wrapper.py +285 -5
  35. alita_sdk/tools/sharepoint/__init__.py +11 -1
  36. alita_sdk/tools/sharepoint/api_wrapper.py +23 -53
  37. alita_sdk/tools/testrail/__init__.py +4 -0
  38. alita_sdk/tools/testrail/api_wrapper.py +28 -56
  39. alita_sdk/tools/utils/content_parser.py +123 -9
  40. alita_sdk/tools/xray/__init__.py +8 -1
  41. alita_sdk/tools/xray/api_wrapper.py +505 -14
  42. alita_sdk/tools/zephyr_scale/api_wrapper.py +5 -5
  43. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/METADATA +1 -1
  44. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/RECORD +47 -47
  45. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/WHEEL +0 -0
  46. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/licenses/LICENSE +0 -0
  47. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/top_level.txt +0 -0
alita_sdk/tools/jira/api_wrapper.py

@@ -4,17 +4,20 @@ import re
 import traceback
 from json import JSONDecodeError
 from traceback import format_exc
-from typing import List, Optional, Any, Dict
+from typing import List, Optional, Any, Dict, Generator
 import os
 
 from atlassian import Jira
+from langchain_core.documents import Document
 from langchain_core.tools import ToolException
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
 import requests
 
-from ..elitea_base import BaseToolApiWrapper
+from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
 from ..llm.img_utils import ImageDescriptionCache
 from ..utils import is_cookie_token, parse_cookie_string
+from ..utils.content_parser import parse_file_content, load_content_from_bytes
+from ...runtime.utils.utils import IndexerKeywords
 
 logger = logging.getLogger(__name__)
 
@@ -388,7 +391,7 @@ def process_search_response(jira_url, response, payload_params: Dict[str, Any] =
 
     return str(processed_issues)
 
-class JiraApiWrapper(BaseToolApiWrapper):
+class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
     base_url: str
     api_version: Optional[str] = "2",
     api_key: Optional[SecretStr] = None,
@@ -402,7 +405,6 @@ class JiraApiWrapper(BaseToolApiWrapper):
     _client: Jira = PrivateAttr()
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=lambda: ImageDescriptionCache(max_size=50))
     issue_search_pattern: str = r'/rest/api/\d+/search'
-    llm: Any = None
 
     @model_validator(mode='before')
     @classmethod
@@ -1061,7 +1063,7 @@ class JiraApiWrapper(BaseToolApiWrapper):
         def process_image_match(match):
             """Process each image reference and get its contextual description"""
             image_ref = match.group(1)
-            full_match = match.group(0) # The complete image reference with markers
+            full_match = match.group(0)  # The complete image reference with markers
 
             logger.info(f"Processing image reference: {image_ref} (full match: {full_match})")
 
@@ -1221,6 +1223,284 @@ class JiraApiWrapper(BaseToolApiWrapper):
             logger.error(f"Error processing comments with images: {stacktrace}")
             return f"Error processing comments with images: {str(e)}"
 
+    def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
+        """
+        Base loader for Jira issues, used to load issues as documents.
+        Uses the existing Jira client instance to fetch and process issues.
+        """
+        # Extract parameters from kwargs
+        jql = kwargs.get('jql')
+        fields_to_extract = kwargs.get('fields_to_extract')
+        fields_to_index = kwargs.get('fields_to_index')
+        include_attachments = kwargs.get('include_attachments', False)
+        max_total_issues = kwargs.get('max_total_issues', 1000)
+
+        # set values for skipped attachment extensions
+        self._skipped_attachment_extensions = kwargs.get('skip_attachment_extensions', [])
+        self._included_fields = fields_to_extract.copy() if fields_to_extract else []
+
+        try:
+            # Prepare fields to extract
+            DEFAULT_FIELDS = ['status', 'summary', 'reporter', 'description', 'created', 'updated', 'assignee', 'project', 'issuetype']
+            fields = DEFAULT_FIELDS.copy()
+
+            if fields_to_extract:
+                fields.extend(fields_to_extract)
+
+            if include_attachments:
+                fields.append('attachment')
+
+            # Use provided JQL query or default to all issues
+            if not jql:
+                jql_query = "ORDER BY updated DESC" # Default to get all issues ordered by update time
+            else:
+                jql_query = jql
+
+            # Remove duplicates and prepare fields
+            final_fields = ','.join({field.lower() for field in fields})
+
+            # Fetch issues using the existing Jira client
+            issue_generator = self._jql_get_tickets(
+                jql_query,
+                fields=final_fields,
+                limit=max_total_issues
+            )
+
+            # Process each batch of issues
+            for issues_batch in issue_generator:
+                for issue in issues_batch:
+                    issue_doc = self._process_issue_for_indexing(
+                        issue,
+                        fields_to_index
+                    )
+                    if issue_doc:
+                        yield issue_doc
+
+        except Exception as e:
+            logger.error(f"Error loading Jira issues: {str(e)}")
+            raise ToolException(f"Unable to load Jira issues: {str(e)}")
+
+    def _process_document(self, base_document: Document) -> Generator[Document, None, None]:
+        """
+        Process a base document to extract and index Jira issues extra fields: comments, attachments, etc..
+        """
+
+        issue_key = base_document.metadata.get('issue_key')
+        # get attachments content
+
+        issue = self._client.issue(issue_key, fields="attachment")
+        attachments = issue.get('fields', {}).get('attachment', [])
+        for attachment in attachments:
+            # get extension
+            ext = f".{attachment['filename'].split('.')[-1].lower()}"
+            if ext not in self._skipped_attachment_extensions:
+                attachment_id = f"attach_{attachment['id']}"
+                base_document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
+                try:
+                    attachment_content = self._client.get_attachment_content(attachment['id'])
+                except Exception as e:
+                    logger.error(f"Failed to download attachment {attachment['filename']} for issue {issue_key}: {str(e)}")
+                    attachment_content = self._client.get(path=f"secure/attachment/{attachment['id']}/{attachment['filename']}", not_json_response=True)
+                content = load_content_from_bytes(attachment_content, ext, llm=self.llm) if ext not in '.pdf' \
+                    else parse_file_content(file_content=attachment_content, file_name=attachment['filename'], llm=self.llm, is_capture_image=True)
+                if not content:
+                    continue
+                yield Document(page_content=content,
+                               metadata={
+                                   'id': attachment_id,
+                                   'issue_key': issue_key,
+                                   'source': f"{self.base_url}/browse/{issue_key}",
+                                   'filename': attachment['filename'],
+                                   'created': attachment['created'],
+                                   'mimeType': attachment['mimeType'],
+                                   'author': attachment.get('author', {}).get('name'),
+                                   IndexerKeywords.PARENT.value: base_document.metadata.get('id', None),
+                                   'type': 'attachment',
+                               })
+
+    def _jql_get_tickets(self, jql, fields="*all", start=0, limit=None, expand=None, validate_query=None):
+        """
+        Generator that yields batches of Jira issues based on JQL query.
+        """
+        from atlassian.errors import ApiError
+
+        params = {}
+        if limit is not None:
+            params["maxResults"] = int(limit)
+        if fields is not None:
+            if isinstance(fields, (list, tuple, set)):
+                fields = ",".join(fields)
+            params["fields"] = fields
+        if jql is not None:
+            params["jql"] = jql
+        if expand is not None:
+            params["expand"] = expand
+        if validate_query is not None:
+            params["validateQuery"] = validate_query
+
+        url = self._client.resource_url("search")
+
+        while True:
+            params["startAt"] = int(start)
+            try:
+                response = self._client.get(url, params=params)
+                if not response:
+                    break
+            except ApiError as e:
+                error_message = f"Jira API error: {str(e)}"
+                raise ValueError(f"Failed to fetch issues from Jira: {error_message}")
+
+            issues = response["issues"]
+            yield issues
+            if limit is not None and len(response["issues"]) + start >= limit:
+                break
+            if not response["issues"]:
+                break
+            start += len(issues)
+
+    def _process_issue_for_indexing(self, issue: dict, fields_to_index=None) -> Document:
+        """
+        Process a single Jira issue into a Document for indexing.
+        Copied and adapted from AlitaJiraLoader logic.
+        """
+        try:
+            # Build content starting with summary
+            content = f"{issue['fields']['summary']}\n"
+
+            # Add description if present
+            description = issue['fields'].get('description', '')
+            if description:
+                content += f"{description}\n"
+            else:
+                # If no description, still create document but with minimal content
+                logger.debug(f"Issue {issue.get('key', 'unknown')} has no description")
+
+            # Add comments if present
+            if 'comment' in issue['fields'] and issue['fields']['comment'].get('comments'):
+                for comment in issue['fields']['comment']['comments']:
+                    content += f"{comment['body']}\n"
+
+            # Add additional fields to index
+            if fields_to_index:
+                for field in fields_to_index:
+                    if field in issue['fields'] and issue['fields'][field]:
+                        field_value = issue['fields'][field]
+                        # Convert complex objects to string representation
+                        if isinstance(field_value, dict):
+                            field_value = str(field_value)
+                        elif isinstance(field_value, list):
+                            field_value = ', '.join(str(item) for item in field_value)
+                        content += f"{field_value}\n"
+
+            # Create metadata
+            metadata = {
+                "id": issue["id"],
+                "issue_key": issue["key"],
+                "source": f"{self.base_url}/browse/{issue['key']}",
+                "author": issue["fields"].get("reporter", {}).get("emailAddress") if issue["fields"].get("reporter") else None,
+                "status": issue["fields"].get("status", {}).get("name") if issue["fields"].get("status") else None,
+                "updated_on": issue["fields"].get("updated"),
+                "created_on": issue["fields"].get("created"),
+                "project": issue["fields"].get("project", {}).get("key") if issue["fields"].get("project") else None,
+                "issuetype": issue["fields"].get("issuetype", {}).get("name") if issue["fields"].get("issuetype") else None,
+                "type": "jira_issue",
+            }
+
+            return Document(page_content=content, metadata=metadata)
+
+        except Exception as e:
+            logger.error(f"Error processing issue {issue.get('key', 'unknown')}: {str(e)}")
+            return None
+
+    def _index_tool_params(self, **kwargs) -> dict[str, tuple[type, Field]]:
+        return {
+            'jql': (Optional[str], Field(
+                description="JQL query to filter issues. If not provided, all accessible issues will be indexed. Examples: 'project=PROJ', 'parentEpic=EPIC-123', 'status=Open'",
+                default=None)),
+            'fields_to_extract': (Optional[List[str]],
+                                  Field(description="Additional fields to extract from issues", default=None)),
+            'fields_to_index': (Optional[List[str]],
+                                Field(description="Additional fields to include in indexed content", default=None)),
+            'include_attachments': (Optional[bool],
+                                    Field(description="Whether to include attachment content in indexing",
+                                          default=False)),
+            'max_total_issues': (Optional[int], Field(description="Maximum number of issues to index", default=1000)),
+            'skip_attachment_extensions': (Optional[str], Field(
+                description="Comma-separated list of file extensions to skip when processing attachments",
+                default=None)),
+        }
+
+    # def index_data(self,
+    #                jql: Optional[str] = None,
+    #                fields_to_extract: Optional[List[str]] = None,
+    #                fields_to_index: Optional[List[str]] = None,
+    #                include_attachments: Optional[bool] = False,
+    #                max_total_issues: Optional[int] = 1000,
+    #                skip_attachment_extensions: Optional[List[str]] = None,
+    #                collection_suffix: str = "",
+    #                progress_step: Optional[int] = None,
+    #                clean_index: Optional[bool] = False):
+    #     """
+    #     Index Jira issues into the vector store.
+    #
+    #     Args:
+    #         jql: JQL query to filter issues. If not provided, all accessible issues will be indexed
+    #         fields_to_extract: Additional fields to extract from issues
+    #         fields_to_index: Additional fields to include in indexed content
+    #         include_attachments: Whether to include attachment content in indexing
+    #         max_total_issues: Maximum number of issues to index
+    #         skip_attachment_extensions: Comma-separated list of file extensions to skip when processing attachments
+    #         collection_suffix: Optional suffix for collection name (max 7 characters)
+    #         progress_step: Optional step size for progress reporting during indexing
+    #         clean_index: Optional flag to enforce clean existing index before indexing new data
+    #
+    #     Returns:
+    #         Result message from the vector store indexing operation
+    #     """
+    #     try:
+    #         # Validate that at least one filter is provided
+    #         if not any([jql]):
+    #             raise ToolException("Must provide at least one of: jql to filter issues for indexing")
+    #
+    #         # set extensions to skip for post-processing
+    #         self._skipped_attachment_extensions = skip_attachment_extensions if skip_attachment_extensions else []
+    #
+    #         # Get embeddings
+    #         from ...runtime.langchain.interfaces.llm_processor import get_embeddings
+    #         embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+    #
+    #         # Initialize vector store
+    #         vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+    #
+    #         # Prepare parameters for the loader
+    #         loader_params = {
+    #             'jql': jql,
+    #             'fields_to_extract': fields_to_extract,
+    #             'fields_to_index': fields_to_index,
+    #             'include_attachments': include_attachments,
+    #             'max_total_issues': max_total_issues,
+    #             'skip_attachment_extensions': skip_attachment_extensions,
+    #         }
+    #
+    #         # Load documents using _base_loader
+    #         docs = self._base_loader(**loader_params)
+    #
+    #         if not docs:
+    #             return "No Jira issues found matching the specified criteria."
+    #
+    #         docs = list(docs) # Convert generator to list for logging and indexing
+    #         logger.info(f"Loaded {len(docs)} Jira issues for indexing")
+    #
+    #         # Index the documents
+    #         result = vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+    #
+    #         return f"Successfully indexed {len(docs)} Jira issues. {result}"
+    #
+    #     except Exception as e:
+    #         logger.error(f"Error indexing Jira issues: {str(e)}")
+    #         raise ToolException(f"Error indexing Jira issues: {str(e)}")
+
+    @extend_with_vector_tools
     def get_available_tools(self):
         return [
             {
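
Illustration (not part of the released diff): the new _base_loader, _process_document, and _index_tool_params methods plug JiraApiWrapper into the indexing flow that BaseVectorStoreToolApiWrapper and extend_with_vector_tools provide, while the explicit index_data method is left commented out. The following minimal sketch shows how the loader might be consumed; it assumes the wrapper also accepts the vector-store settings passed in the sharepoint/__init__.py hunk below (connection_string, collection_name, embedding_model, embedding_model_params, vectorstore_type), and all concrete values, including the api_key, are placeholders.

from alita_sdk.tools.jira.api_wrapper import JiraApiWrapper

# Placeholder configuration; the vector-store kwargs are assumed to be accepted
# via BaseVectorStoreToolApiWrapper rather than defined on JiraApiWrapper itself.
wrapper = JiraApiWrapper(
    base_url="https://example.atlassian.net",
    api_key="<api-token>",
    llm=None,  # an LLM is only needed for image/attachment description
    connection_string="postgresql+psycopg://user:pass@host:5432/db",
    collection_name="jira_demo",
    embedding_model="HuggingFaceEmbeddings",
    embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
    vectorstore_type="PGVector",
)

# _base_loader yields one Document per issue; _process_document later expands
# attachments into dependent Documents keyed by IndexerKeywords.
for doc in wrapper._base_loader(jql="project = PROJ ORDER BY updated DESC",
                                max_total_issues=50):
    print(doc.metadata["issue_key"], doc.metadata["updated_on"])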
alita_sdk/tools/sharepoint/__init__.py

@@ -15,7 +15,13 @@ def get_tools(tool):
                     client_id=tool['settings'].get('client_id', None),
                     client_secret=tool['settings'].get('client_secret', None),
                     toolkit_name=tool.get('toolkit_name'),
-                    llm=tool['settings'].get('llm'))
+                    llm=tool['settings'].get('llm'),
+                    # indexer settings
+                    connection_string=tool['settings'].get('connection_string', None),
+                    collection_name=f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+                    embedding_model="HuggingFaceEmbeddings",
+                    embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+                    vectorstore_type="PGVector")
             .get_tools())
 
 
@@ -33,6 +39,10 @@ class SharepointToolkit(BaseToolkit):
             client_id=(str, Field(description="Client ID")),
             client_secret=(SecretStr, Field(description="Client Secret", json_schema_extra={'secret': True})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
+            # indexer settings
+            connection_string = (Optional[SecretStr], Field(description="Connection string for vectorstore",
+                                                            default=None,
+                                                            json_schema_extra={'secret': True})),
             __config__=ConfigDict(json_schema_extra={
                 'metadata': {
                     "label": "Sharepoint", "icon_url": "sharepoint.svg",
alita_sdk/tools/sharepoint/api_wrapper.py

@@ -1,17 +1,15 @@
 import json
 import logging
-from typing import Optional, List, Dict, Any, Generator
+from typing import Optional, List, Generator
 
-from ..chunkers import markdown_chunker
-from ..utils.content_parser import parse_file_content
+from langchain_core.documents import Document
 from langchain_core.tools import ToolException
 from office365.runtime.auth.client_credential import ClientCredential
 from office365.sharepoint.client_context import ClientContext
 from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr
 
-from ..elitea_base import BaseToolApiWrapper, BaseIndexParams, BaseVectorStoreToolApiWrapper
-from ...runtime.langchain.interfaces.llm_processor import get_embeddings
-from langchain_core.documents import Document
+from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
+from ..utils.content_parser import parse_file_content
 
 NoInput = create_model(
     "NoInput"
@@ -39,15 +37,6 @@ ReadDocument = create_model(
                                           default=None))
 )
 
-indexData = create_model(
-    "indexData",
-    __base__=BaseIndexParams,
-    progress_step=(Optional[int], Field(default=None, ge=0, le=100,
-                                        description="Optional step size for progress reporting during indexing")),
-    clean_index=(Optional[bool], Field(default=False,
-                                       description="Optional flag to enforce clean existing index before indexing new data")),
-)
-
 
 class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
     site_url: str
@@ -56,13 +45,6 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
     token: SecretStr = None
     _client: Optional[ClientContext] = PrivateAttr() # Private attribute for the office365 client
 
-    llm: Any = None
-    connection_string: Optional[SecretStr] = None
-    collection_name: Optional[str] = None
-    embedding_model: Optional[str] = "HuggingFaceEmbeddings"
-    embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
-    vectorstore_type: Optional[str] = "PGVector"
-
     @model_validator(mode='before')
     @classmethod
     def validate_toolkit(cls, values):
@@ -143,7 +125,11 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
             logging.error(f"Failed to load files from sharepoint: {e}")
             return ToolException("Can not get files. Please, double check folder name and read permissions.")
 
-    def read_file(self, path, is_capture_image: bool = False, page_number: int = None, sheet_name: str=None):
+    def read_file(self, path,
+                  is_capture_image: bool = False,
+                  page_number: int = None,
+                  sheet_name: str = None,
+                  excel_by_sheets: bool = False):
         """ Reads file located at the specified server-relative path. """
         try:
             file = self._client.web.get_file_by_server_relative_path(path)
@@ -159,9 +145,10 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
                                  is_capture_image=is_capture_image,
                                  page_number=page_number,
                                  sheet_name=sheet_name,
+                                 excel_by_sheets=excel_by_sheets,
                                  llm=self.llm)
 
-    def _base_loader(self) -> List[Document]:
+    def _base_loader(self, **kwargs) -> List[Document]:
         try:
             all_files = self.get_files_list()
         except Exception as e:
@@ -170,35 +157,24 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
         docs: List[Document] = []
         for file in all_files:
             metadata = {
-                ("updated_at" if k == "Modified" else k): str(v)
+                ("updated_on" if k == "Modified" else k): str(v)
                 for k, v in file.items()
             }
             docs.append(Document(page_content="", metadata=metadata))
         return docs
 
-    def index_data(self,
-                   collection_suffix: str = '',
-                   progress_step: int = None,
-                   clean_index: bool = False):
-        docs = self._base_loader()
-        embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
-        vs = self._init_vector_store(collection_suffix, embeddings=embedding)
-        return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
-
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
-        config = {
-            "max_tokens": self.llm.model_config.get('max_tokens', 512),
-            "token_overlap": self.llm.model_config.get('token_overlap',
-                                                       int(self.llm.model_config.get('max_tokens', 512) * 0.05))
-        }
-        chunks = markdown_chunker(file_content_generator=self._generate_file_content(document), config=config)
-        yield from chunks
-
-    def _generate_file_content(self, document: Document) -> Generator[Document, None, None]:
-        page_content = self.read_file(document.metadata['Path'], is_capture_image=True)
-        document.page_content = json.dumps(str(page_content))
-        yield document
-
+        page_content = self.read_file(document.metadata['Path'], is_capture_image=True, excel_by_sheets=True)
+        if isinstance(page_content, dict):
+            for key, value in page_content.items():
+                metadata = document.metadata
+                metadata['page'] = key
+                yield Document(page_content=str(value), metadata=metadata)
+        else:
+            document.page_content = json.dumps(str(page_content))
+            yield document
+
+    @extend_with_vector_tools
     def get_available_tools(self):
        return [
            {
@@ -218,11 +194,5 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
                 "description": self.read_file.__doc__,
                 "args_schema": ReadDocument,
                 "ref": self.read_file
-            },
-            {
-                "name": "index_data",
-                "ref": self.index_data,
-                "description": self.index_data.__doc__,
-                "args_schema": indexData,
             }
         ]
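
Illustration (not part of the released diff): the reworked _process_document above relies on read_file returning a dict of {sheet_name: content} when excel_by_sheets=True and a plain string otherwise. The standalone sketch below mimics that dispatch with made-up sheet data; it copies the base metadata per sheet for clarity, whereas the diffed code reuses the document's metadata dict in place.

from langchain_core.documents import Document

def fan_out(page_content, base_metadata):
    # Dict result (e.g. an Excel workbook read sheet-by-sheet): one Document per sheet.
    if isinstance(page_content, dict):
        for sheet, text in page_content.items():
            yield Document(page_content=str(text), metadata={**base_metadata, "page": sheet})
    # Anything else: a single Document carrying the whole file content.
    else:
        yield Document(page_content=str(page_content), metadata=base_metadata)

docs = list(fan_out({"Sheet1": "a,b\n1,2", "Sheet2": "totals"},
                    {"Path": "/sites/demo/report.xlsx"}))
print([d.metadata["page"] for d in docs])  # ['Sheet1', 'Sheet2']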
alita_sdk/tools/testrail/__init__.py

@@ -51,6 +51,10 @@ class TestrailToolkit(BaseToolkit):
             ),
             email=(str, Field(description="User's email", json_schema_extra={'configuration': True})),
             password=(SecretStr, Field(description="User's password", json_schema_extra={'secret': True, 'configuration': True})),
+            # indexer settings
+            connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
+                                                          default=None,
+                                                          json_schema_extra={'secret': True})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra={'metadata':
                 {"label": "Testrail", "icon_url": "testrail-icon.svg",