alita-sdk 0.3.209__py3-none-any.whl → 0.3.210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. alita_sdk/runtime/clients/artifact.py +18 -4
  2. alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py +2 -1
  3. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +3 -3
  4. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +8 -4
  5. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
  6. alita_sdk/runtime/langchain/langraph_agent.py +1 -1
  7. alita_sdk/runtime/toolkits/artifact.py +7 -3
  8. alita_sdk/runtime/toolkits/tools.py +8 -1
  9. alita_sdk/runtime/tools/application.py +2 -0
  10. alita_sdk/runtime/tools/artifact.py +65 -8
  11. alita_sdk/runtime/tools/vectorstore.py +125 -41
  12. alita_sdk/runtime/utils/utils.py +3 -0
  13. alita_sdk/tools/ado/__init__.py +8 -0
  14. alita_sdk/tools/ado/repos/repos_wrapper.py +37 -0
  15. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +0 -7
  16. alita_sdk/tools/ado/work_item/__init__.py +4 -0
  17. alita_sdk/tools/ado/work_item/ado_wrapper.py +37 -4
  18. alita_sdk/tools/aws/delta_lake/__init__.py +1 -1
  19. alita_sdk/tools/bitbucket/__init__.py +13 -1
  20. alita_sdk/tools/bitbucket/api_wrapper.py +31 -4
  21. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +31 -0
  22. alita_sdk/tools/chunkers/code/codeparser.py +18 -10
  23. alita_sdk/tools/confluence/api_wrapper.py +35 -134
  24. alita_sdk/tools/confluence/loader.py +30 -28
  25. alita_sdk/tools/elitea_base.py +112 -11
  26. alita_sdk/tools/figma/__init__.py +13 -1
  27. alita_sdk/tools/figma/api_wrapper.py +47 -3
  28. alita_sdk/tools/github/api_wrapper.py +8 -0
  29. alita_sdk/tools/github/github_client.py +18 -0
  30. alita_sdk/tools/gitlab/__init__.py +4 -0
  31. alita_sdk/tools/gitlab/api_wrapper.py +10 -0
  32. alita_sdk/tools/google/bigquery/__init__.py +1 -1
  33. alita_sdk/tools/jira/__init__.py +21 -13
  34. alita_sdk/tools/jira/api_wrapper.py +285 -5
  35. alita_sdk/tools/sharepoint/__init__.py +11 -1
  36. alita_sdk/tools/sharepoint/api_wrapper.py +23 -53
  37. alita_sdk/tools/testrail/__init__.py +4 -0
  38. alita_sdk/tools/testrail/api_wrapper.py +21 -54
  39. alita_sdk/tools/utils/content_parser.py +72 -8
  40. alita_sdk/tools/xray/__init__.py +8 -1
  41. alita_sdk/tools/xray/api_wrapper.py +505 -14
  42. alita_sdk/tools/zephyr_scale/api_wrapper.py +5 -5
  43. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/METADATA +1 -1
  44. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/RECORD +47 -47
  45. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/WHEEL +0 -0
  46. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/licenses/LICENSE +0 -0
  47. {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/top_level.txt +0 -0
alita_sdk/tools/jira/api_wrapper.py
@@ -4,17 +4,20 @@ import re
  import traceback
  from json import JSONDecodeError
  from traceback import format_exc
- from typing import List, Optional, Any, Dict
+ from typing import List, Optional, Any, Dict, Generator
  import os

  from atlassian import Jira
+ from langchain_core.documents import Document
  from langchain_core.tools import ToolException
  from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
  import requests

- from ..elitea_base import BaseToolApiWrapper
+ from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
  from ..llm.img_utils import ImageDescriptionCache
  from ..utils import is_cookie_token, parse_cookie_string
+ from ..utils.content_parser import parse_file_content, load_content_from_bytes
+ from ...runtime.utils.utils import IndexerKeywords

  logger = logging.getLogger(__name__)

@@ -388,7 +391,7 @@ def process_search_response(jira_url, response, payload_params: Dict[str, Any] =

  return str(processed_issues)

- class JiraApiWrapper(BaseToolApiWrapper):
+ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
  base_url: str
  api_version: Optional[str] = "2",
  api_key: Optional[SecretStr] = None,
@@ -402,7 +405,6 @@ class JiraApiWrapper(BaseToolApiWrapper):
  _client: Jira = PrivateAttr()
  _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=lambda: ImageDescriptionCache(max_size=50))
  issue_search_pattern: str = r'/rest/api/\d+/search'
- llm: Any = None

  @model_validator(mode='before')
  @classmethod
@@ -1061,7 +1063,7 @@ class JiraApiWrapper(BaseToolApiWrapper):
  def process_image_match(match):
  """Process each image reference and get its contextual description"""
  image_ref = match.group(1)
- full_match = match.group(0) # The complete image reference with markers
+ full_match = match.group(0)  # The complete image reference with markers

  logger.info(f"Processing image reference: {image_ref} (full match: {full_match})")

@@ -1221,6 +1223,284 @@ class JiraApiWrapper(BaseToolApiWrapper):
  logger.error(f"Error processing comments with images: {stacktrace}")
  return f"Error processing comments with images: {str(e)}"

+ def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
+ """
+ Base loader for Jira issues, used to load issues as documents.
+ Uses the existing Jira client instance to fetch and process issues.
+ """
+ # Extract parameters from kwargs
+ jql = kwargs.get('jql')
+ fields_to_extract = kwargs.get('fields_to_extract')
+ fields_to_index = kwargs.get('fields_to_index')
+ include_attachments = kwargs.get('include_attachments', False)
+ max_total_issues = kwargs.get('max_total_issues', 1000)
+
+ # set values for skipped attachment extensions
+ self._skipped_attachment_extensions = kwargs.get('skip_attachment_extensions', [])
+ self._included_fields = fields_to_extract.copy() if fields_to_extract else []
+
+ try:
+ # Prepare fields to extract
+ DEFAULT_FIELDS = ['status', 'summary', 'reporter', 'description', 'created', 'updated', 'assignee', 'project', 'issuetype']
+ fields = DEFAULT_FIELDS.copy()
+
+ if fields_to_extract:
+ fields.extend(fields_to_extract)
+
+ if include_attachments:
+ fields.append('attachment')
+
+ # Use provided JQL query or default to all issues
+ if not jql:
+ jql_query = "ORDER BY updated DESC" # Default to get all issues ordered by update time
+ else:
+ jql_query = jql
+
+ # Remove duplicates and prepare fields
+ final_fields = ','.join({field.lower() for field in fields})
+
+ # Fetch issues using the existing Jira client
+ issue_generator = self._jql_get_tickets(
+ jql_query,
+ fields=final_fields,
+ limit=max_total_issues
+ )
+
+ # Process each batch of issues
+ for issues_batch in issue_generator:
+ for issue in issues_batch:
+ issue_doc = self._process_issue_for_indexing(
+ issue,
+ fields_to_index
+ )
+ if issue_doc:
+ yield issue_doc
+
+ except Exception as e:
+ logger.error(f"Error loading Jira issues: {str(e)}")
+ raise ToolException(f"Unable to load Jira issues: {str(e)}")
+
+ def _process_document(self, base_document: Document) -> Generator[Document, None, None]:
+ """
+ Process a base document to extract and index Jira issues extra fields: comments, attachments, etc..
+ """
+
+ issue_key = base_document.metadata.get('issue_key')
+ # get attachments content
+
+ issue = self._client.issue(issue_key, fields="attachment")
+ attachments = issue.get('fields', {}).get('attachment', [])
+ for attachment in attachments:
+ # get extension
+ ext = f".{attachment['filename'].split('.')[-1].lower()}"
+ if ext not in self._skipped_attachment_extensions:
+ attachment_id = f"attach_{attachment['id']}"
+ base_document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
+ try:
+ attachment_content = self._client.get_attachment_content(attachment['id'])
+ except Exception as e:
+ logger.error(f"Failed to download attachment {attachment['filename']} for issue {issue_key}: {str(e)}")
+ attachment_content = self._client.get(path=f"secure/attachment/{attachment['id']}/{attachment['filename']}", not_json_response=True)
+ content = load_content_from_bytes(attachment_content, ext, llm=self.llm) if ext not in '.pdf' \
+ else parse_file_content(file_content=attachment_content, file_name=attachment['filename'], llm=self.llm, is_capture_image=True)
+ if not content:
+ continue
+ yield Document(page_content=content,
+ metadata={
+ 'id': attachment_id,
+ 'issue_key': issue_key,
+ 'source': f"{self.base_url}/browse/{issue_key}",
+ 'filename': attachment['filename'],
+ 'created': attachment['created'],
+ 'mimeType': attachment['mimeType'],
+ 'author': attachment.get('author', {}).get('name'),
+ IndexerKeywords.PARENT.value: base_document.metadata.get('id', None),
+ 'type': 'attachment',
+ })

+ def _jql_get_tickets(self, jql, fields="*all", start=0, limit=None, expand=None, validate_query=None):
+ """
+ Generator that yields batches of Jira issues based on JQL query.
+ """
+ from atlassian.errors import ApiError
+
+ params = {}
+ if limit is not None:
+ params["maxResults"] = int(limit)
+ if fields is not None:
+ if isinstance(fields, (list, tuple, set)):
+ fields = ",".join(fields)
+ params["fields"] = fields
+ if jql is not None:
+ params["jql"] = jql
+ if expand is not None:
+ params["expand"] = expand
+ if validate_query is not None:
+ params["validateQuery"] = validate_query
+
+ url = self._client.resource_url("search")
+
+ while True:
+ params["startAt"] = int(start)
+ try:
+ response = self._client.get(url, params=params)
+ if not response:
+ break
+ except ApiError as e:
+ error_message = f"Jira API error: {str(e)}"
+ raise ValueError(f"Failed to fetch issues from Jira: {error_message}")
+
+ issues = response["issues"]
+ yield issues
+ if limit is not None and len(response["issues"]) + start >= limit:
+ break
+ if not response["issues"]:
+ break
+ start += len(issues)

+ def _process_issue_for_indexing(self, issue: dict, fields_to_index=None) -> Document:
+ """
+ Process a single Jira issue into a Document for indexing.
+ Copied and adapted from AlitaJiraLoader logic.
+ """
+ try:
+ # Build content starting with summary
+ content = f"{issue['fields']['summary']}\n"
+
+ # Add description if present
+ description = issue['fields'].get('description', '')
+ if description:
+ content += f"{description}\n"
+ else:
+ # If no description, still create document but with minimal content
+ logger.debug(f"Issue {issue.get('key', 'unknown')} has no description")
+
+ # Add comments if present
+ if 'comment' in issue['fields'] and issue['fields']['comment'].get('comments'):
+ for comment in issue['fields']['comment']['comments']:
+ content += f"{comment['body']}\n"
+
+ # Add additional fields to index
+ if fields_to_index:
+ for field in fields_to_index:
+ if field in issue['fields'] and issue['fields'][field]:
+ field_value = issue['fields'][field]
+ # Convert complex objects to string representation
+ if isinstance(field_value, dict):
+ field_value = str(field_value)
+ elif isinstance(field_value, list):
+ field_value = ', '.join(str(item) for item in field_value)
+ content += f"{field_value}\n"
+
+ # Create metadata
+ metadata = {
+ "id": issue["id"],
+ "issue_key": issue["key"],
+ "source": f"{self.base_url}/browse/{issue['key']}",
+ "author": issue["fields"].get("reporter", {}).get("emailAddress") if issue["fields"].get("reporter") else None,
+ "status": issue["fields"].get("status", {}).get("name") if issue["fields"].get("status") else None,
+ "updated_on": issue["fields"].get("updated"),
+ "created_on": issue["fields"].get("created"),
+ "project": issue["fields"].get("project", {}).get("key") if issue["fields"].get("project") else None,
+ "issuetype": issue["fields"].get("issuetype", {}).get("name") if issue["fields"].get("issuetype") else None,
+ "type": "jira_issue",
+ }
+
+ return Document(page_content=content, metadata=metadata)
+
+ except Exception as e:
+ logger.error(f"Error processing issue {issue.get('key', 'unknown')}: {str(e)}")
+ return None
+
+ def _index_tool_params(self, **kwargs) -> dict[str, tuple[type, Field]]:
+ return {
+ 'jql': (Optional[str], Field(
+ description="JQL query to filter issues. If not provided, all accessible issues will be indexed. Examples: 'project=PROJ', 'parentEpic=EPIC-123', 'status=Open'",
+ default=None)),
+ 'fields_to_extract': (Optional[List[str]],
+ Field(description="Additional fields to extract from issues", default=None)),
+ 'fields_to_index': (Optional[List[str]],
+ Field(description="Additional fields to include in indexed content", default=None)),
+ 'include_attachments': (Optional[bool],
+ Field(description="Whether to include attachment content in indexing",
+ default=False)),
+ 'max_total_issues': (Optional[int], Field(description="Maximum number of issues to index", default=1000)),
+ 'skip_attachment_extensions': (Optional[str], Field(
+ description="Comma-separated list of file extensions to skip when processing attachments",
+ default=None)),
+ }
+
+ # def index_data(self,
+ # jql: Optional[str] = None,
+ # fields_to_extract: Optional[List[str]] = None,
+ # fields_to_index: Optional[List[str]] = None,
+ # include_attachments: Optional[bool] = False,
+ # max_total_issues: Optional[int] = 1000,
+ # skip_attachment_extensions: Optional[List[str]] = None,
+ # collection_suffix: str = "",
+ # progress_step: Optional[int] = None,
+ # clean_index: Optional[bool] = False):
+ # """
+ # Index Jira issues into the vector store.
+ #
+ # Args:
+ # jql: JQL query to filter issues. If not provided, all accessible issues will be indexed
+ # fields_to_extract: Additional fields to extract from issues
+ # fields_to_index: Additional fields to include in indexed content
+ # include_attachments: Whether to include attachment content in indexing
+ # max_total_issues: Maximum number of issues to index
+ # skip_attachment_extensions: Comma-separated list of file extensions to skip when processing attachments
+ # collection_suffix: Optional suffix for collection name (max 7 characters)
+ # progress_step: Optional step size for progress reporting during indexing
+ # clean_index: Optional flag to enforce clean existing index before indexing new data
+ #
+ # Returns:
+ # Result message from the vector store indexing operation
+ # """
+ # try:
+ # # Validate that at least one filter is provided
+ # if not any([jql]):
+ # raise ToolException("Must provide at least one of: jql to filter issues for indexing")
+ #
+ # # set extensions to skip for post-processing
+ # self._skipped_attachment_extensions = skip_attachment_extensions if skip_attachment_extensions else []
+ #
+ # # Get embeddings
+ # from ...runtime.langchain.interfaces.llm_processor import get_embeddings
+ # embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+ #
+ # # Initialize vector store
+ # vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+ #
+ # # Prepare parameters for the loader
+ # loader_params = {
+ # 'jql': jql,
+ # 'fields_to_extract': fields_to_extract,
+ # 'fields_to_index': fields_to_index,
+ # 'include_attachments': include_attachments,
+ # 'max_total_issues': max_total_issues,
+ # 'skip_attachment_extensions': skip_attachment_extensions,
+ # }
+ #
+ # # Load documents using _base_loader
+ # docs = self._base_loader(**loader_params)
+ #
+ # if not docs:
+ # return "No Jira issues found matching the specified criteria."
+ #
+ # docs = list(docs) # Convert generator to list for logging and indexing
+ # logger.info(f"Loaded {len(docs)} Jira issues for indexing")
+ #
+ # # Index the documents
+ # result = vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+ #
+ # return f"Successfully indexed {len(docs)} Jira issues. {result}"
+ #
+ # except Exception as e:
+ # logger.error(f"Error indexing Jira issues: {str(e)}")
+ # raise ToolException(f"Error indexing Jira issues: {str(e)}")
+
+ @extend_with_vector_tools
  def get_available_tools(self):
  return [
  {
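For orientation, a minimal usage sketch of the new loader follows. It is not part of the diff: only fields visible in this diff are used to construct the wrapper (a real instance may require additional credentials), the JQL value is an example, and the metadata keys are the ones assembled in _process_issue_for_indexing.

```python
from alita_sdk.tools.jira.api_wrapper import JiraApiWrapper

# Hypothetical sketch, not from the diff: construction details beyond base_url/api_key
# are assumptions and may differ in a real deployment.
wrapper = JiraApiWrapper(base_url="https://jira.example.com", api_key="***")

for doc in wrapper._base_loader(
    jql="project=PROJ AND status=Open",  # optional JQL filter, see _index_tool_params
    include_attachments=False,
    max_total_issues=100,
):
    # each yielded Document carries the metadata built in _process_issue_for_indexing
    print(doc.metadata["issue_key"], doc.metadata["status"], len(doc.page_content))
```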
alita_sdk/tools/sharepoint/__init__.py
@@ -15,7 +15,13 @@ def get_tools(tool):
  client_id=tool['settings'].get('client_id', None),
  client_secret=tool['settings'].get('client_secret', None),
  toolkit_name=tool.get('toolkit_name'),
- llm=tool['settings'].get('llm'))
+ llm=tool['settings'].get('llm'),
+ # indexer settings
+ connection_string=tool['settings'].get('connection_string', None),
+ collection_name=f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+ embedding_model="HuggingFaceEmbeddings",
+ embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+ vectorstore_type="PGVector")
  .get_tools())


@@ -33,6 +39,10 @@ class SharepointToolkit(BaseToolkit):
  client_id=(str, Field(description="Client ID")),
  client_secret=(SecretStr, Field(description="Client Secret", json_schema_extra={'secret': True})),
  selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
+ # indexer settings
+ connection_string = (Optional[SecretStr], Field(description="Connection string for vectorstore",
+ default=None,
+ json_schema_extra={'secret': True})),
  __config__=ConfigDict(json_schema_extra={
  'metadata': {
  "label": "Sharepoint", "icon_url": "sharepoint.svg",
alita_sdk/tools/sharepoint/api_wrapper.py
@@ -1,17 +1,15 @@
  import json
  import logging
- from typing import Optional, List, Dict, Any, Generator
+ from typing import Optional, List, Generator

- from ..chunkers import markdown_chunker
- from ..utils.content_parser import parse_file_content
+ from langchain_core.documents import Document
  from langchain_core.tools import ToolException
  from office365.runtime.auth.client_credential import ClientCredential
  from office365.sharepoint.client_context import ClientContext
  from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr

- from ..elitea_base import BaseToolApiWrapper, BaseIndexParams, BaseVectorStoreToolApiWrapper
- from ...runtime.langchain.interfaces.llm_processor import get_embeddings
- from langchain_core.documents import Document
+ from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
+ from ..utils.content_parser import parse_file_content

  NoInput = create_model(
  "NoInput"
@@ -39,15 +37,6 @@ ReadDocument = create_model(
  default=None))
  )

- indexData = create_model(
- "indexData",
- __base__=BaseIndexParams,
- progress_step=(Optional[int], Field(default=None, ge=0, le=100,
- description="Optional step size for progress reporting during indexing")),
- clean_index=(Optional[bool], Field(default=False,
- description="Optional flag to enforce clean existing index before indexing new data")),
- )
-

  class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  site_url: str
@@ -56,13 +45,6 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  token: SecretStr = None
  _client: Optional[ClientContext] = PrivateAttr() # Private attribute for the office365 client

- llm: Any = None
- connection_string: Optional[SecretStr] = None
- collection_name: Optional[str] = None
- embedding_model: Optional[str] = "HuggingFaceEmbeddings"
- embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
- vectorstore_type: Optional[str] = "PGVector"
-
  @model_validator(mode='before')
  @classmethod
  def validate_toolkit(cls, values):
@@ -143,7 +125,11 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  logging.error(f"Failed to load files from sharepoint: {e}")
  return ToolException("Can not get files. Please, double check folder name and read permissions.")

- def read_file(self, path, is_capture_image: bool = False, page_number: int = None, sheet_name: str=None):
+ def read_file(self, path,
+ is_capture_image: bool = False,
+ page_number: int = None,
+ sheet_name: str = None,
+ excel_by_sheets: bool = False):
  """ Reads file located at the specified server-relative path. """
  try:
  file = self._client.web.get_file_by_server_relative_path(path)
@@ -159,9 +145,10 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  is_capture_image=is_capture_image,
  page_number=page_number,
  sheet_name=sheet_name,
+ excel_by_sheets=excel_by_sheets,
  llm=self.llm)

- def _base_loader(self) -> List[Document]:
+ def _base_loader(self, **kwargs) -> List[Document]:
  try:
  all_files = self.get_files_list()
  except Exception as e:
@@ -170,35 +157,24 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  docs: List[Document] = []
  for file in all_files:
  metadata = {
- ("updated_at" if k == "Modified" else k): str(v)
+ ("updated_on" if k == "Modified" else k): str(v)
  for k, v in file.items()
  }
  docs.append(Document(page_content="", metadata=metadata))
  return docs

- def index_data(self,
- collection_suffix: str = '',
- progress_step: int = None,
- clean_index: bool = False):
- docs = self._base_loader()
- embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
- vs = self._init_vector_store(collection_suffix, embeddings=embedding)
- return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
-
  def _process_document(self, document: Document) -> Generator[Document, None, None]:
- config = {
- "max_tokens": self.llm.model_config.get('max_tokens', 512),
- "token_overlap": self.llm.model_config.get('token_overlap',
- int(self.llm.model_config.get('max_tokens', 512) * 0.05))
- }
- chunks = markdown_chunker(file_content_generator=self._generate_file_content(document), config=config)
- yield from chunks
-
- def _generate_file_content(self, document: Document) -> Generator[Document, None, None]:
- page_content = self.read_file(document.metadata['Path'], is_capture_image=True)
- document.page_content = json.dumps(str(page_content))
- yield document
-
+ page_content = self.read_file(document.metadata['Path'], is_capture_image=True, excel_by_sheets=True)
+ if isinstance(page_content, dict):
+ for key, value in page_content.items():
+ metadata = document.metadata
+ metadata['page'] = key
+ yield Document(page_content=str(value), metadata=metadata)
+ else:
+ document.page_content = json.dumps(str(page_content))
+ yield document
+
+ @extend_with_vector_tools
  def get_available_tools(self):
  return [
  {
@@ -218,11 +194,5 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  "description": self.read_file.__doc__,
  "args_schema": ReadDocument,
  "ref": self.read_file
- },
- {
- "name": "index_data",
- "ref": self.index_data,
- "description": self.index_data.__doc__,
- "args_schema": indexData,
  }
  ]
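The reworked _process_document no longer runs the markdown chunker; it calls read_file(..., excel_by_sheets=True) and, when that call returns a dict of sheet name to content, yields one Document per sheet with the sheet name stored under the 'page' metadata key. A small illustration of that fan-out, with the dict contents and metadata invented for the example:

```python
from langchain_core.documents import Document

# Illustration only: the per-sheet mapping read_file is expected to return for Excel files.
page_content = {"Sheet1": "col_a,col_b\n1,2", "Totals": "sum,3"}      # invented example data
base_metadata = {"Path": "/sites/demo/Shared Documents/report.xlsx"}  # invented metadata

docs = [
    Document(page_content=str(value), metadata={**base_metadata, "page": key})
    for key, value in page_content.items()
]
print(len(docs))  # 2 documents, one per worksheet
```

Note that the loop in the diff assigns metadata = document.metadata and mutates it in place across iterations; the copies made above are only for the illustration.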
alita_sdk/tools/testrail/__init__.py
@@ -51,6 +51,10 @@ class TestrailToolkit(BaseToolkit):
  ),
  email=(str, Field(description="User's email", json_schema_extra={'configuration': True})),
  password=(SecretStr, Field(description="User's password", json_schema_extra={'secret': True, 'configuration': True})),
+ # indexer settings
+ connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
+ default=None,
+ json_schema_extra={'secret': True})),
  selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
  __config__=ConfigDict(json_schema_extra={'metadata':
  {"label": "Testrail", "icon_url": "testrail-icon.svg",
alita_sdk/tools/testrail/api_wrapper.py
@@ -4,10 +4,11 @@ from typing import Dict, List, Optional, Union, Any, Generator

  import pandas as pd
  from langchain_core.tools import ToolException
+ from openai import BadRequestError
  from pydantic import SecretStr, create_model, model_validator
  from pydantic.fields import Field, PrivateAttr
  from testrail_api import StatusCodeError, TestRailAPI
- from ..elitea_base import BaseVectorStoreToolApiWrapper, BaseIndexParams
+ from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
  from langchain_core.documents import Document

  from ...runtime.utils.utils import IndexerKeywords
@@ -289,20 +290,6 @@ updateCase = create_model(
  ),
  )

- # Schema for indexing TestRail data into vector store
- indexData = create_model(
- "indexData",
- __base__=BaseIndexParams,
- project_id=(str, Field(description="TestRail project ID to index data from")),
- suite_id=(Optional[str], Field(default=None, description="Optional TestRail suite ID to filter test cases")),
- section_id=(Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
- title_keyword=(Optional[str], Field(default=None, description="Optional keyword to filter test cases by title")),
- progress_step=(Optional[int],
- Field(default=None, ge=0, le=100, description="Optional step size for progress reporting during indexing")),
- clean_index=(Optional[bool],
- Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
- )
-
  SUPPORTED_KEYS = {
  "id", "title", "section_id", "template_id", "type_id", "priority_id", "milestone_id",
  "refs", "created_by", "created_on", "updated_by", "updated_on", "estimate",
@@ -317,14 +304,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  password: Optional[SecretStr] = None,
  email: Optional[str] = None,
  _client: Optional[TestRailAPI] = PrivateAttr() # Private attribute for the TestRail client
- llm: Any = None
-
- connection_string: Optional[SecretStr] = None
- collection_name: Optional[str] = None
- embedding_model: Optional[str] = "HuggingFaceEmbeddings"
- embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
- vectorstore_type: Optional[str] = "PGVector"
-

  @model_validator(mode="before")
  @classmethod
@@ -490,7 +469,8 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  project_id=project_id, **params
  )

- cases = extracted_cases.get("cases")
+ # support old versions of testrail_api
+ cases = extracted_cases.get("cases") if isinstance(extracted_cases, dict) else extracted_cases

  if cases is None:
  return ToolException("No test cases found in the extracted data.")
@@ -554,7 +534,8 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  def _base_loader(self, project_id: str,
  suite_id: Optional[str] = None,
  section_id: Optional[int] = None,
- title_keyword: Optional[str] = None
+ title_keyword: Optional[str] = None,
+ **kwargs: Any
  ) -> Generator[Document, None, None]:
  try:
  if suite_id:
@@ -577,7 +558,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  'title': case.get('title', ''),
  'suite_id': suite_id or case.get('suite_id', ''),
  'id': str(case.get('id', '')),
- 'updated_on': case.get('updated_on') or -1,
+ IndexerKeywords.UPDATED_ON.value: case.get('updated_on') or -1,
  'labels': [lbl['title'] for lbl in case.get('labels', [])],
  'type': case.get('type_id') or -1,
  'priority': case.get('priority_id') or -1,
@@ -588,22 +569,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  'entity_type': 'test_case',
  })

- def index_data(
- self,
- project_id: str,
- suite_id: Optional[str] = None,
- collection_suffix: str = "",
- section_id: Optional[int] = None,
- title_keyword: Optional[str] = None,
- progress_step: Optional[int] = None,
- clean_index: Optional[bool] = False
- ):
- """Load TestRail test cases into the vector store."""
- docs = self._base_loader(project_id, suite_id, section_id, title_keyword)
- embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
- vs = self._init_vector_store(collection_suffix, embeddings=embedding)
- return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
-
  def _process_document(self, document: Document) -> Generator[Document, None, None]:
  """
  Process an existing base document to extract relevant metadata for full document preparation.
@@ -626,16 +591,15 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):

  # process each attachment to extract its content
  for attachment in attachments:
- attachment_id = attachment['id']
+ attachment_id = f"attach_{attachment['id']}"
  # add attachment id to metadata of parent
  document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
-
  # TODO: pass it to chunkers
  yield Document(page_content=self._process_attachment(attachment),
  metadata={
  'project_id': base_data.get('project_id', ''),
- IndexerKeywords.PARENT.value: case_id,
- 'id': attachment_id,
+ 'id': str(attachment_id),
+ IndexerKeywords.PARENT.value: str(case_id),
  'filename': attachment['filename'],
  'filetype': attachment['filetype'],
  'created_on': attachment['created_on'],
@@ -663,10 +627,20 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  try:
  attachment_path = self._client.attachments.get_attachment(attachment_id=attachment['id'], path=f"./{attachment['filename']}")
  page_content = parse_file_content(file_name=attachment['filename'], file_content=attachment_path.read_bytes(), llm=self.llm, is_capture_image=True)
+ except BadRequestError as ai_e:
+ logger.error(f"Unable to parse page's content with type: {attachment['filetype']} due to AI service issues: {ai_e}")
  except Exception as e:
  logger.error(f"Unable to parse page's content with type: {attachment['filetype']}: {e}")
  return page_content

+ def _index_tool_params(self):
+ return {
+ 'project_id': (str, Field(description="TestRail project ID to index data from")),
+ 'suite_id': (Optional[str],
+ Field(default=None, description="Optional TestRail suite ID to filter test cases")),
+ 'section_id': (Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
+ }
+
  def _to_markup(self, data: List[Dict], output_format: str) -> str:
  """
  Converts the given data into the specified format: 'json', 'csv', or 'markdown'.
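With the per-toolkit indexData schema removed, _index_tool_params only declares the extra indexing arguments, each as a (type, Field) tuple in exactly the form pydantic.create_model accepts; the shared base class in elitea_base presumably assembles the final indexing schema from them. A hedged sketch of that shape under that assumption:

```python
from typing import Optional
from pydantic import Field, create_model

# Hedged sketch: the tuples returned by _index_tool_params can be passed straight to
# create_model to rebuild an args schema similar to the removed indexData model.
IndexArgs = create_model(
    "indexData",
    project_id=(str, Field(description="TestRail project ID to index data from")),
    suite_id=(Optional[str], Field(default=None, description="Optional TestRail suite ID to filter test cases")),
    section_id=(Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
)
print(IndexArgs(project_id="12").model_dump())
```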
@@ -694,6 +668,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  if output_format == "markdown":
  return df.to_markdown(index=False)

+ @extend_with_vector_tools
  def get_available_tools(self):
  tools = [
  {
@@ -731,14 +706,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  "ref": self.update_case,
  "description": self.update_case.__doc__,
  "args_schema": updateCase,
- },
- {
- "name": "index_data",
- "ref": self.index_data,
- "description": self.index_data.__doc__,
- "args_schema": indexData,
  }
  ]
- # Add vector search from base
- tools.extend(self._get_vector_search_tools())
  return tools
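Across the Jira, SharePoint and TestRail wrappers, the hand-written index_data tools and the manual tools.extend(self._get_vector_search_tools()) call are replaced by the extend_with_vector_tools decorator imported from elitea_base. Its implementation is not part of this diff; the sketch below is an inference from the code it replaces and may differ from the real decorator, which likely also registers a generic index_data tool built from _index_tool_params.

```python
# Inferred sketch only; the actual decorator lives in alita_sdk/tools/elitea_base.py.
def extend_with_vector_tools(get_available_tools):
    def wrapper(self):
        tools = get_available_tools(self)
        tools.extend(self._get_vector_search_tools())  # append the shared vector-store tools
        return tools
    return wrapper
```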