alita-sdk 0.3.209__py3-none-any.whl → 0.3.210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/runtime/clients/artifact.py +18 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py +2 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +3 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +8 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
- alita_sdk/runtime/langchain/langraph_agent.py +1 -1
- alita_sdk/runtime/toolkits/artifact.py +7 -3
- alita_sdk/runtime/toolkits/tools.py +8 -1
- alita_sdk/runtime/tools/application.py +2 -0
- alita_sdk/runtime/tools/artifact.py +65 -8
- alita_sdk/runtime/tools/vectorstore.py +125 -41
- alita_sdk/runtime/utils/utils.py +3 -0
- alita_sdk/tools/ado/__init__.py +8 -0
- alita_sdk/tools/ado/repos/repos_wrapper.py +37 -0
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +0 -7
- alita_sdk/tools/ado/work_item/__init__.py +4 -0
- alita_sdk/tools/ado/work_item/ado_wrapper.py +37 -4
- alita_sdk/tools/aws/delta_lake/__init__.py +1 -1
- alita_sdk/tools/bitbucket/__init__.py +13 -1
- alita_sdk/tools/bitbucket/api_wrapper.py +31 -4
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +31 -0
- alita_sdk/tools/chunkers/code/codeparser.py +18 -10
- alita_sdk/tools/confluence/api_wrapper.py +35 -134
- alita_sdk/tools/confluence/loader.py +30 -28
- alita_sdk/tools/elitea_base.py +112 -11
- alita_sdk/tools/figma/__init__.py +13 -1
- alita_sdk/tools/figma/api_wrapper.py +47 -3
- alita_sdk/tools/github/api_wrapper.py +8 -0
- alita_sdk/tools/github/github_client.py +18 -0
- alita_sdk/tools/gitlab/__init__.py +4 -0
- alita_sdk/tools/gitlab/api_wrapper.py +10 -0
- alita_sdk/tools/google/bigquery/__init__.py +1 -1
- alita_sdk/tools/jira/__init__.py +21 -13
- alita_sdk/tools/jira/api_wrapper.py +285 -5
- alita_sdk/tools/sharepoint/__init__.py +11 -1
- alita_sdk/tools/sharepoint/api_wrapper.py +23 -53
- alita_sdk/tools/testrail/__init__.py +4 -0
- alita_sdk/tools/testrail/api_wrapper.py +21 -54
- alita_sdk/tools/utils/content_parser.py +72 -8
- alita_sdk/tools/xray/__init__.py +8 -1
- alita_sdk/tools/xray/api_wrapper.py +505 -14
- alita_sdk/tools/zephyr_scale/api_wrapper.py +5 -5
- {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/RECORD +47 -47
- {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.209.dist-info → alita_sdk-0.3.210.dist-info}/top_level.txt +0 -0
```diff
--- a/alita_sdk/tools/jira/api_wrapper.py
+++ b/alita_sdk/tools/jira/api_wrapper.py
@@ -4,17 +4,20 @@ import re
 import traceback
 from json import JSONDecodeError
 from traceback import format_exc
-from typing import List, Optional, Any, Dict
+from typing import List, Optional, Any, Dict, Generator
 import os
 
 from atlassian import Jira
+from langchain_core.documents import Document
 from langchain_core.tools import ToolException
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
 import requests
 
-from ..elitea_base import
+from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
 from ..llm.img_utils import ImageDescriptionCache
 from ..utils import is_cookie_token, parse_cookie_string
+from ..utils.content_parser import parse_file_content, load_content_from_bytes
+from ...runtime.utils.utils import IndexerKeywords
 
 logger = logging.getLogger(__name__)
 
@@ -388,7 +391,7 @@ def process_search_response(jira_url, response, payload_params: Dict[str, Any] =
 
     return str(processed_issues)
 
-class JiraApiWrapper(
+class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
     base_url: str
     api_version: Optional[str] = "2",
     api_key: Optional[SecretStr] = None,
@@ -402,7 +405,6 @@ class JiraApiWrapper(BaseToolApiWrapper):
     _client: Jira = PrivateAttr()
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=lambda: ImageDescriptionCache(max_size=50))
     issue_search_pattern: str = r'/rest/api/\d+/search'
-    llm: Any = None
 
     @model_validator(mode='before')
     @classmethod
@@ -1061,7 +1063,7 @@ class JiraApiWrapper(BaseToolApiWrapper):
         def process_image_match(match):
             """Process each image reference and get its contextual description"""
             image_ref = match.group(1)
-            full_match = match.group(0)
+            full_match = match.group(0)  # The complete image reference with markers
 
             logger.info(f"Processing image reference: {image_ref} (full match: {full_match})")
 
@@ -1221,6 +1223,284 @@ class JiraApiWrapper(BaseToolApiWrapper):
             logger.error(f"Error processing comments with images: {stacktrace}")
             return f"Error processing comments with images: {str(e)}"
 
+    def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
+        """
+        Base loader for Jira issues, used to load issues as documents.
+        Uses the existing Jira client instance to fetch and process issues.
+        """
+        # Extract parameters from kwargs
+        jql = kwargs.get('jql')
+        fields_to_extract = kwargs.get('fields_to_extract')
+        fields_to_index = kwargs.get('fields_to_index')
+        include_attachments = kwargs.get('include_attachments', False)
+        max_total_issues = kwargs.get('max_total_issues', 1000)
+
+        # set values for skipped attachment extensions
+        self._skipped_attachment_extensions = kwargs.get('skip_attachment_extensions', [])
+        self._included_fields = fields_to_extract.copy() if fields_to_extract else []
+
+        try:
+            # Prepare fields to extract
+            DEFAULT_FIELDS = ['status', 'summary', 'reporter', 'description', 'created', 'updated', 'assignee', 'project', 'issuetype']
+            fields = DEFAULT_FIELDS.copy()
+
+            if fields_to_extract:
+                fields.extend(fields_to_extract)
+
+            if include_attachments:
+                fields.append('attachment')
+
+            # Use provided JQL query or default to all issues
+            if not jql:
+                jql_query = "ORDER BY updated DESC"  # Default to get all issues ordered by update time
+            else:
+                jql_query = jql
+
+            # Remove duplicates and prepare fields
+            final_fields = ','.join({field.lower() for field in fields})
+
+            # Fetch issues using the existing Jira client
+            issue_generator = self._jql_get_tickets(
+                jql_query,
+                fields=final_fields,
+                limit=max_total_issues
+            )
+
+            # Process each batch of issues
+            for issues_batch in issue_generator:
+                for issue in issues_batch:
+                    issue_doc = self._process_issue_for_indexing(
+                        issue,
+                        fields_to_index
+                    )
+                    if issue_doc:
+                        yield issue_doc
+
+        except Exception as e:
+            logger.error(f"Error loading Jira issues: {str(e)}")
+            raise ToolException(f"Unable to load Jira issues: {str(e)}")
+
+    def _process_document(self, base_document: Document) -> Generator[Document, None, None]:
+        """
+        Process a base document to extract and index Jira issues extra fields: comments, attachments, etc..
+        """
+
+        issue_key = base_document.metadata.get('issue_key')
+        # get attachments content
+
+        issue = self._client.issue(issue_key, fields="attachment")
+        attachments = issue.get('fields', {}).get('attachment', [])
+        for attachment in attachments:
+            # get extension
+            ext = f".{attachment['filename'].split('.')[-1].lower()}"
+            if ext not in self._skipped_attachment_extensions:
+                attachment_id = f"attach_{attachment['id']}"
+                base_document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
+                try:
+                    attachment_content = self._client.get_attachment_content(attachment['id'])
+                except Exception as e:
+                    logger.error(f"Failed to download attachment {attachment['filename']} for issue {issue_key}: {str(e)}")
+                    attachment_content = self._client.get(path=f"secure/attachment/{attachment['id']}/{attachment['filename']}", not_json_response=True)
+                content = load_content_from_bytes(attachment_content, ext, llm=self.llm) if ext not in '.pdf' \
+                    else parse_file_content(file_content=attachment_content, file_name=attachment['filename'], llm=self.llm, is_capture_image=True)
+                if not content:
+                    continue
+                yield Document(page_content=content,
+                               metadata={
+                                   'id': attachment_id,
+                                   'issue_key': issue_key,
+                                   'source': f"{self.base_url}/browse/{issue_key}",
+                                   'filename': attachment['filename'],
+                                   'created': attachment['created'],
+                                   'mimeType': attachment['mimeType'],
+                                   'author': attachment.get('author', {}).get('name'),
+                                   IndexerKeywords.PARENT.value: base_document.metadata.get('id', None),
+                                   'type': 'attachment',
+                               })
+
+    def _jql_get_tickets(self, jql, fields="*all", start=0, limit=None, expand=None, validate_query=None):
+        """
+        Generator that yields batches of Jira issues based on JQL query.
+        """
+        from atlassian.errors import ApiError
+
+        params = {}
+        if limit is not None:
+            params["maxResults"] = int(limit)
+        if fields is not None:
+            if isinstance(fields, (list, tuple, set)):
+                fields = ",".join(fields)
+            params["fields"] = fields
+        if jql is not None:
+            params["jql"] = jql
+        if expand is not None:
+            params["expand"] = expand
+        if validate_query is not None:
+            params["validateQuery"] = validate_query
+
+        url = self._client.resource_url("search")
+
+        while True:
+            params["startAt"] = int(start)
+            try:
+                response = self._client.get(url, params=params)
+                if not response:
+                    break
+            except ApiError as e:
+                error_message = f"Jira API error: {str(e)}"
+                raise ValueError(f"Failed to fetch issues from Jira: {error_message}")
+
+            issues = response["issues"]
+            yield issues
+            if limit is not None and len(response["issues"]) + start >= limit:
+                break
+            if not response["issues"]:
+                break
+            start += len(issues)
+
+    def _process_issue_for_indexing(self, issue: dict, fields_to_index=None) -> Document:
+        """
+        Process a single Jira issue into a Document for indexing.
+        Copied and adapted from AlitaJiraLoader logic.
+        """
+        try:
+            # Build content starting with summary
+            content = f"{issue['fields']['summary']}\n"
+
+            # Add description if present
+            description = issue['fields'].get('description', '')
+            if description:
+                content += f"{description}\n"
+            else:
+                # If no description, still create document but with minimal content
+                logger.debug(f"Issue {issue.get('key', 'unknown')} has no description")
+
+            # Add comments if present
+            if 'comment' in issue['fields'] and issue['fields']['comment'].get('comments'):
+                for comment in issue['fields']['comment']['comments']:
+                    content += f"{comment['body']}\n"
+
+            # Add additional fields to index
+            if fields_to_index:
+                for field in fields_to_index:
+                    if field in issue['fields'] and issue['fields'][field]:
+                        field_value = issue['fields'][field]
+                        # Convert complex objects to string representation
+                        if isinstance(field_value, dict):
+                            field_value = str(field_value)
+                        elif isinstance(field_value, list):
+                            field_value = ', '.join(str(item) for item in field_value)
+                        content += f"{field_value}\n"
+
+            # Create metadata
+            metadata = {
+                "id": issue["id"],
+                "issue_key": issue["key"],
+                "source": f"{self.base_url}/browse/{issue['key']}",
+                "author": issue["fields"].get("reporter", {}).get("emailAddress") if issue["fields"].get("reporter") else None,
+                "status": issue["fields"].get("status", {}).get("name") if issue["fields"].get("status") else None,
+                "updated_on": issue["fields"].get("updated"),
+                "created_on": issue["fields"].get("created"),
+                "project": issue["fields"].get("project", {}).get("key") if issue["fields"].get("project") else None,
+                "issuetype": issue["fields"].get("issuetype", {}).get("name") if issue["fields"].get("issuetype") else None,
+                "type": "jira_issue",
+            }
+
+            return Document(page_content=content, metadata=metadata)
+
+        except Exception as e:
+            logger.error(f"Error processing issue {issue.get('key', 'unknown')}: {str(e)}")
+            return None
+
+    def _index_tool_params(self, **kwargs) -> dict[str, tuple[type, Field]]:
+        return {
+            'jql': (Optional[str], Field(
+                description="JQL query to filter issues. If not provided, all accessible issues will be indexed. Examples: 'project=PROJ', 'parentEpic=EPIC-123', 'status=Open'",
+                default=None)),
+            'fields_to_extract': (Optional[List[str]],
+                                  Field(description="Additional fields to extract from issues", default=None)),
+            'fields_to_index': (Optional[List[str]],
+                                Field(description="Additional fields to include in indexed content", default=None)),
+            'include_attachments': (Optional[bool],
+                                    Field(description="Whether to include attachment content in indexing",
+                                          default=False)),
+            'max_total_issues': (Optional[int], Field(description="Maximum number of issues to index", default=1000)),
+            'skip_attachment_extensions': (Optional[str], Field(
+                description="Comma-separated list of file extensions to skip when processing attachments",
+                default=None)),
+        }
+
+    # def index_data(self,
+    #                jql: Optional[str] = None,
+    #                fields_to_extract: Optional[List[str]] = None,
+    #                fields_to_index: Optional[List[str]] = None,
+    #                include_attachments: Optional[bool] = False,
+    #                max_total_issues: Optional[int] = 1000,
+    #                skip_attachment_extensions: Optional[List[str]] = None,
+    #                collection_suffix: str = "",
+    #                progress_step: Optional[int] = None,
+    #                clean_index: Optional[bool] = False):
+    #     """
+    #     Index Jira issues into the vector store.
+    #
+    #     Args:
+    #         jql: JQL query to filter issues. If not provided, all accessible issues will be indexed
+    #         fields_to_extract: Additional fields to extract from issues
+    #         fields_to_index: Additional fields to include in indexed content
+    #         include_attachments: Whether to include attachment content in indexing
+    #         max_total_issues: Maximum number of issues to index
+    #         skip_attachment_extensions: Comma-separated list of file extensions to skip when processing attachments
+    #         collection_suffix: Optional suffix for collection name (max 7 characters)
+    #         progress_step: Optional step size for progress reporting during indexing
+    #         clean_index: Optional flag to enforce clean existing index before indexing new data
+    #
+    #     Returns:
+    #         Result message from the vector store indexing operation
+    #     """
+    #     try:
+    #         # Validate that at least one filter is provided
+    #         if not any([jql]):
+    #             raise ToolException("Must provide at least one of: jql to filter issues for indexing")
+    #
+    #         # set extensions to skip for post-processing
+    #         self._skipped_attachment_extensions = skip_attachment_extensions if skip_attachment_extensions else []
+    #
+    #         # Get embeddings
+    #         from ...runtime.langchain.interfaces.llm_processor import get_embeddings
+    #         embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+    #
+    #         # Initialize vector store
+    #         vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+    #
+    #         # Prepare parameters for the loader
+    #         loader_params = {
+    #             'jql': jql,
+    #             'fields_to_extract': fields_to_extract,
+    #             'fields_to_index': fields_to_index,
+    #             'include_attachments': include_attachments,
+    #             'max_total_issues': max_total_issues,
+    #             'skip_attachment_extensions': skip_attachment_extensions,
+    #         }
+    #
+    #         # Load documents using _base_loader
+    #         docs = self._base_loader(**loader_params)
+    #
+    #         if not docs:
+    #             return "No Jira issues found matching the specified criteria."
+    #
+    #         docs = list(docs)  # Convert generator to list for logging and indexing
+    #         logger.info(f"Loaded {len(docs)} Jira issues for indexing")
+    #
+    #         # Index the documents
+    #         result = vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+    #
+    #         return f"Successfully indexed {len(docs)} Jira issues. {result}"
+    #
+    #     except Exception as e:
+    #         logger.error(f"Error indexing Jira issues: {str(e)}")
+    #         raise ToolException(f"Error indexing Jira issues: {str(e)}")
+
+    @extend_with_vector_tools
     def get_available_tools(self):
         return [
             {
```
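Taken together, these hunks replace the Jira toolkit's bespoke `index_data` tool with the shared vector-store flow from `..elitea_base`: `JiraApiWrapper` now subclasses `BaseVectorStoreToolApiWrapper`, declares its index options through `_index_tool_params`, and streams issues and attachments via `_base_loader` / `_process_document`, while `@extend_with_vector_tools` attaches the generic index and search tools. A minimal sketch of driving the new loader directly is below; it assumes an already-configured `JiraApiWrapper` instance (construction and credentials are not shown in this diff) and uses only methods and metadata keys visible above.

```python
# Hedged sketch, not part of the package: walk the 0.3.210 loader hooks.
from alita_sdk.tools.jira.api_wrapper import JiraApiWrapper

def dump_index_stream(wrapper: JiraApiWrapper) -> None:
    """Print what would be fed to the vector store for a configured wrapper."""
    # _base_loader yields one langchain Document per issue matching the JQL.
    for issue_doc in wrapper._base_loader(jql="project=PROJ",  # placeholder JQL
                                          include_attachments=True,
                                          max_total_issues=50):
        print(issue_doc.metadata["issue_key"], issue_doc.metadata["status"])
        # Attachments become dependent Documents with ids like "attach_<id>",
        # linked back to the issue through IndexerKeywords.PARENT.
        for att_doc in wrapper._process_document(issue_doc):
            print("  attachment:", att_doc.metadata["filename"])
```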
```diff
--- a/alita_sdk/tools/sharepoint/__init__.py
+++ b/alita_sdk/tools/sharepoint/__init__.py
@@ -15,7 +15,13 @@ def get_tools(tool):
         client_id=tool['settings'].get('client_id', None),
         client_secret=tool['settings'].get('client_secret', None),
         toolkit_name=tool.get('toolkit_name'),
-        llm=tool['settings'].get('llm')
+        llm=tool['settings'].get('llm'),
+        # indexer settings
+        connection_string=tool['settings'].get('connection_string', None),
+        collection_name=f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+        embedding_model="HuggingFaceEmbeddings",
+        embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+        vectorstore_type="PGVector")
     .get_tools())
 
 
@@ -33,6 +39,10 @@ class SharepointToolkit(BaseToolkit):
         client_id=(str, Field(description="Client ID")),
         client_secret=(SecretStr, Field(description="Client Secret", json_schema_extra={'secret': True})),
         selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
+        # indexer settings
+        connection_string = (Optional[SecretStr], Field(description="Connection string for vectorstore",
+                                                        default=None,
+                                                        json_schema_extra={'secret': True})),
         __config__=ConfigDict(json_schema_extra={
             'metadata': {
                 "label": "Sharepoint", "icon_url": "sharepoint.svg",
```
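For orientation, `get_tools` receives a toolkit configuration dict and now forwards the indexer wiring from it: `connection_string` comes from the settings, the collection name is derived as `"<toolkit_name>_<id>"`, and the embeddings and vector store default to HuggingFace all-MiniLM-L6-v2 on PGVector. A hypothetical configuration illustrating that shape is below; all values are placeholders, and only the keys actually read in the hunk above are assumed.

```python
# Hypothetical toolkit config consumed by get_tools(); every value is a placeholder.
tool = {
    "id": 42,
    "toolkit_name": "my_sharepoint",
    "settings": {
        "client_id": "<client-id>",
        "client_secret": "<client-secret>",
        "llm": None,  # an LLM instance in real use
        # new in 0.3.210: enables the PGVector-backed index for this toolkit
        "connection_string": "postgresql+psycopg://user:pass@host:5432/vectors",
    },
}
# With this config, get_tools(tool) would index into a collection named
# "my_sharepoint_42" using HuggingFaceEmbeddings (all-MiniLM-L6-v2) and PGVector.
```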
```diff
--- a/alita_sdk/tools/sharepoint/api_wrapper.py
+++ b/alita_sdk/tools/sharepoint/api_wrapper.py
@@ -1,17 +1,15 @@
 import json
 import logging
-from typing import Optional, List,
+from typing import Optional, List, Generator
 
-from
-from ..utils.content_parser import parse_file_content
+from langchain_core.documents import Document
 from langchain_core.tools import ToolException
 from office365.runtime.auth.client_credential import ClientCredential
 from office365.sharepoint.client_context import ClientContext
 from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr
 
-from ..elitea_base import
-from
-from langchain_core.documents import Document
+from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
+from ..utils.content_parser import parse_file_content
 
 NoInput = create_model(
     "NoInput"
@@ -39,15 +37,6 @@ ReadDocument = create_model(
                          default=None))
 )
 
-indexData = create_model(
-    "indexData",
-    __base__=BaseIndexParams,
-    progress_step=(Optional[int], Field(default=None, ge=0, le=100,
-                   description="Optional step size for progress reporting during indexing")),
-    clean_index=(Optional[bool], Field(default=False,
-                 description="Optional flag to enforce clean existing index before indexing new data")),
-)
-
 
 class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
     site_url: str
@@ -56,13 +45,6 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
     token: SecretStr = None
     _client: Optional[ClientContext] = PrivateAttr()  # Private attribute for the office365 client
 
-    llm: Any = None
-    connection_string: Optional[SecretStr] = None
-    collection_name: Optional[str] = None
-    embedding_model: Optional[str] = "HuggingFaceEmbeddings"
-    embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
-    vectorstore_type: Optional[str] = "PGVector"
-
     @model_validator(mode='before')
     @classmethod
     def validate_toolkit(cls, values):
@@ -143,7 +125,11 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
             logging.error(f"Failed to load files from sharepoint: {e}")
             return ToolException("Can not get files. Please, double check folder name and read permissions.")
 
-    def read_file(self, path,
+    def read_file(self, path,
+                  is_capture_image: bool = False,
+                  page_number: int = None,
+                  sheet_name: str = None,
+                  excel_by_sheets: bool = False):
         """ Reads file located at the specified server-relative path. """
         try:
             file = self._client.web.get_file_by_server_relative_path(path)
@@ -159,9 +145,10 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
                    is_capture_image=is_capture_image,
                    page_number=page_number,
                    sheet_name=sheet_name,
+                   excel_by_sheets=excel_by_sheets,
                    llm=self.llm)
 
-    def _base_loader(self) -> List[Document]:
+    def _base_loader(self, **kwargs) -> List[Document]:
         try:
             all_files = self.get_files_list()
         except Exception as e:
@@ -170,35 +157,24 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
         docs: List[Document] = []
         for file in all_files:
             metadata = {
-                ("
+                ("updated_on" if k == "Modified" else k): str(v)
                 for k, v in file.items()
             }
             docs.append(Document(page_content="", metadata=metadata))
         return docs
 
-    def index_data(self,
-                   collection_suffix: str = '',
-                   progress_step: int = None,
-                   clean_index: bool = False):
-        docs = self._base_loader()
-        embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
-        vs = self._init_vector_store(collection_suffix, embeddings=embedding)
-        return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
-
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
-
-
-
-
-
-
-
-
-
-
-
-        yield document
-
+        page_content = self.read_file(document.metadata['Path'], is_capture_image=True, excel_by_sheets=True)
+        if isinstance(page_content, dict):
+            for key, value in page_content.items():
+                metadata = document.metadata
+                metadata['page'] = key
+                yield Document(page_content=str(value), metadata=metadata)
+        else:
+            document.page_content = json.dumps(str(page_content))
+            yield document
+
+    @extend_with_vector_tools
     def get_available_tools(self):
         return [
             {
@@ -218,11 +194,5 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
                 "description": self.read_file.__doc__,
                 "args_schema": ReadDocument,
                 "ref": self.read_file
-            },
-            {
-                "name": "index_data",
-                "ref": self.index_data,
-                "description": self.index_data.__doc__,
-                "args_schema": indexData,
             }
         ]
```
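The reworked `_process_document` now reads each listed file with image capture and per-sheet Excel extraction enabled, yielding one Document per sheet when the parser returns a dict, and the hand-rolled `index_data` tool is dropped in favour of `@extend_with_vector_tools`. A small sketch of the resulting flow is below; it assumes an already-configured `SharepointApiWrapper` and only the methods and metadata keys visible in this diff.

```python
# Hedged sketch, not part of the package: expand the SharePoint file listing
# into per-file / per-sheet Documents using the new hooks.
from typing import Iterable
from langchain_core.documents import Document
from alita_sdk.tools.sharepoint.api_wrapper import SharepointApiWrapper

def expand_files(wrapper: SharepointApiWrapper) -> Iterable[Document]:
    for stub in wrapper._base_loader():  # metadata-only Documents from get_files_list()
        # read_file(..., excel_by_sheets=True) returns a dict for spreadsheets,
        # so _process_document yields one Document per sheet with metadata['page'].
        yield from wrapper._process_document(stub)
```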
```diff
--- a/alita_sdk/tools/testrail/__init__.py
+++ b/alita_sdk/tools/testrail/__init__.py
@@ -51,6 +51,10 @@ class TestrailToolkit(BaseToolkit):
             ),
             email=(str, Field(description="User's email", json_schema_extra={'configuration': True})),
             password=(SecretStr, Field(description="User's password", json_schema_extra={'secret': True, 'configuration': True})),
+            # indexer settings
+            connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
+                                                          default=None,
+                                                          json_schema_extra={'secret': True})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra={'metadata':
                 {"label": "Testrail", "icon_url": "testrail-icon.svg",
```
```diff
--- a/alita_sdk/tools/testrail/api_wrapper.py
+++ b/alita_sdk/tools/testrail/api_wrapper.py
@@ -4,10 +4,11 @@ from typing import Dict, List, Optional, Union, Any, Generator
 
 import pandas as pd
 from langchain_core.tools import ToolException
+from openai import BadRequestError
 from pydantic import SecretStr, create_model, model_validator
 from pydantic.fields import Field, PrivateAttr
 from testrail_api import StatusCodeError, TestRailAPI
-from ..elitea_base import BaseVectorStoreToolApiWrapper,
+from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
 from langchain_core.documents import Document
 
 from ...runtime.utils.utils import IndexerKeywords
@@ -289,20 +290,6 @@ updateCase = create_model(
     ),
 )
 
-# Schema for indexing TestRail data into vector store
-indexData = create_model(
-    "indexData",
-    __base__=BaseIndexParams,
-    project_id=(str, Field(description="TestRail project ID to index data from")),
-    suite_id=(Optional[str], Field(default=None, description="Optional TestRail suite ID to filter test cases")),
-    section_id=(Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
-    title_keyword=(Optional[str], Field(default=None, description="Optional keyword to filter test cases by title")),
-    progress_step=(Optional[int],
-                   Field(default=None, ge=0, le=100, description="Optional step size for progress reporting during indexing")),
-    clean_index=(Optional[bool],
-                 Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
-)
-
 SUPPORTED_KEYS = {
     "id", "title", "section_id", "template_id", "type_id", "priority_id", "milestone_id",
     "refs", "created_by", "created_on", "updated_by", "updated_on", "estimate",
@@ -317,14 +304,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
     password: Optional[SecretStr] = None,
     email: Optional[str] = None,
     _client: Optional[TestRailAPI] = PrivateAttr()  # Private attribute for the TestRail client
-    llm: Any = None
-
-    connection_string: Optional[SecretStr] = None
-    collection_name: Optional[str] = None
-    embedding_model: Optional[str] = "HuggingFaceEmbeddings"
-    embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
-    vectorstore_type: Optional[str] = "PGVector"
-
 
     @model_validator(mode="before")
     @classmethod
@@ -490,7 +469,8 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
             project_id=project_id, **params
         )
 
-
+        # support old versions of testrail_api
+        cases = extracted_cases.get("cases") if isinstance(extracted_cases, dict) else extracted_cases
 
         if cases is None:
             return ToolException("No test cases found in the extracted data.")
@@ -554,7 +534,8 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
     def _base_loader(self, project_id: str,
                      suite_id: Optional[str] = None,
                      section_id: Optional[int] = None,
-                     title_keyword: Optional[str] = None
+                     title_keyword: Optional[str] = None,
+                     **kwargs: Any
                      ) -> Generator[Document, None, None]:
         try:
             if suite_id:
@@ -577,7 +558,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
                 'title': case.get('title', ''),
                 'suite_id': suite_id or case.get('suite_id', ''),
                 'id': str(case.get('id', '')),
-
+                IndexerKeywords.UPDATED_ON.value: case.get('updated_on') or -1,
                 'labels': [lbl['title'] for lbl in case.get('labels', [])],
                 'type': case.get('type_id') or -1,
                 'priority': case.get('priority_id') or -1,
@@ -588,22 +569,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
                 'entity_type': 'test_case',
             })
 
-    def index_data(
-        self,
-        project_id: str,
-        suite_id: Optional[str] = None,
-        collection_suffix: str = "",
-        section_id: Optional[int] = None,
-        title_keyword: Optional[str] = None,
-        progress_step: Optional[int] = None,
-        clean_index: Optional[bool] = False
-    ):
-        """Load TestRail test cases into the vector store."""
-        docs = self._base_loader(project_id, suite_id, section_id, title_keyword)
-        embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
-        vs = self._init_vector_store(collection_suffix, embeddings=embedding)
-        return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
-
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
         """
         Process an existing base document to extract relevant metadata for full document preparation.
@@ -626,16 +591,15 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
 
         # process each attachment to extract its content
         for attachment in attachments:
-            attachment_id = attachment['id']
+            attachment_id = f"attach_{attachment['id']}"
             # add attachment id to metadata of parent
             document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
-
             # TODO: pass it to chunkers
             yield Document(page_content=self._process_attachment(attachment),
                            metadata={
                                'project_id': base_data.get('project_id', ''),
-
-
+                               'id': str(attachment_id),
+                               IndexerKeywords.PARENT.value: str(case_id),
                                'filename': attachment['filename'],
                                'filetype': attachment['filetype'],
                                'created_on': attachment['created_on'],
@@ -663,10 +627,20 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
         try:
             attachment_path = self._client.attachments.get_attachment(attachment_id=attachment['id'], path=f"./{attachment['filename']}")
             page_content = parse_file_content(file_name=attachment['filename'], file_content=attachment_path.read_bytes(), llm=self.llm, is_capture_image=True)
+        except BadRequestError as ai_e:
+            logger.error(f"Unable to parse page's content with type: {attachment['filetype']} due to AI service issues: {ai_e}")
         except Exception as e:
             logger.error(f"Unable to parse page's content with type: {attachment['filetype']}: {e}")
         return page_content
 
+    def _index_tool_params(self):
+        return {
+            'project_id': (str, Field(description="TestRail project ID to index data from")),
+            'suite_id': (Optional[str],
+                         Field(default=None, description="Optional TestRail suite ID to filter test cases")),
+            'section_id': (Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
+        }
+
     def _to_markup(self, data: List[Dict], output_format: str) -> str:
         """
         Converts the given data into the specified format: 'json', 'csv', or 'markdown'.
@@ -694,6 +668,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
         if output_format == "markdown":
             return df.to_markdown(index=False)
 
+    @extend_with_vector_tools
     def get_available_tools(self):
         tools = [
             {
@@ -731,14 +706,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
                 "ref": self.update_case,
                 "description": self.update_case.__doc__,
                 "args_schema": updateCase,
-            },
-            {
-                "name": "index_data",
-                "ref": self.index_data,
-                "description": self.index_data.__doc__,
-                "args_schema": indexData,
             }
         ]
-        # Add vector search from base
-        tools.extend(self._get_vector_search_tools())
         return tools
```
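As with Jira and SharePoint, TestRail drops its explicit `indexData` schema and `index_data` method: the index options now come from `_index_tool_params`, and `@extend_with_vector_tools` wires the shared index and vector-search tools into `get_available_tools`. A hedged sketch of streaming cases and attachments through the new hooks is below; it assumes an already-configured `TestrailAPIWrapper` (construction and credentials are not shown in this diff), and the project id is a placeholder.

```python
# Hedged sketch, not part of the package: stream TestRail cases and their
# attachments the way the new loader hooks would feed the indexer.
from alita_sdk.tools.testrail.api_wrapper import TestrailAPIWrapper

def walk_cases(wrapper: TestrailAPIWrapper, project_id: str = "1") -> None:
    # _base_loader yields one Document per test case, with id/title/suite metadata.
    for case_doc in wrapper._base_loader(project_id=project_id):
        print(case_doc.metadata["id"], case_doc.metadata["title"])
        # Attachments get ids like "attach_<id>" and point back to the case via
        # IndexerKeywords.PARENT; LLM BadRequestError is now logged separately
        # while their content is parsed.
        for att_doc in wrapper._process_document(case_doc):
            print("  attachment:", att_doc.metadata["filename"])
```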