alita-sdk 0.3.208__py3-none-any.whl → 0.3.210__py3-none-any.whl
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- alita_sdk/runtime/clients/artifact.py +18 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py +2 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +3 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +8 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
- alita_sdk/runtime/langchain/langraph_agent.py +9 -6
- alita_sdk/runtime/toolkits/artifact.py +7 -3
- alita_sdk/runtime/toolkits/tools.py +8 -1
- alita_sdk/runtime/tools/application.py +2 -0
- alita_sdk/runtime/tools/artifact.py +65 -8
- alita_sdk/runtime/tools/vectorstore.py +125 -42
- alita_sdk/runtime/utils/utils.py +3 -0
- alita_sdk/tools/ado/__init__.py +8 -0
- alita_sdk/tools/ado/repos/repos_wrapper.py +37 -0
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +0 -7
- alita_sdk/tools/ado/work_item/__init__.py +4 -0
- alita_sdk/tools/ado/work_item/ado_wrapper.py +37 -4
- alita_sdk/tools/aws/delta_lake/__init__.py +1 -1
- alita_sdk/tools/bitbucket/__init__.py +13 -1
- alita_sdk/tools/bitbucket/api_wrapper.py +31 -4
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +31 -0
- alita_sdk/tools/chunkers/code/codeparser.py +18 -10
- alita_sdk/tools/confluence/api_wrapper.py +35 -134
- alita_sdk/tools/confluence/loader.py +30 -28
- alita_sdk/tools/elitea_base.py +112 -11
- alita_sdk/tools/figma/__init__.py +13 -1
- alita_sdk/tools/figma/api_wrapper.py +47 -3
- alita_sdk/tools/github/api_wrapper.py +8 -0
- alita_sdk/tools/github/github_client.py +18 -0
- alita_sdk/tools/gitlab/__init__.py +4 -0
- alita_sdk/tools/gitlab/api_wrapper.py +10 -0
- alita_sdk/tools/google/bigquery/__init__.py +1 -1
- alita_sdk/tools/jira/__init__.py +21 -13
- alita_sdk/tools/jira/api_wrapper.py +285 -5
- alita_sdk/tools/sharepoint/__init__.py +11 -1
- alita_sdk/tools/sharepoint/api_wrapper.py +23 -53
- alita_sdk/tools/testrail/__init__.py +4 -0
- alita_sdk/tools/testrail/api_wrapper.py +28 -56
- alita_sdk/tools/utils/content_parser.py +123 -9
- alita_sdk/tools/xray/__init__.py +8 -1
- alita_sdk/tools/xray/api_wrapper.py +505 -14
- alita_sdk/tools/zephyr_scale/api_wrapper.py +5 -5
- {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/RECORD +47 -47
- {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/top_level.txt +0 -0
```diff
--- a/alita_sdk/tools/jira/api_wrapper.py
+++ b/alita_sdk/tools/jira/api_wrapper.py
@@ -4,17 +4,20 @@ import re
 import traceback
 from json import JSONDecodeError
 from traceback import format_exc
-from typing import List, Optional, Any, Dict
+from typing import List, Optional, Any, Dict, Generator
 import os
 
 from atlassian import Jira
+from langchain_core.documents import Document
 from langchain_core.tools import ToolException
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
 import requests
 
-from ..elitea_base import BaseToolApiWrapper
+from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
 from ..llm.img_utils import ImageDescriptionCache
 from ..utils import is_cookie_token, parse_cookie_string
+from ..utils.content_parser import parse_file_content, load_content_from_bytes
+from ...runtime.utils.utils import IndexerKeywords
 
 logger = logging.getLogger(__name__)
 
```
```diff
--- a/alita_sdk/tools/jira/api_wrapper.py
+++ b/alita_sdk/tools/jira/api_wrapper.py
@@ -388,7 +391,7 @@ def process_search_response(jira_url, response, payload_params: Dict[str, Any] =
 
     return str(processed_issues)
 
-class JiraApiWrapper(BaseToolApiWrapper):
+class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
     base_url: str
     api_version: Optional[str] = "2",
     api_key: Optional[SecretStr] = None,
@@ -402,7 +405,6 @@ class JiraApiWrapper(BaseToolApiWrapper):
     _client: Jira = PrivateAttr()
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=lambda: ImageDescriptionCache(max_size=50))
     issue_search_pattern: str = r'/rest/api/\d+/search'
-    llm: Any = None
 
     @model_validator(mode='before')
     @classmethod
@@ -1061,7 +1063,7 @@ class JiraApiWrapper(BaseToolApiWrapper):
         def process_image_match(match):
             """Process each image reference and get its contextual description"""
             image_ref = match.group(1)
-            full_match = match.group(0)
+            full_match = match.group(0)  # The complete image reference with markers
 
             logger.info(f"Processing image reference: {image_ref} (full match: {full_match})")
 
```
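The hunks above migrate `JiraApiWrapper` from `BaseToolApiWrapper` to the shared `BaseVectorStoreToolApiWrapper`, which now owns the `llm` attribute deleted here. A minimal sketch of the subclass contract implied by this release follows; the overridden method names are taken from the hunks in this diff, while the inherited field list and base-class behavior are assumptions inferred from the SharePoint changes further down, not the actual `elitea_base` API.

```python
# Hypothetical minimal subclass illustrating the contract implied by this diff.
# The method names appear in the hunks below; everything else is assumed.
from typing import Generator, Optional

from langchain_core.documents import Document
from pydantic import Field

from alita_sdk.tools.elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools


class DemoApiWrapper(BaseVectorStoreToolApiWrapper):
    # llm, connection_string, collection_name, embedding_model,
    # embedding_model_params and vectorstore_type are inherited from the base,
    # which is why their per-wrapper declarations are deleted in this release.

    def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
        # Yield one lightweight Document per source item.
        yield Document(page_content="", metadata={"id": "item-1"})

    def _process_document(self, document: Document) -> Generator[Document, None, None]:
        # Expand a base document into dependent documents (attachments, sheets, ...).
        yield document

    def _index_tool_params(self, **kwargs) -> dict:
        # Extra arguments exposed on the generated indexing tool.
        return {"query": (Optional[str], Field(default=None, description="Filter query"))}

    @extend_with_vector_tools
    def get_available_tools(self):
        return []
```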
```diff
--- a/alita_sdk/tools/jira/api_wrapper.py
+++ b/alita_sdk/tools/jira/api_wrapper.py
@@ -1221,6 +1223,284 @@ class JiraApiWrapper(BaseToolApiWrapper):
             logger.error(f"Error processing comments with images: {stacktrace}")
             return f"Error processing comments with images: {str(e)}"
 
+    def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
+        """
+        Base loader for Jira issues, used to load issues as documents.
+        Uses the existing Jira client instance to fetch and process issues.
+        """
+        # Extract parameters from kwargs
+        jql = kwargs.get('jql')
+        fields_to_extract = kwargs.get('fields_to_extract')
+        fields_to_index = kwargs.get('fields_to_index')
+        include_attachments = kwargs.get('include_attachments', False)
+        max_total_issues = kwargs.get('max_total_issues', 1000)
+
+        # set values for skipped attachment extensions
+        self._skipped_attachment_extensions = kwargs.get('skip_attachment_extensions', [])
+        self._included_fields = fields_to_extract.copy() if fields_to_extract else []
+
+        try:
+            # Prepare fields to extract
+            DEFAULT_FIELDS = ['status', 'summary', 'reporter', 'description', 'created', 'updated', 'assignee', 'project', 'issuetype']
+            fields = DEFAULT_FIELDS.copy()
+
+            if fields_to_extract:
+                fields.extend(fields_to_extract)
+
+            if include_attachments:
+                fields.append('attachment')
+
+            # Use provided JQL query or default to all issues
+            if not jql:
+                jql_query = "ORDER BY updated DESC"  # Default to get all issues ordered by update time
+            else:
+                jql_query = jql
+
+            # Remove duplicates and prepare fields
+            final_fields = ','.join({field.lower() for field in fields})
+
+            # Fetch issues using the existing Jira client
+            issue_generator = self._jql_get_tickets(
+                jql_query,
+                fields=final_fields,
+                limit=max_total_issues
+            )
+
+            # Process each batch of issues
+            for issues_batch in issue_generator:
+                for issue in issues_batch:
+                    issue_doc = self._process_issue_for_indexing(
+                        issue,
+                        fields_to_index
+                    )
+                    if issue_doc:
+                        yield issue_doc
+
+        except Exception as e:
+            logger.error(f"Error loading Jira issues: {str(e)}")
+            raise ToolException(f"Unable to load Jira issues: {str(e)}")
+
+    def _process_document(self, base_document: Document) -> Generator[Document, None, None]:
+        """
+        Process a base document to extract and index Jira issues extra fields: comments, attachments, etc..
+        """
+
+        issue_key = base_document.metadata.get('issue_key')
+        # get attachments content
+
+        issue = self._client.issue(issue_key, fields="attachment")
+        attachments = issue.get('fields', {}).get('attachment', [])
+        for attachment in attachments:
+            # get extension
+            ext = f".{attachment['filename'].split('.')[-1].lower()}"
+            if ext not in self._skipped_attachment_extensions:
+                attachment_id = f"attach_{attachment['id']}"
+                base_document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
+                try:
+                    attachment_content = self._client.get_attachment_content(attachment['id'])
+                except Exception as e:
+                    logger.error(f"Failed to download attachment {attachment['filename']} for issue {issue_key}: {str(e)}")
+                    attachment_content = self._client.get(path=f"secure/attachment/{attachment['id']}/{attachment['filename']}", not_json_response=True)
+                content = load_content_from_bytes(attachment_content, ext, llm=self.llm) if ext not in '.pdf' \
+                    else parse_file_content(file_content=attachment_content, file_name=attachment['filename'], llm=self.llm, is_capture_image=True)
+                if not content:
+                    continue
+                yield Document(page_content=content,
+                               metadata={
+                                   'id': attachment_id,
+                                   'issue_key': issue_key,
+                                   'source': f"{self.base_url}/browse/{issue_key}",
+                                   'filename': attachment['filename'],
+                                   'created': attachment['created'],
+                                   'mimeType': attachment['mimeType'],
+                                   'author': attachment.get('author', {}).get('name'),
+                                   IndexerKeywords.PARENT.value: base_document.metadata.get('id', None),
+                                   'type': 'attachment',
+                               })
+
+    def _jql_get_tickets(self, jql, fields="*all", start=0, limit=None, expand=None, validate_query=None):
+        """
+        Generator that yields batches of Jira issues based on JQL query.
+        """
+        from atlassian.errors import ApiError
+
+        params = {}
+        if limit is not None:
+            params["maxResults"] = int(limit)
+        if fields is not None:
+            if isinstance(fields, (list, tuple, set)):
+                fields = ",".join(fields)
+            params["fields"] = fields
+        if jql is not None:
+            params["jql"] = jql
+        if expand is not None:
+            params["expand"] = expand
+        if validate_query is not None:
+            params["validateQuery"] = validate_query
+
+        url = self._client.resource_url("search")
+
+        while True:
+            params["startAt"] = int(start)
+            try:
+                response = self._client.get(url, params=params)
+                if not response:
+                    break
+            except ApiError as e:
+                error_message = f"Jira API error: {str(e)}"
+                raise ValueError(f"Failed to fetch issues from Jira: {error_message}")
+
+            issues = response["issues"]
+            yield issues
+            if limit is not None and len(response["issues"]) + start >= limit:
+                break
+            if not response["issues"]:
+                break
+            start += len(issues)
+
+    def _process_issue_for_indexing(self, issue: dict, fields_to_index=None) -> Document:
+        """
+        Process a single Jira issue into a Document for indexing.
+        Copied and adapted from AlitaJiraLoader logic.
+        """
+        try:
+            # Build content starting with summary
+            content = f"{issue['fields']['summary']}\n"
+
+            # Add description if present
+            description = issue['fields'].get('description', '')
+            if description:
+                content += f"{description}\n"
+            else:
+                # If no description, still create document but with minimal content
+                logger.debug(f"Issue {issue.get('key', 'unknown')} has no description")
+
+            # Add comments if present
+            if 'comment' in issue['fields'] and issue['fields']['comment'].get('comments'):
+                for comment in issue['fields']['comment']['comments']:
+                    content += f"{comment['body']}\n"
+
+            # Add additional fields to index
+            if fields_to_index:
+                for field in fields_to_index:
+                    if field in issue['fields'] and issue['fields'][field]:
+                        field_value = issue['fields'][field]
+                        # Convert complex objects to string representation
+                        if isinstance(field_value, dict):
+                            field_value = str(field_value)
+                        elif isinstance(field_value, list):
+                            field_value = ', '.join(str(item) for item in field_value)
+                        content += f"{field_value}\n"
+
+            # Create metadata
+            metadata = {
+                "id": issue["id"],
+                "issue_key": issue["key"],
+                "source": f"{self.base_url}/browse/{issue['key']}",
+                "author": issue["fields"].get("reporter", {}).get("emailAddress") if issue["fields"].get("reporter") else None,
+                "status": issue["fields"].get("status", {}).get("name") if issue["fields"].get("status") else None,
+                "updated_on": issue["fields"].get("updated"),
+                "created_on": issue["fields"].get("created"),
+                "project": issue["fields"].get("project", {}).get("key") if issue["fields"].get("project") else None,
+                "issuetype": issue["fields"].get("issuetype", {}).get("name") if issue["fields"].get("issuetype") else None,
+                "type": "jira_issue",
+            }
+
+            return Document(page_content=content, metadata=metadata)
+
+        except Exception as e:
+            logger.error(f"Error processing issue {issue.get('key', 'unknown')}: {str(e)}")
+            return None
+
+    def _index_tool_params(self, **kwargs) -> dict[str, tuple[type, Field]]:
+        return {
+            'jql': (Optional[str], Field(
+                description="JQL query to filter issues. If not provided, all accessible issues will be indexed. Examples: 'project=PROJ', 'parentEpic=EPIC-123', 'status=Open'",
+                default=None)),
+            'fields_to_extract': (Optional[List[str]],
+                                  Field(description="Additional fields to extract from issues", default=None)),
+            'fields_to_index': (Optional[List[str]],
+                                Field(description="Additional fields to include in indexed content", default=None)),
+            'include_attachments': (Optional[bool],
+                                    Field(description="Whether to include attachment content in indexing",
+                                          default=False)),
+            'max_total_issues': (Optional[int], Field(description="Maximum number of issues to index", default=1000)),
+            'skip_attachment_extensions': (Optional[str], Field(
+                description="Comma-separated list of file extensions to skip when processing attachments",
+                default=None)),
+        }
+
+    # def index_data(self,
+    #                jql: Optional[str] = None,
+    #                fields_to_extract: Optional[List[str]] = None,
+    #                fields_to_index: Optional[List[str]] = None,
+    #                include_attachments: Optional[bool] = False,
+    #                max_total_issues: Optional[int] = 1000,
+    #                skip_attachment_extensions: Optional[List[str]] = None,
+    #                collection_suffix: str = "",
+    #                progress_step: Optional[int] = None,
+    #                clean_index: Optional[bool] = False):
+    #     """
+    #     Index Jira issues into the vector store.
+    #
+    #     Args:
+    #         jql: JQL query to filter issues. If not provided, all accessible issues will be indexed
+    #         fields_to_extract: Additional fields to extract from issues
+    #         fields_to_index: Additional fields to include in indexed content
+    #         include_attachments: Whether to include attachment content in indexing
+    #         max_total_issues: Maximum number of issues to index
+    #         skip_attachment_extensions: Comma-separated list of file extensions to skip when processing attachments
+    #         collection_suffix: Optional suffix for collection name (max 7 characters)
+    #         progress_step: Optional step size for progress reporting during indexing
+    #         clean_index: Optional flag to enforce clean existing index before indexing new data
+    #
+    #     Returns:
+    #         Result message from the vector store indexing operation
+    #     """
+    #     try:
+    #         # Validate that at least one filter is provided
+    #         if not any([jql]):
+    #             raise ToolException("Must provide at least one of: jql to filter issues for indexing")
+    #
+    #         # set extensions to skip for post-processing
+    #         self._skipped_attachment_extensions = skip_attachment_extensions if skip_attachment_extensions else []
+    #
+    #         # Get embeddings
+    #         from ...runtime.langchain.interfaces.llm_processor import get_embeddings
+    #         embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+    #
+    #         # Initialize vector store
+    #         vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+    #
+    #         # Prepare parameters for the loader
+    #         loader_params = {
+    #             'jql': jql,
+    #             'fields_to_extract': fields_to_extract,
+    #             'fields_to_index': fields_to_index,
+    #             'include_attachments': include_attachments,
+    #             'max_total_issues': max_total_issues,
+    #             'skip_attachment_extensions': skip_attachment_extensions,
+    #         }
+    #
+    #         # Load documents using _base_loader
+    #         docs = self._base_loader(**loader_params)
+    #
+    #         if not docs:
+    #             return "No Jira issues found matching the specified criteria."
+    #
+    #         docs = list(docs)  # Convert generator to list for logging and indexing
+    #         logger.info(f"Loaded {len(docs)} Jira issues for indexing")
+    #
+    #         # Index the documents
+    #         result = vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+    #
+    #         return f"Successfully indexed {len(docs)} Jira issues. {result}"
+    #
+    #     except Exception as e:
+    #         logger.error(f"Error indexing Jira issues: {str(e)}")
+    #         raise ToolException(f"Error indexing Jira issues: {str(e)}")
+
+    @extend_with_vector_tools
     def get_available_tools(self):
         return [
             {
```
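Because these loaders are private, a typical caller would reach them through the vector-store tools that `@extend_with_vector_tools` attaches to `get_available_tools`. Purely as an illustration of the parameter flow, assuming `wrapper` is an already-validated `JiraApiWrapper` instance (its construction is not shown in this diff):

```python
# Sketch only: drives the new loader directly to show the parameter flow.
# `wrapper` is assumed to be a configured, validated JiraApiWrapper instance.
docs = wrapper._base_loader(
    jql="project=PROJ AND status=Open",   # optional; defaults to all issues
    include_attachments=True,             # adds 'attachment' to the fetched fields
    max_total_issues=200,                 # forwarded as maxResults to the search API
    skip_attachment_extensions=[".png"],  # extensions _process_document will skip
)
for doc in docs:
    print(doc.metadata["issue_key"], doc.metadata["status"])
```

Note that `_jql_get_tickets` pages through the REST search endpoint by advancing `startAt` by the size of each returned batch, so issues are streamed batch by batch rather than materialized all at once.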
```diff
--- a/alita_sdk/tools/sharepoint/__init__.py
+++ b/alita_sdk/tools/sharepoint/__init__.py
@@ -15,7 +15,13 @@ def get_tools(tool):
             client_id=tool['settings'].get('client_id', None),
             client_secret=tool['settings'].get('client_secret', None),
             toolkit_name=tool.get('toolkit_name'),
-            llm=tool['settings'].get('llm')
+            llm=tool['settings'].get('llm'),
+            # indexer settings
+            connection_string=tool['settings'].get('connection_string', None),
+            collection_name=f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+            embedding_model="HuggingFaceEmbeddings",
+            embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+            vectorstore_type="PGVector")
             .get_tools())
 
 
@@ -33,6 +39,10 @@ class SharepointToolkit(BaseToolkit):
             client_id=(str, Field(description="Client ID")),
             client_secret=(SecretStr, Field(description="Client Secret", json_schema_extra={'secret': True})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
+            # indexer settings
+            connection_string = (Optional[SecretStr], Field(description="Connection string for vectorstore",
+                                                            default=None,
+                                                            json_schema_extra={'secret': True})),
             __config__=ConfigDict(json_schema_extra={
                 'metadata': {
                     "label": "Sharepoint", "icon_url": "sharepoint.svg",
```
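For illustration, a tool definition that exercises the new wiring might look like the sketch below; only the keys actually read in the hunk above (`toolkit_name`, `id`, `client_id`, `client_secret`, `llm`, `connection_string`) are confirmed by this diff, the rest are assumptions.

```python
# Hypothetical tool definition consumed by get_tools(); the concrete values
# and any keys beyond those read in the hunk above are illustrative only.
tool = {
    "id": 42,
    "toolkit_name": "sp_docs",
    "settings": {
        "client_id": "<app-client-id>",
        "client_secret": "<app-client-secret>",
        "llm": None,
        "connection_string": "postgresql+psycopg://user:pass@localhost:5432/vectors",
    },
}
# get_tools(tool) would then build the wrapper with collection_name "sp_docs_42",
# HuggingFace MiniLM embeddings, and PGVector as the vector store backend.
```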
```diff
--- a/alita_sdk/tools/sharepoint/api_wrapper.py
+++ b/alita_sdk/tools/sharepoint/api_wrapper.py
@@ -1,17 +1,15 @@
 import json
 import logging
-from typing import Optional, List,
+from typing import Optional, List, Generator
 
-from
-from ..utils.content_parser import parse_file_content
+from langchain_core.documents import Document
 from langchain_core.tools import ToolException
 from office365.runtime.auth.client_credential import ClientCredential
 from office365.sharepoint.client_context import ClientContext
 from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr
 
-from ..elitea_base import
-from
-from langchain_core.documents import Document
+from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
+from ..utils.content_parser import parse_file_content
 
 NoInput = create_model(
     "NoInput"
@@ -39,15 +37,6 @@ ReadDocument = create_model(
                           default=None))
 )
 
-indexData = create_model(
-    "indexData",
-    __base__=BaseIndexParams,
-    progress_step=(Optional[int], Field(default=None, ge=0, le=100,
-                                        description="Optional step size for progress reporting during indexing")),
-    clean_index=(Optional[bool], Field(default=False,
-                                       description="Optional flag to enforce clean existing index before indexing new data")),
-)
-
 
 class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
     site_url: str
@@ -56,13 +45,6 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
     token: SecretStr = None
     _client: Optional[ClientContext] = PrivateAttr()  # Private attribute for the office365 client
 
-    llm: Any = None
-    connection_string: Optional[SecretStr] = None
-    collection_name: Optional[str] = None
-    embedding_model: Optional[str] = "HuggingFaceEmbeddings"
-    embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
-    vectorstore_type: Optional[str] = "PGVector"
-
     @model_validator(mode='before')
     @classmethod
     def validate_toolkit(cls, values):
@@ -143,7 +125,11 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
         logging.error(f"Failed to load files from sharepoint: {e}")
         return ToolException("Can not get files. Please, double check folder name and read permissions.")
 
-    def read_file(self, path,
+    def read_file(self, path,
+                  is_capture_image: bool = False,
+                  page_number: int = None,
+                  sheet_name: str = None,
+                  excel_by_sheets: bool = False):
         """ Reads file located at the specified server-relative path. """
         try:
             file = self._client.web.get_file_by_server_relative_path(path)
@@ -159,9 +145,10 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
                                   is_capture_image=is_capture_image,
                                   page_number=page_number,
                                   sheet_name=sheet_name,
+                                  excel_by_sheets=excel_by_sheets,
                                   llm=self.llm)
 
-    def _base_loader(self) -> List[Document]:
+    def _base_loader(self, **kwargs) -> List[Document]:
         try:
             all_files = self.get_files_list()
         except Exception as e:
@@ -170,35 +157,24 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
         docs: List[Document] = []
         for file in all_files:
             metadata = {
-                ("
+                ("updated_on" if k == "Modified" else k): str(v)
                 for k, v in file.items()
             }
             docs.append(Document(page_content="", metadata=metadata))
         return docs
 
-    def index_data(self,
-                   collection_suffix: str = '',
-                   progress_step: int = None,
-                   clean_index: bool = False):
-        docs = self._base_loader()
-        embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
-        vs = self._init_vector_store(collection_suffix, embeddings=embedding)
-        return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
-
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
-
-
-
-
-
-
-
-
-
-
-
-        yield document
-
+        page_content = self.read_file(document.metadata['Path'], is_capture_image=True, excel_by_sheets=True)
+        if isinstance(page_content, dict):
+            for key, value in page_content.items():
+                metadata = document.metadata
+                metadata['page'] = key
+                yield Document(page_content=str(value), metadata=metadata)
+        else:
+            document.page_content = json.dumps(str(page_content))
+            yield document
+
+    @extend_with_vector_tools
     def get_available_tools(self):
         return [
             {
@@ -218,11 +194,5 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
             "description": self.read_file.__doc__,
             "args_schema": ReadDocument,
             "ref": self.read_file
-        },
-        {
-            "name": "index_data",
-            "ref": self.index_data,
-            "description": self.index_data.__doc__,
-            "args_schema": indexData,
         }
     ]
```
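The rewritten `_process_document` fans a workbook out into one document per sheet whenever `read_file` returns a dict (the `excel_by_sheets=True` path) and otherwise serializes the parsed content into the base document. A hedged consumption sketch, assuming `wrapper` is an already-configured `SharepointApiWrapper` and that file metadata carries the `Path` key used above:

```python
from langchain_core.documents import Document

# Assumed: wrapper is a configured, validated SharepointApiWrapper instance.
base = Document(page_content="", metadata={"Path": "/sites/docs/report.xlsx"})
for doc in wrapper._process_document(base):
    # Excel: one Document per sheet, with the sheet name in metadata['page'];
    # other file types: a single Document whose content is the parsed text.
    print(doc.metadata.get("page"), doc.page_content[:80])
```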
```diff
--- a/alita_sdk/tools/testrail/__init__.py
+++ b/alita_sdk/tools/testrail/__init__.py
@@ -51,6 +51,10 @@ class TestrailToolkit(BaseToolkit):
             ),
             email=(str, Field(description="User's email", json_schema_extra={'configuration': True})),
             password=(SecretStr, Field(description="User's password", json_schema_extra={'secret': True, 'configuration': True})),
+            # indexer settings
+            connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
+                                                          default=None,
+                                                          json_schema_extra={'secret': True})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra={'metadata':
                 {"label": "Testrail", "icon_url": "testrail-icon.svg",
```
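The Sharepoint and Testrail toolkits gain the same optional `connection_string` secret, and together with the Jira changes above this release converges on `PGVector` plus `sentence-transformers/all-MiniLM-L6-v2` as the default indexing backend. For PGVector that connection string is a PostgreSQL DSN; the value below is an assumed example, not something pinned by this diff.

```python
# Assumed DSN shape for a PGVector-backed store; adjust driver, host and database.
connection_string = "postgresql+psycopg://user:password@localhost:5432/vectorstore"
```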