alita-sdk 0.3.172__py3-none-any.whl → 0.3.173__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
alita_sdk/tools/ado/repos/__init__.py
@@ -46,10 +46,10 @@ class AzureDevOpsReposToolkit(BaseToolkit):
         AzureDevOpsReposToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         m = create_model(
             name,
-            organization_url=(Optional[str], Field(default="", title="Organization URL", description="ADO organization url")),
-            project=(Optional[str], Field(default="", title="Project", description="ADO project")),
-            repository_id=(Optional[str], Field(default="", title="Repository ID", description="ADO repository ID", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AzureDevOpsReposToolkit.toolkit_max_length})),
-            token=(Optional[SecretStr], Field(default="", title="Token", description="ADO token", json_schema_extra={'secret': True})),
+            organization_url=(Optional[str], Field(default="", title="Organization URL", description="ADO organization url", json_schema_extra={'configuration': True})),
+            project=(Optional[str], Field(default="", title="Project", description="ADO project", json_schema_extra={'configuration': True})),
+            repository_id=(Optional[str], Field(default="", title="Repository ID", description="ADO repository ID", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AzureDevOpsReposToolkit.toolkit_max_length, 'configuration': True})),
+            token=(Optional[SecretStr], Field(default="", title="Token", description="ADO token", json_schema_extra={'secret': True, 'configuration': True})),
             base_branch=(Optional[str], Field(default="", title="Base branch", description="ADO base branch (e.g., main)")),
             active_branch=(Optional[str], Field(default="", title="Active branch", description="ADO active branch (e.g., main)")),
 
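The change above (repeated across the other toolkit `__init__.py` files below) adds `'configuration': True` to `json_schema_extra` on connection-level fields. In pydantic v2, `json_schema_extra` keys are merged into each property's JSON schema, so a consuming UI or runtime can discover which fields form the shared connection configuration. A minimal sketch of that discovery, using a hypothetical stand-in model rather than the SDK's generated one:

```python
from typing import Optional
from pydantic import Field, create_model

# Hypothetical stand-in for the toolkit's generated config model.
Config = create_model(
    "AdoReposConfig",
    organization_url=(Optional[str], Field(default="", json_schema_extra={'configuration': True})),
    token=(Optional[str], Field(default="", json_schema_extra={'secret': True, 'configuration': True})),
    base_branch=(Optional[str], Field(default="")),
)

# json_schema_extra keys surface on each property, so filtering is trivial.
schema = Config.model_json_schema()
config_fields = [name for name, prop in schema['properties'].items() if prop.get('configuration')]
print(config_fields)  # ['organization_url', 'token']
```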
alita_sdk/tools/ado/test_plan/__init__.py
@@ -3,9 +3,10 @@ from typing import List, Optional, Literal
 from langchain_core.tools import BaseTool, BaseToolkit
 from pydantic import create_model, BaseModel, Field, SecretStr
 
+import requests
 from .test_plan_wrapper import TestPlanApiWrapper
 from ...base.tool import BaseAction
-from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, check_connection_response
 
 
 name = "azure_devops_plans"
@@ -20,12 +21,12 @@ class AzureDevOpsPlansToolkit(BaseToolkit):
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in TestPlanApiWrapper.model_construct().get_available_tools()}
         AzureDevOpsPlansToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
-        return create_model(
+        m = create_model(
             name_alias,
             name=(str, Field(description="Toolkit name", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AzureDevOpsPlansToolkit.toolkit_max_length})),
-            organization_url=(str, Field(description="ADO organization url")),
+            organization_url=(str, Field(description="ADO organization url", json_schema_extra={'configuration': True})),
             limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
-            token=(SecretStr, Field(description="ADO token", json_schema_extra={'secret': True})),
+            token=(SecretStr, Field(description="ADO token", json_schema_extra={'secret': True, 'configuration': True})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__={'json_schema_extra': {'metadata':
                 {
@@ -49,6 +50,18 @@ class AzureDevOpsPlansToolkit(BaseToolkit):
             }
         )
 
+        @check_connection_response
+        def check_connection(self):
+            response = requests.get(
+                f'{self.organization_url}/{self.project}/_apis/testplan/plans?api-version=7.0',
+                headers = {'Authorization': f'Bearer {self.token}'},
+                timeout=5
+            )
+            return response
+
+        m.check_connection = check_connection
+        return m
+
     @classmethod
     def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
         from os import environ
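Each ADO toolkit now builds its config model into a local `m`, attaches a `check_connection` function, and returns the model. The `check_connection_response` decorator is imported from `alita_sdk/tools/utils`, whose implementation is not part of this diff; a decorator of roughly this shape would turn the raw `requests.Response` into a pass/fail result (a sketch under that assumption, not the SDK's actual code):

```python
import functools

def check_connection_response(func):
    """Assumed behavior: treat a 2xx response as success, raise otherwise."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        response = func(*args, **kwargs)
        if response.ok:
            return True
        # Surface the failing status so callers can show a useful error.
        raise ConnectionError(f"Connection check failed: HTTP {response.status_code}")
    return wrapper
```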
alita_sdk/tools/ado/wiki/__init__.py
@@ -3,8 +3,9 @@ from .ado_wrapper import AzureDevOpsApiWrapper  # Import the API wrapper for Azu
 from langchain_core.tools import BaseTool, BaseToolkit
 from pydantic import create_model, BaseModel, Field, SecretStr
 
+import requests
 from ...base.tool import BaseAction
-from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, check_connection_response
 
 name = "azure_devops_wiki"
 name_alias = 'ado_wiki'
@@ -17,16 +18,16 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in AzureDevOpsApiWrapper.model_construct().get_available_tools()}
         AzureDevOpsWikiToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
-        return create_model(
+        m = create_model(
             name_alias,
             name=(str, Field(description="Toolkit name",
                              json_schema_extra={
                                  'toolkit_name': True,
                                  'max_toolkit_length': AzureDevOpsWikiToolkit.toolkit_max_length})
                  ),
-            organization_url=(str, Field(description="ADO organization url")),
-            project=(str, Field(description="ADO project", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AzureDevOpsWikiToolkit.toolkit_max_length})),
-            token=(SecretStr, Field(description="ADO token", json_schema_extra={'secret': True})),
+            organization_url=(str, Field(description="ADO organization url", json_schema_extra={'configuration': True})),
+            project=(str, Field(description="ADO project", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AzureDevOpsWikiToolkit.toolkit_max_length, 'configuration': True})),
+            token=(SecretStr, Field(description="ADO token", json_schema_extra={'secret': True, 'configuration': True})),
             selected_tools=(List[Literal[tuple(selected_tools)]],
                             Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__={
@@ -52,6 +53,18 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
             }
         )
 
+        @check_connection_response
+        def check_connection(self):
+            response = requests.get(
+                f'{self.organization_url}/{self.project}/_apis/wiki/wikis?api-version=7.0',
+                headers={'Authorization': f'Bearer {self.token}'},
+                timeout=5
+            )
+            return response
+
+        m.check_connection = check_connection
+        return m
+
     @classmethod
     def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
         from os import environ
alita_sdk/tools/ado/work_item/__init__.py
@@ -3,8 +3,9 @@ from .ado_wrapper import AzureDevOpsApiWrapper  # Import the API wrapper for Azu
 from langchain_core.tools import BaseTool, BaseToolkit
 from pydantic import create_model, BaseModel, Field, SecretStr
 
+import requests
 from ...base.tool import BaseAction
-from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, check_connection_response
 
 name = "ado_boards"
 
@@ -16,16 +17,16 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in AzureDevOpsApiWrapper.model_construct().get_available_tools()}
         AzureDevOpsWorkItemsToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
-        return create_model(
+        m = create_model(
             name,
             name=(str, Field(description="Toolkit name",
                              json_schema_extra={
                                  'toolkit_name': True,
                                  'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})
                  ),
-            organization_url=(str, Field(description="ADO organization url")),
-            project=(str, Field(description="ADO project", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})),
-            token=(SecretStr, Field(description="ADO token", json_schema_extra={'secret': True})),
+            organization_url=(str, Field(description="ADO organization url", json_schema_extra={'configuration': True})),
+            project=(str, Field(description="ADO project", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length, 'configuration': True})),
+            token=(SecretStr, Field(description="ADO token", json_schema_extra={'secret': True, 'configuration': True})),
             limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__={
@@ -51,6 +52,18 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
             }
         )
 
+        @check_connection_response
+        def check_connection(self):
+            response = requests.get(
+                f'{self.organization_url}/{self.project}/_apis/wit/workitemtypes?api-version=7.0',
+                headers={'Authorization': f'Bearer {self.token}'},
+                timeout=5
+            )
+            return response
+
+        m.check_connection = check_connection
+        return m
+
     @classmethod
     def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
         from os import environ
alita_sdk/tools/confluence/__init__.py
@@ -4,7 +4,8 @@ from .api_wrapper import ConfluenceAPIWrapper
 from langchain_core.tools import BaseTool
 from ..base.tool import BaseAction
 from pydantic import create_model, BaseModel, ConfigDict, Field, SecretStr
-from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, parse_list
+from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, parse_list, check_connection_response
+import requests
 
 name = "confluence"
 
@@ -43,7 +44,21 @@ class ConfluenceToolkit(BaseToolkit):
         selected_tools = {x['name']: x['args_schema'].schema() for x in
                           ConfluenceAPIWrapper.model_construct().get_available_tools()}
         ConfluenceToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
-        return create_model(
+
+        @check_connection_response
+        def check_connection(self):
+            url = self.base_url.rstrip('/') + '/wiki/rest/api/space'
+            headers = {'Accept': 'application/json'}
+            auth = None
+            if self.token:
+                headers['Authorization'] = f'Bearer {self.token.get_secret_value()}'
+            elif self.username and self.api_key:
+                auth = (self.username, self.api_key.get_secret_value())
+            else:
+                raise ValueError('Confluence connection requires either token or username+api_key')
+            response = requests.get(url, headers=headers, auth=auth, timeout=5, verify=getattr(self, 'verify_ssl', True))
+            return response
+        model = create_model(
             name,
             base_url=(str, Field(description="Confluence URL", json_schema_extra={'configuration': True, 'configuration_title': True})),
             token=(SecretStr, Field(description="Token", default=None, json_schema_extra={'secret': True, 'configuration': True})),
@@ -92,6 +107,8 @@ class ConfluenceToolkit(BaseToolkit):
             }
         })
         )
+        model.check_connection = check_connection
+        return model
 
     @classmethod
     def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
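Unlike the ADO variants, the Confluence (and Jira, further down) `check_connection` picks its auth scheme from whichever credentials the model carries: a bearer `token` wins, otherwise it falls back to basic auth with `username` + `api_key`, and it raises if neither pair is set. A hedged caller-side sketch; the field values are placeholders and `model_construct` is used to skip validation of fields not shown in this diff:

```python
from pydantic import SecretStr

ConfluenceConfig = ConfluenceToolkit.toolkit_config_schema()

config = ConfluenceConfig.model_construct(
    base_url="https://example.atlassian.net",
    token=None,                            # unset, so the basic-auth branch runs
    username="bot@example.com",
    api_key=SecretStr("placeholder-api-key"),
)
config.check_connection()  # GET {base_url}/wiki/rest/api/space, 5 s timeout
```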
alita_sdk/tools/confluence/api_wrapper.py
@@ -163,7 +163,7 @@ pageId = create_model(
 indexPagesParams = create_model(
     "indexPagesParams",
     __base__=BaseIndexParams,
-    content_format=(Literal['view', 'storage', 'export_view', 'editor', 'anonymous'],
+    content_format=(Literal['view', 'storage', 'export_view', 'editor', 'anonymous'],
                     Field(description="The format of the content to be retrieved.")),
     ### Loader Parameters
     page_ids=(Optional[List[str]], Field(description="List of page IDs to retrieve.", default=None)),
@@ -196,6 +196,13 @@ GetPageWithImageDescriptions = create_model(
         default=500))
 )
 
+GetPageAttachmentsInput = create_model(
+    "GetPageAttachmentsInput",
+    page_id=(str, Field(description="Confluence page ID from which attachments will be retrieved")),
+    max_content_length=(int, Field(default=10000, description="Maximum number of characters to return for attachment content. Content will be truncated if longer. Default is 10000.")),
+    custom_prompt=(Optional[str], Field(default=None, description="Custom prompt to use for LLM-based analysis of attachments (images, pdfs, etc). If not provided, a default prompt will be used.")),
+)
+
 
 def parse_payload_params(params: Optional[str]) -> Dict[str, Any]:
     if params:
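`GetPageAttachmentsInput` is the args schema for the new `get_page_attachments` tool registered near the bottom of this file. The agent runtime typically validates raw tool input against such a model before dispatching to the bound method; a short sketch of what that validation yields:

```python
args = GetPageAttachmentsInput(page_id="123456", max_content_length=2000)
print(args.model_dump())
# {'page_id': '123456', 'max_content_length': 2000, 'custom_prompt': None}
```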
@@ -710,6 +717,7 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
             metadata=metadata,
         )
 
+    # todo: refactor this method since file processing is not working (self.process_pdf, self.process_image, etc. are not defined)
     def process_attachment(
         self,
         page_id: str,
@@ -893,7 +901,7 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
             from alita_sdk.langchain.interfaces.llm_processor import get_embeddings
         except ImportError:
             from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
-
+
         loader_params = {
             'url': self.base_url,
             'space_key': self.space,
@@ -919,7 +927,7 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
             embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
 
         chunker = chunkers.get(chunking_tool)
-
+
         chunking_config = chunking_config or {}
 
         if chunker:
@@ -929,7 +937,7 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
             # Set required fields that should come from the instance
             chunking_config['embedding'] = embedding
             chunking_config['llm'] = self.llm
-
+
             try:
                 # Validate the configuration using the appropriate Pydantic model
                 validated_config = config_model(**chunking_config)
@@ -941,13 +949,13 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
             # Fallback for chunkers without models
             chunking_config['embedding'] = embedding
             chunking_config['llm'] = self.llm
-
+
         documents = chunker(documents, chunking_config)
-
+
         # passing embedding to avoid re-initialization
-        vectorstore = self._init_vector_store(collection_suffix, embeddings=embedding)
+        vectorstore = self._init_vector_store(collection_suffix, embeddings=embedding)
         return vectorstore.index_documents(documents)
-
+
 
     def _download_image(self, image_url):
         """
@@ -1094,7 +1102,7 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
         """
         return """
 ## Image Analysis Task:
-Analyze this image in detail, paying special attention to contextual information provided about it.
+Analyze this image in detail, paying special attention to contextual information provided about it.
 Focus on:
 1. Visual elements and their arrangement
 2. Any text visible in the image
@@ -1159,14 +1167,32 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
             return cached_description
 
         try:
-            from io import BytesIO
-            from PIL import Image, UnidentifiedImageError
-
             # Get the LLM instance
             llm = self.llm
             if not llm:
                 return "[LLM not available for image processing]"
 
+            # If image_data is empty or None, do text-only analysis
+            if not image_data:
+                prompt = custom_prompt if custom_prompt else self._get_default_image_analysis_prompt()
+                if image_name or context_text:
+                    prompt += "\n\n## Additional Context Information:\n"
+                    if image_name:
+                        prompt += f"- Image Name/Reference: {image_name}\n"
+                    if context_text:
+                        prompt += f"- Surrounding Content: {context_text}\n"
+                    prompt += "\nPlease incorporate this contextual information in your description when relevant."
+                result = llm.invoke([
+                    HumanMessage(
+                        content=[{"type": "text", "text": prompt}]
+                    )
+                ])
+                description = result.content
+                self._image_cache.set(image_data, description, image_name)
+                return description
+
+            from io import BytesIO
+            from PIL import Image, UnidentifiedImageError
             # Try to load and validate the image with PIL
             try:
                 bio = BytesIO(image_data)
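The new branch above sends a text-only message when no image bytes are available (the drawio path below passes `b""` for exactly this reason), while real images continue through the existing PIL/multimodal path. The two langchain payload shapes, for comparison; the base64 step for real images is not shown in this hunk and is assumed here:

```python
import base64
from langchain_core.messages import HumanMessage

# Text-only analysis, used when image bytes are absent:
text_msg = HumanMessage(content=[{"type": "text", "text": "Describe the attached diagram..."}])

# Multimodal analysis, the usual shape for real image bytes:
image_b64 = base64.b64encode(b"...png bytes...").decode()
image_msg = HumanMessage(content=[
    {"type": "text", "text": "Describe this image."},
    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
])
```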
@@ -1402,6 +1428,251 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
             logger.error(f"Error processing page with images: {stacktrace}")
             return f"Error processing page with images: {str(e)}"
 
+    def get_page_attachments(self, page_id: str, max_content_length: int = 10000, custom_prompt: str = None):
+        """
+        Retrieve all attachments for a Confluence page, including core metadata (with creator, created, updated), comments,
+        file content, and LLM-based analysis for supported types.
+        Returns a list of dicts, each with keys: metadata, comments, content, llm_analysis.
+        """
+        try:
+            attachments = self.client.get_attachments_from_content(page_id)
+            if not attachments or not attachments.get('results'):
+                return f"No attachments found for page ID {page_id}."
+
+            # Get attachment history for created/updated info
+            history_map = {}
+            for attachment in attachments['results']:
+                try:
+                    hist = self.client.history(attachment['id'])
+                    history_map[attachment['id']] = hist
+                except Exception as e:
+                    logger.warning(f"Failed to fetch history for attachment {attachment.get('title', '')}: {str(e)}")
+                    history_map[attachment['id']] = None
+
+            results = []
+            for attachment in attachments['results']:
+                media_type = attachment.get('metadata', {}).get('mediaType', '')
+                # Core metadata extraction with history
+                hist = history_map.get(attachment['id']) or {}
+                created_by = hist.get('createdBy', {}).get('displayName', '') if hist else attachment.get('creator', {}).get('displayName', '')
+                created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
+                last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
+                metadata = {
+                    'name': attachment.get('title', ''),
+                    'size': attachment.get('extensions', {}).get('fileSize', None),
+                    'creator': created_by,
+                    'created': created_date,
+                    'updated': last_updated,
+                    'media_type': media_type,
+                    'labels': [label['name'] for label in attachment.get('metadata', {}).get('labels', {}).get('results', [])],
+                    'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get('_links', {}).get('download') else None
+                }
+                # Fetch comments for the attachment
+                comments = []
+                try:
+                    comments_response = self.client.get_comments_for_attachment(attachment['id'])
+                    if comments_response and 'results' in comments_response:
+                        for comment in comments_response['results']:
+                            comments.append({
+                                'id': comment.get('id'),
+                                'author': comment.get('creator', {}).get('displayName', ''),
+                                'created': comment.get('created', ''),
+                                'body': comment.get('body', {}).get('storage', {}).get('value', '')
+                            })
+                except Exception as e:
+                    logger.warning(f"Failed to fetch comments for attachment {attachment.get('title', '')}: {str(e)}")
+
+                content = None
+                llm_analysis = None
+                title = attachment.get('title', '')
+                download_url = self.base_url.rstrip('/') + attachment['_links']['download']
+
+                # --- Begin: Raw content for xml, json, markdown, txt ---
+                # Check by media type or file extension
+                file_ext = title.lower().split('.')[-1] if '.' in title else ''
+                is_text_type = (
+                    media_type in [
+                        'application/xml', 'text/xml',
+                        'application/json', 'text/json',
+                        'text/markdown', 'text/x-markdown',
+                        'text/plain', 'text/csv',
+                        'text/html', 'image/svg+xml',
+                        'application/vnd.ms-excel',
+                        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+                        'application/msword',
+                        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+                        'application/vnd.ms-excel.sheet.macroEnabled.12',
+                        'application/csv', 'application/x-csv',
+                        'text/x-csv',
+                        'application/doc', 'application/docx',
+                        'application/xls', 'application/xlsx',
+                        'application/svg', 'application/html',
+                    ]
+                    or file_ext in [
+                        'xml', 'json', 'md', 'markdown', 'txt',
+                        'csv', 'xls', 'xlsx', 'svg', 'html', 'htm', 'doc', 'docx'
+                    ]
+                )
+                if is_text_type:
+                    try:
+                        resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
+                        if resp.status_code == 200:
+                            # Try utf-8, fallback to latin1
+                            try:
+                                content = resp.content.decode('utf-8')
+                            except UnicodeDecodeError:
+                                content = resp.content.decode('latin1')
+                        else:
+                            content = f"[Failed to download: HTTP {resp.status_code}]"
+                    except Exception as e:
+                        content = f"[Error downloading content: {str(e)}]"
+
+                    # For some types, try to extract text if possible
+                    if file_ext in ['doc', 'docx']:
+                        try:
+                            import io
+                            if file_ext == 'docx':
+                                try:
+                                    from docx import Document as DocxDocument
+                                    docx_file = io.BytesIO(resp.content)
+                                    doc = DocxDocument(docx_file)
+                                    paragraphs = [p.text for p in doc.paragraphs]
+                                    content = '\n'.join(paragraphs)
+                                except Exception as e:
+                                    content = f"[Error extracting docx: {str(e)}]"
+                            elif file_ext == 'doc':
+                                try:
+                                    import textract
+                                    content = textract.process(None, extension='doc', input_data=resp.content).decode('utf-8')
+                                except Exception as e:
+                                    content = f"[Error extracting doc: {str(e)}]"
+                        except ImportError:
+                            content = "[python-docx or textract not installed for doc/docx extraction]"
+                    elif file_ext in ['csv']:
+                        try:
+                            import io
+                            import csv
+                            csv_file = io.StringIO(content)
+                            reader = csv.reader(csv_file)
+                            content = '\n'.join([', '.join(row) for row in reader])
+                        except Exception as e:
+                            content = f"[Error extracting csv: {str(e)}]"
+                    elif file_ext in ['xls', 'xlsx']:
+                        try:
+                            import io
+                            import pandas as pd
+                            excel_file = io.BytesIO(resp.content)
+                            df = pd.read_excel(excel_file, sheet_name=None)
+                            content = ''
+                            for sheet, data in df.items():
+                                content += f"\n--- Sheet: {sheet} ---\n"
+                                content += data.to_csv(index=False)
+                        except Exception as e:
+                            content = f"[Error extracting xls/xlsx: {str(e)}]"
+                    elif file_ext in ['svg'] or media_type == 'image/svg+xml':
+                        # SVG is XML, so just return as text
+                        pass
+                    elif file_ext in ['html', 'htm'] or media_type in ['text/html', 'application/html']:
+                        try:
+                            from bs4 import BeautifulSoup
+                            soup = BeautifulSoup(content, 'html.parser')
+                            content = soup.get_text(separator=' ', strip=True)
+                        except Exception as e:
+                            content = f"[Error extracting html: {str(e)}]"
+
+                    # Truncate content if longer than max_content_length
+                    if content and isinstance(content, str) and len(content) > max_content_length:
+                        content = content[:max_content_length] + f"\n...[truncated, showing first {max_content_length} characters]"
+
+                    # No LLM analysis for these types
+                    results.append({
+                        'metadata': metadata,
+                        'comments': comments,
+                        'content': content,
+                        'llm_analysis': llm_analysis
+                    })
+                    continue
+                # --- End: Raw content for xml, json, markdown, txt ---
+
+                # Download content for supported types
+                if media_type.startswith('image/') or media_type == 'application/pdf' or media_type.startswith('application/vnd.jgraph.mxfile'):
+                    if media_type == 'application/pdf':
+                        try:
+                            from pdf2image import convert_from_bytes
+                        except ImportError:
+                            logger.warning("pdf2image is not installed. Please install it to process PDF attachments.")
+                            llm_analysis = '[pdf2image not installed]'
+                            image_data = None
+                        else:
+                            image_data = self._download_image(download_url)
+                            if image_data:
+                                try:
+                                    pdf_images = convert_from_bytes(image_data)
+                                    llm_analysis = []
+                                    for idx, img in enumerate(pdf_images):
+                                        from io import BytesIO
+                                        img_buffer = BytesIO()
+                                        img.save(img_buffer, format='PNG')
+                                        img_buffer.seek(0)
+                                        page_context = f"Attachment: {title} (type: {media_type}, page {idx+1})"
+                                        page_analysis = self._process_image_with_llm(img_buffer.getvalue(), f"{title} (page {idx+1})", page_context, custom_prompt)
+                                        llm_analysis.append(page_analysis)
+                                    llm_analysis = '\n'.join(llm_analysis)
+                                except Exception as e:
+                                    logger.error(f"Failed to process PDF pages: {str(e)}")
+                                    llm_analysis = f"[Error processing PDF: {str(e)}]"
+                            else:
+                                content = None
+                                llm_analysis = None
+                    elif media_type.startswith('application/vnd.jgraph.mxfile'):
+                        # Handle drawio (mxfile): base64 decode & decompress, then run LLM
+                        image_data = self._download_image(download_url)
+                        if image_data:
+                            try:
+                                import xml.etree.ElementTree as ET
+                                import base64
+                                import zlib
+                                xml_str = image_data.decode("utf-8")
+                                root = ET.fromstring(xml_str)
+                                diagram_node = root.find("diagram")
+                                if diagram_node is not None and diagram_node.text:
+                                    diagram_base64 = diagram_node.text
+                                else:
+                                    diagram_base64 = None
+
+                                compressed = base64.b64decode(diagram_base64)
+                                xml_bytes = zlib.decompress(compressed, -15)
+                                xml_string = xml_bytes.decode('utf-8')
+
+                                # Use LLM to analyze the diagram XML string as text
+                                context_text = f"Attachment: {title} (type: {media_type})\nDrawio XML Content: {xml_string[:2000]}"  # Limit context for LLM
+                                llm_analysis = self._process_image_with_llm(b"", title, context_text, custom_prompt)  # Pass empty image, just analyze text
+                            except Exception as e:
+                                logger.error(f"Failed to convert drawio to image: {str(e)}")
+                                llm_analysis = f"[Error processing drawio: {str(e)}]"
+                        else:
+                            content = None
+                            llm_analysis = None
+                    else:
+                        image_data = self._download_image(download_url)
+                        if image_data:
+                            context_text = f"Attachment: {title} (type: {media_type})"
+                            llm_analysis = self._process_image_with_llm(image_data, title, context_text, custom_prompt)
+
+                if llm_analysis and isinstance(llm_analysis, str) and len(llm_analysis) > max_content_length:
+                    llm_analysis = llm_analysis[:max_content_length] + f"\n...[truncated, showing first {max_content_length} characters]"
+
+                results.append({
+                    'metadata': metadata,
+                    'comments': comments,
+                    'content': content,
+                    'llm_analysis': llm_analysis
+                })
+            return results
+        except Exception as e:
+            logger.error(f"Error retrieving attachments for page {page_id}: {str(e)}")
+            return f"Error retrieving attachments: {str(e)}"
+
 
     def get_available_tools(self):
         # Confluence-specific tools
         confluence_tools = [
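`get_page_attachments` returns either a list of per-attachment dicts or a plain error string, so callers need to branch on the type. A hedged usage sketch, where `wrapper` stands in for a `ConfluenceAPIWrapper` configured elsewhere in the SDK:

```python
attachments = wrapper.get_page_attachments("123456", max_content_length=5000)
if isinstance(attachments, str):
    print(attachments)  # "No attachments found..." or an error message
else:
    for att in attachments:
        meta = att['metadata']
        print(meta['name'], meta['media_type'], meta['size'])
        if att['content']:        # raw text for xml/json/csv/doc/... types
            print(att['content'][:200])
        if att['llm_analysis']:   # LLM description for images, PDFs, drawio
            print(att['llm_analysis'][:200])
```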
@@ -1519,10 +1790,16 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
                 "ref": self.index_data,
                 "description": self.index_data.__doc__,
                 "args_schema": indexPagesParams,
+            },
+            {
+                "name": "get_page_attachments",
+                "ref": self.get_page_attachments,
+                "description": self.get_page_attachments.__doc__,
+                "args_schema": GetPageAttachmentsInput,
             }
         ]
-
+
         # Add standardized vector search tools from base class
         vector_search_tools = self._get_vector_search_tools()
-
+
         return confluence_tools + vector_search_tools
alita_sdk/tools/jira/__init__.py
@@ -3,8 +3,9 @@ from .api_wrapper import JiraApiWrapper
 from langchain_core.tools import BaseTool, BaseToolkit
 from ..base.tool import BaseAction
 from pydantic import create_model, BaseModel, ConfigDict, Field, SecretStr
+import requests
 
-from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, parse_list
+from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, parse_list, check_connection_response
 
 name = "jira"
 
@@ -33,7 +34,22 @@ class JiraToolkit(BaseToolkit):
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in JiraApiWrapper.model_construct().get_available_tools()}
         JiraToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
-        return create_model(
+
+        @check_connection_response
+        def check_connection(self):
+            url = self.base_url.rstrip('/') + '/rest/api/2/myself'
+            headers = {'Accept': 'application/json'}
+            auth = None
+            if self.token:
+                headers['Authorization'] = f'Bearer {self.token.get_secret_value()}'
+            elif self.username and self.api_key:
+                auth = (self.username, self.api_key.get_secret_value())
+            else:
+                raise ValueError('Jira connection requires either token or username+api_key')
+            response = requests.get(url, headers=headers, auth=auth, timeout=5, verify=getattr(self, 'verify_ssl', True))
+            return response
+
+        model = create_model(
             name,
             base_url=(
                 str,
@@ -84,6 +100,8 @@ class JiraToolkit(BaseToolkit):
             }
         })
         )
+        model.check_connection = check_connection
+        return model
 
     @classmethod
     def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
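With this release, Jira, Confluence, and the ADO plans/wiki/boards toolkits all expose the same `check_connection` hook on their config models, which makes a uniform pre-flight check possible. A sketch only: `load_settings_for` is a hypothetical helper, and the hook exists starting with 0.3.173:

```python
for toolkit in (JiraToolkit, ConfluenceToolkit, AzureDevOpsPlansToolkit):
    model = toolkit.toolkit_config_schema()
    cfg = model.model_construct(**load_settings_for(toolkit))  # hypothetical settings loader
    try:
        cfg.check_connection()
        print(f"{toolkit.__name__}: connection OK")
    except Exception as exc:
        print(f"{toolkit.__name__}: connection failed: {exc}")
```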
alita_sdk-0.3.173.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.172
+Version: 0.3.173
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0
alita_sdk-0.3.173.dist-info/RECORD
@@ -133,13 +133,13 @@ alita_sdk/tools/__init__.py,sha256=48DhEi14KkaYhNb-KvXuM9XJ4WGC-v9sRcWfN7GFWd4,9
 alita_sdk/tools/elitea_base.py,sha256=NQaIxPX6DVIerHCb18jwUR6maZxxk73NZaTsFHkBQWE,21119
 alita_sdk/tools/ado/__init__.py,sha256=mD6GHcYMTtffPJkJvFPe2rzvye_IRmXmWfI7xYuZhO4,912
 alita_sdk/tools/ado/utils.py,sha256=PTCludvaQmPLakF2EbCGy66Mro4-rjDtavVP-xcB2Wc,1252
-alita_sdk/tools/ado/repos/__init__.py,sha256=Wro7xo1GcpX4M4yPBqDKFFE2wCCVzj0HyusdAmloxKM,5686
+alita_sdk/tools/ado/repos/__init__.py,sha256=-sMK94fLgtIAGoP26wXs466kF6AiZhdLDdr_--o4ELU,5818
 alita_sdk/tools/ado/repos/repos_wrapper.py,sha256=_OWKAls7VFfFtEPTwqj_DxE1MSvpC0ivxdTIULEz3Tk,48206
-alita_sdk/tools/ado/test_plan/__init__.py,sha256=ctn2CUkH_xS0Wkv2gLrO3FHQ36BvNCc0VcpMuddvU8E,3404
+alita_sdk/tools/ado/test_plan/__init__.py,sha256=PmVBwOQExgDmiywQEL9D29hbX3fC1pJd6bROnyBOu_k,3916
 alita_sdk/tools/ado/test_plan/test_plan_wrapper.py,sha256=oIvVhLUMP5ZGctoAtK6sU0y6Si9gNv9-mbLqcWtw3gY,12525
-alita_sdk/tools/ado/wiki/__init__.py,sha256=92AIAXVYSEYKnNvEG08W2YmR2lC35Bn92CiXJ8T3vpA,3736
+alita_sdk/tools/ado/wiki/__init__.py,sha256=6WRwKBznxXP5vSDpPT6i5njPgPsrUUOJ8U1qeH3wtGk,4265
 alita_sdk/tools/ado/wiki/ado_wrapper.py,sha256=l4bc2QoKSUXg9UqNcx0ylv7YL9JPPQd35Ti5MXyEgC4,12690
-alita_sdk/tools/ado/work_item/__init__.py,sha256=HcPX18wODKCn8oCnhs0DRXYKaRxiQ6GvEGgd1emHud8,3832
+alita_sdk/tools/ado/work_item/__init__.py,sha256=kLIFw7w_3ygqqPjzxWJ8cC9cbOxNl_yfxjhI7RGs1Ys,4368
 alita_sdk/tools/ado/work_item/ado_wrapper.py,sha256=t0D9xubU0yy_JmRJ_zEtRCxwFLyanT1StbIrtHGaqpw,26108
 alita_sdk/tools/advanced_jira_mining/__init__.py,sha256=pUTzECqGvYaR5qWY3JPUhrImrZgc7pCXuqSe5eWIE80,4604
 alita_sdk/tools/advanced_jira_mining/data_mining_wrapper.py,sha256=nZPtuwVWp8VeHw1B8q9kdwf-6ZvHnlXTOGdcIMDkKpw,44211
@@ -216,8 +216,8 @@ alita_sdk/tools/code/linter/api_wrapper.py,sha256=wylpwhAw02Jt8L18CqBq2He5PbwIkx
 alita_sdk/tools/code/loaders/codesearcher.py,sha256=XoXXZtIQZhvjIwZlnl_4wVGHC-3saYzFo5oDR_Zh3EY,529
 alita_sdk/tools/code/sonar/__init__.py,sha256=u8wpgXJ_shToLl3G9-XEtGDor5dhmsnurIImh1-e-U0,3165
 alita_sdk/tools/code/sonar/api_wrapper.py,sha256=nNqxcWN_6W8c0ckj-Er9HkNuAdgQLoWBXh5UyzNutis,2653
-alita_sdk/tools/confluence/__init__.py,sha256=ySsU3XzjQhSdl-XG1z_9H-Vmie-kqIY4_4yvf1NPdXQ,6088
-alita_sdk/tools/confluence/api_wrapper.py,sha256=-0NYvc_vCdNYSVcBq3UP4YN5s3y7h78KhYABMWy3Hxw,72281
+alita_sdk/tools/confluence/__init__.py,sha256=MkAUfSgQTZ6lVOHErwFggA4v_ctD9ADhhq6dKIALko0,6922
+alita_sdk/tools/confluence/api_wrapper.py,sha256=nCU4wAh3f6EN3NVXya3BVZQX47mi7JYAzylxJJOvMMc,88725
 alita_sdk/tools/confluence/loader.py,sha256=aHqgdIQMqkyRry8feHAhyd-a_ASEyW3JrV6epTRG6-c,9162
 alita_sdk/tools/confluence/utils.py,sha256=Lxo6dBD0OlvM4o0JuK6qeB_4LV9BptiwJA9e1vqNcDw,435
 alita_sdk/tools/custom_open_api/__init__.py,sha256=9aT5SPNPWcJC6jMZEM-3rUCXVULj_3-qJLQKmnreKNo,2537
@@ -244,7 +244,7 @@ alita_sdk/tools/gmail/gmail_wrapper.py,sha256=t0IYM3zb77Ub8o9kv6HugNm_OoG5tN9T73
 alita_sdk/tools/gmail/utils.py,sha256=cu6pbSsyMIr1BQOSs9et1rbAkk-Z_u48PB9FtJwFhUs,448
 alita_sdk/tools/google_places/__init__.py,sha256=mHKc7u9P2gqGDzqqJNQC9qiZYEm5gncnM_1XjtrM17o,3152
 alita_sdk/tools/google_places/api_wrapper.py,sha256=7nZly6nk4f4Tm7s2MVdnnwlb-1_WHRrDhyjDiqoyPjA,4674
-alita_sdk/tools/jira/__init__.py,sha256=rHfbnECLMCK0DvPvnYNKnz0ZalbpeLMXnsQl7hDuMcA,5151
+alita_sdk/tools/jira/__init__.py,sha256=es7-lRkd3MPwR4rXWTQTFI_yfP2hRoeD8oJD5SbhJkk,5978
 alita_sdk/tools/jira/api_wrapper.py,sha256=i0TIHhVnh44dAVTt6RWrZ4o9hyhSMfVkYtk6FB3D2zA,62035
 alita_sdk/tools/keycloak/__init__.py,sha256=0WB9yXMUUAHQRni1ghDEmd7GYa7aJPsTVlZgMCM9cQ0,3050
 alita_sdk/tools/keycloak/api_wrapper.py,sha256=cOGr0f3S3-c6tRDBWI8wMnetjoNSxiV5rvC_0VHb8uw,3100
@@ -326,8 +326,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=UHVQUVqcBc3SZvDfO78HSuBzwAsRw
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=rq4jOb3lRW2GXvAguk4H1KinO5f-zpygzhBJf-E1Ucw,2773
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=iOMxyE7vOc_LwFB_nBMiSFXkNtvbptA4i-BrTlo7M0A,5854
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=IYUJoMFOMA70knLhLtAnuGoy3OK80RuqeQZ710oyIxE,3631
-alita_sdk-0.3.172.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-alita_sdk-0.3.172.dist-info/METADATA,sha256=m184rG7yffSAsOEtfvcaqDMj6Xc01NvHefXrFTtYS7E,18757
-alita_sdk-0.3.172.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-alita_sdk-0.3.172.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
-alita_sdk-0.3.172.dist-info/RECORD,,
+alita_sdk-0.3.173.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.173.dist-info/METADATA,sha256=XTue8f1fKyX4zdPdddEyLCNsSDbFo-VdK7pLuIbZ5mI,18757
+alita_sdk-0.3.173.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.173.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.173.dist-info/RECORD,,