alita-sdk 0.3.271__py3-none-any.whl → 0.3.273__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (59)
  1. alita_sdk/configurations/__init__.py +10 -0
  2. alita_sdk/configurations/ado.py +4 -2
  3. alita_sdk/configurations/azure_search.py +1 -1
  4. alita_sdk/configurations/bigquery.py +1 -1
  5. alita_sdk/configurations/browser.py +18 -0
  6. alita_sdk/configurations/carrier.py +19 -0
  7. alita_sdk/configurations/delta_lake.py +1 -1
  8. alita_sdk/configurations/google_places.py +17 -0
  9. alita_sdk/configurations/postman.py +1 -1
  10. alita_sdk/configurations/qtest.py +1 -3
  11. alita_sdk/configurations/report_portal.py +19 -0
  12. alita_sdk/configurations/salesforce.py +19 -0
  13. alita_sdk/configurations/service_now.py +1 -12
  14. alita_sdk/configurations/sharepoint.py +19 -0
  15. alita_sdk/configurations/sonar.py +18 -0
  16. alita_sdk/configurations/sql.py +20 -0
  17. alita_sdk/configurations/testio.py +18 -0
  18. alita_sdk/configurations/zephyr_essential.py +18 -0
  19. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
  20. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +19 -6
  21. alita_sdk/runtime/langchain/document_loaders/ImageParser.py +17 -0
  22. alita_sdk/runtime/tools/vectorstore.py +25 -9
  23. alita_sdk/runtime/tools/vectorstore_base.py +4 -1
  24. alita_sdk/tools/aws/delta_lake/__init__.py +2 -2
  25. alita_sdk/tools/azure_ai/search/__init__.py +1 -1
  26. alita_sdk/tools/base_indexer_toolkit.py +8 -8
  27. alita_sdk/tools/bitbucket/__init__.py +1 -1
  28. alita_sdk/tools/browser/__init__.py +14 -10
  29. alita_sdk/tools/carrier/__init__.py +11 -11
  30. alita_sdk/tools/code/sonar/__init__.py +10 -7
  31. alita_sdk/tools/confluence/__init__.py +1 -1
  32. alita_sdk/tools/elitea_base.py +9 -8
  33. alita_sdk/tools/figma/__init__.py +1 -1
  34. alita_sdk/tools/github/__init__.py +2 -2
  35. alita_sdk/tools/gitlab_org/__init__.py +1 -1
  36. alita_sdk/tools/google/bigquery/__init__.py +1 -1
  37. alita_sdk/tools/google_places/__init__.py +10 -5
  38. alita_sdk/tools/jira/__init__.py +1 -1
  39. alita_sdk/tools/jira/api_wrapper.py +140 -99
  40. alita_sdk/tools/qtest/__init__.py +1 -1
  41. alita_sdk/tools/rally/__init__.py +1 -1
  42. alita_sdk/tools/report_portal/__init__.py +9 -8
  43. alita_sdk/tools/salesforce/__init__.py +9 -8
  44. alita_sdk/tools/servicenow/__init__.py +1 -1
  45. alita_sdk/tools/sharepoint/__init__.py +5 -7
  46. alita_sdk/tools/slack/__init__.py +1 -1
  47. alita_sdk/tools/sql/__init__.py +9 -11
  48. alita_sdk/tools/testio/__init__.py +9 -6
  49. alita_sdk/tools/utils/content_parser.py +59 -24
  50. alita_sdk/tools/xray/api_wrapper.py +60 -101
  51. alita_sdk/tools/zephyr_enterprise/__init__.py +1 -1
  52. alita_sdk/tools/zephyr_essential/__init__.py +5 -4
  53. alita_sdk/tools/zephyr_essential/api_wrapper.py +42 -10
  54. alita_sdk/tools/zephyr_scale/__init__.py +1 -1
  55. {alita_sdk-0.3.271.dist-info → alita_sdk-0.3.273.dist-info}/METADATA +1 -1
  56. {alita_sdk-0.3.271.dist-info → alita_sdk-0.3.273.dist-info}/RECORD +59 -48
  57. {alita_sdk-0.3.271.dist-info → alita_sdk-0.3.273.dist-info}/WHEEL +0 -0
  58. {alita_sdk-0.3.271.dist-info → alita_sdk-0.3.273.dist-info}/licenses/LICENSE +0 -0
  59. {alita_sdk-0.3.271.dist-info → alita_sdk-0.3.273.dist-info}/top_level.txt +0 -0
alita_sdk/tools/carrier/__init__.py
@@ -1,12 +1,13 @@
  import logging
  from typing import Dict, List, Optional, Literal
  from langchain_core.tools import BaseToolkit, BaseTool
- from pydantic import create_model, BaseModel, ConfigDict, Field, SecretStr
+ from pydantic import create_model, BaseModel, ConfigDict, Field
  from functools import lru_cache

  from .api_wrapper import CarrierAPIWrapper
  from .tools import __all__
  from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+ from ...configurations.carrier import CarrierConfiguration

  logger = logging.getLogger(__name__)

@@ -27,12 +28,8 @@ class AlitaCarrierToolkit(BaseToolkit):
          cls.toolkit_max_length = get_max_toolkit_length(selected_tools)
          return create_model(
              name,
-             url=(str, Field(description="Carrier Platform Base URL")),
-             organization=(str, Field(description="Carrier Organization Name", json_schema_extra={'toolkit_name': True,
-                                                                                                   'max_toolkit_length': cls.toolkit_max_length})),
-             private_token=(
-                 SecretStr, Field(description="Carrier Platform Authentication Token", json_schema_extra={'secret': True})),
              project_id=(Optional[str], Field(None, description="Optional project ID for scoped operations")),
+             carrier_configuration=(CarrierConfiguration, Field(description="Carrier Configuration", json_schema_extra={'configuration_types': ['carrier']})),
              selected_tools=(
                  List[Literal[tuple(selected_tools)]],
                  Field(default=[], json_schema_extra={"args_schemas": selected_tools}),
@@ -58,10 +55,15 @@ class AlitaCarrierToolkit(BaseToolkit):
          selected_tools = selected_tools or []
          logger.info(f"[AlitaCarrierToolkit] Initializing toolkit with selected tools: {selected_tools}")

+         wrapper_payload = {
+             **kwargs,
+             **kwargs.get('carrier_configuration', {}),
+         }
+
          try:
-             carrier_api_wrapper = CarrierAPIWrapper(**kwargs)
+             carrier_api_wrapper = CarrierAPIWrapper(**wrapper_payload)
              logger.info(
-                 f"[AlitaCarrierToolkit] CarrierAPIWrapper initialized successfully with URL: {kwargs.get('url')}")
+                 f"[AlitaCarrierToolkit] CarrierAPIWrapper initialized successfully with URL: {wrapper_payload.get('url')}")
          except Exception as e:
              logger.exception(f"[AlitaCarrierToolkit] Error initializing CarrierAPIWrapper: {e}")
              raise ValueError(f"CarrierAPIWrapper initialization error: {e}")
@@ -92,9 +94,7 @@ class AlitaCarrierToolkit(BaseToolkit):
  def get_tools(tool_config: Dict) -> List[BaseTool]:
      return AlitaCarrierToolkit.get_toolkit(
          selected_tools=tool_config.get('selected_tools', []),
-         url=tool_config['settings']['url'],
          project_id=tool_config['settings'].get('project_id'),
-         organization=tool_config['settings']['organization'],
-         private_token=tool_config['settings']['private_token'],
+         carrier_configuration=tool_config['settings']['carrier_configuration'],
          toolkit_name=tool_config.get('toolkit_name')
      ).get_tools()
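
The change above is the first instance of a pattern this release applies uniformly (the sonar and google_places diffs below repeat it): per-toolkit credential fields such as url, organization, and private_token move into a single nested configuration object, and get_toolkit flattens that object into the wrapper kwargs by dict unpacking. A minimal sketch of the merge semantics, assuming the configuration arrives as a plain dict as the unpacking implies; the helper name and values are illustrative, not from the package:

    # Hypothetical helper, for illustration only.
    def build_wrapper_payload(kwargs: dict, config_key: str) -> dict:
        return {
            **kwargs,                      # toolkit-level settings first
            **kwargs.get(config_key, {}),  # nested configuration overrides duplicates
        }

    payload = build_wrapper_payload(
        {'project_id': '42',
         'carrier_configuration': {'url': 'https://carrier.example',
                                   'organization': 'acme',
                                   'private_token': 's3cr3t'}},
        'carrier_configuration',
    )
    assert payload['url'] == 'https://carrier.example'  # flattened for the API wrapper

Note that the nested key itself remains in the merged payload; the wrapper is evidently expected to tolerate the extra argument.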
alita_sdk/tools/code/sonar/__init__.py
@@ -1,19 +1,19 @@
  from typing import List, Literal, Optional
  from langchain_core.tools import BaseToolkit, BaseTool
- from pydantic import create_model, BaseModel, ConfigDict, Field, SecretStr
+ from pydantic import create_model, BaseModel, ConfigDict, Field

  from .api_wrapper import SonarApiWrapper
  from ...base.tool import BaseAction
  from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+ from ....configurations.sonar import SonarConfiguration

  name = "sonar"

  def get_tools(tool):
      return SonarToolkit().get_toolkit(
          selected_tools=tool['settings'].get('selected_tools', []),
-         url=tool['settings']['url'],
-         sonar_token=tool['settings']['sonar_token'],
          sonar_project_name=tool['settings']['sonar_project_name'],
+         sonar_configuration=tool['settings']['sonar_configuration'],
          toolkit_name=tool.get('toolkit_name')
      ).get_tools()

@@ -28,9 +28,8 @@ class SonarToolkit(BaseToolkit):
      SonarToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
      return create_model(
          name,
-         url=(str, Field(description="SonarQube Server URL", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': SonarToolkit.toolkit_max_length})),
-         sonar_token=(SecretStr, Field(description="SonarQube user token for authentication", json_schema_extra={'secret': True})),
-         sonar_project_name=(str, Field(description="Project name of the desired repository")),
+         sonar_project_name=(str, Field(description="Project name of the desired repository", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': SonarToolkit.toolkit_max_length})),
+         sonar_configuration=(SonarConfiguration, Field(description="Sonar Configuration", json_schema_extra={'configuration_types': ['sonar']})),
          selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
          __config__=ConfigDict(json_schema_extra=
              {
@@ -47,7 +46,11 @@ class SonarToolkit(BaseToolkit):
      def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
          if selected_tools is None:
              selected_tools = []
-         sonar_api_wrapper = SonarApiWrapper(**kwargs)
+         wrapper_payload = {
+             **kwargs,
+             **kwargs.get('sonar_configuration', {}),
+         }
+         sonar_api_wrapper = SonarApiWrapper(**wrapper_payload)
          available_tools = sonar_api_wrapper.get_available_tools()
          tools = []
          prefix = clean_string(toolkit_name, SonarToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
alita_sdk/tools/confluence/__init__.py
@@ -81,7 +81,7 @@ class ConfluenceToolkit(BaseToolkit):
          max_retry_seconds=(int, Field(description="Max retry, sec", default=60)),
          # optional field for custom headers as dictionary
          custom_headers=(Optional[dict], Field(description="Custom headers for API requests", default=None)),
-         confluence_configuration=(Optional[ConfluenceConfiguration], Field(description="Confluence Configuration", json_schema_extra={'configuration_types': ['confluence']})),
+         confluence_configuration=(ConfluenceConfiguration, Field(description="Confluence Configuration", json_schema_extra={'configuration_types': ['confluence']})),
          pgvector_configuration=(Optional[PgVectorConfiguration], Field(default = None,
                                                                         description="PgVector Configuration",
                                                                         json_schema_extra={'configuration_types': ['pgvector']})),
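
The Optional[...] removal above recurs in the figma, github, gitlab_org, bigquery, jira, qtest, and rally diffs below. A short illustration of what the change means under pydantic v2 (my own sketch, not package code): a field annotated Optional[X] with no default is still required but admits None, whereas a plain X annotation rejects None, so callers must now supply a concrete configuration:

    from typing import Optional
    from pydantic import BaseModel, ValidationError

    class Cfg(BaseModel):
        nullable_cfg: Optional[int]  # required, but None passes validation
        strict_cfg: int              # required, and None is rejected

    Cfg(nullable_cfg=None, strict_cfg=1)  # validates
    try:
        Cfg(nullable_cfg=None, strict_cfg=None)
    except ValidationError as exc:
        print(len(exc.errors()), "validation error")  # 1 validation error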
alita_sdk/tools/elitea_base.py
@@ -90,22 +90,23 @@ BaseStepbackSearchParams = create_model(
      )),
      cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
      search_top=(Optional[int], Field(description="Number of top results to return", default=10, ge=0)),
-     reranker=(Optional[dict], Field(
-         description="Reranker configuration. Can be a dictionary with reranking parameters.",
-         default={}
-     )),
      full_text_search=(Optional[Dict[str, Any]], Field(
          description="Full text search parameters. Can be a dictionary with search options.",
          default=None
      )),
-     reranking_config=(Optional[Dict[str, Dict[str, Any]]], Field(
-         description="Reranking configuration. Can be a dictionary with reranking settings.",
-         default=None
-     )),
      extended_search=(Optional[List[str]], Field(
          description="List of additional fields to include in the search results.",
          default=None
      )),
+     reranker=(Optional[dict], Field(
+         description="Reranker configuration. Can be a dictionary with reranking parameters.",
+         default={}
+     )),
+     reranking_config=(Optional[Dict[str, Dict[str, Any]]], Field(
+         description="Reranking configuration. Can be a dictionary with reranking settings.",
+         default=None
+     )),
+
  )

  BaseIndexDataParams = create_model(
alita_sdk/tools/figma/__init__.py
@@ -53,7 +53,7 @@ class FigmaToolkit(BaseToolkit):
          Field(default=[], json_schema_extra={"args_schemas": selected_tools}),
      ),
      # Figma configuration
-     figma_configuration=(Optional[FigmaConfiguration], Field(description="Figma configuration", json_schema_extra={'configuration_types': ['figma']})),
+     figma_configuration=(FigmaConfiguration, Field(description="Figma configuration", json_schema_extra={'configuration_types': ['figma']})),

      # indexer settings
      pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
alita_sdk/tools/github/__init__.py
@@ -57,8 +57,8 @@ class AlitaGitHubToolkit(BaseToolkit):
              },
          }
      ),
-     github_configuration=(Optional[GithubConfiguration], Field(description="Github configuration", default=None,
-                                                                json_schema_extra={'configuration_types': ['github']})),
+     github_configuration=(GithubConfiguration, Field(description="Github configuration",
+                                                      json_schema_extra={'configuration_types': ['github']})),
      pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector configuration", default=None,
                                                                     json_schema_extra={'configuration_types': ['pgvector']})),
      repository=(str, Field(description="Github repository", json_schema_extra={'toolkit_name': True,
alita_sdk/tools/gitlab_org/__init__.py
@@ -30,7 +30,7 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
      name,
      name=(str, Field(description="Toolkit name", json_schema_extra={'toolkit_name': True,
                                                                      'max_toolkit_length': AlitaGitlabSpaceToolkit.toolkit_max_length})),
-     gitlab_configuration=(Optional[GitlabConfiguration], Field(description="GitLab configuration",
+     gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration",
                                                        json_schema_extra={
                                                            'configuration_types': ['gitlab']})),
      repositories=(str, Field(
alita_sdk/tools/google/bigquery/__init__.py
@@ -46,7 +46,7 @@ class BigQueryToolkitConfig(BaseModel):
          }
      }

-     bigquery_configuration: Optional[BigQueryConfiguration] = Field(
+     bigquery_configuration: BigQueryConfiguration = Field(
          description="BigQuery configuration", json_schema_extra={"configuration_types": ["bigquery"]}
      )
      selected_tools: List[str] = Field(
alita_sdk/tools/google_places/__init__.py
@@ -1,19 +1,20 @@
  from typing import List, Literal, Optional
  from langchain_core.tools import BaseToolkit, BaseTool
- from pydantic import create_model, BaseModel, ConfigDict, SecretStr
+ from pydantic import create_model, BaseModel, ConfigDict
  from pydantic.fields import Field

  from .api_wrapper import GooglePlacesAPIWrapper
  from ..base.tool import BaseAction
  from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+ from ...configurations.google_places import GooglePlacesConfiguration

  name = "google_places"

  def get_tools(tool):
      return GooglePlacesToolkit().get_toolkit(
          selected_tools=tool['settings'].get('selected_tools', []),
-         api_key=tool['settings']['api_key'],
          results_count=tool['settings'].get('results_count'),
+         google_places_configuration=tool['settings']['google_places_configuration'],
          toolkit_name=tool.get('toolkit_name')
      ).get_tools()

@@ -28,8 +29,8 @@ class GooglePlacesToolkit(BaseToolkit):
      GooglePlacesToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
      return create_model(
          name,
-         api_key=(SecretStr, Field(description="Google Places API key", json_schema_extra={'secret': True, 'max_toolkit_length': GooglePlacesToolkit.toolkit_max_length})),
-         results_count=(Optional[int], Field(description="Results number to show", default=None)),
+         results_count=(Optional[int], Field(description="Results number to show", default=None, json_schema_extra={'toolkit_name': True, 'max_toolkit_length': GooglePlacesToolkit.toolkit_max_length})),
+         google_places_configuration=(GooglePlacesConfiguration, Field(description="Google Places Configuration", json_schema_extra={'configuration_types': ['google_places']})),
          selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
          __config__=ConfigDict(json_schema_extra=
              {
@@ -47,7 +48,11 @@ class GooglePlacesToolkit(BaseToolkit):
      def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
          if selected_tools is None:
              selected_tools = []
-         google_places_api_wrapper = GooglePlacesAPIWrapper(**kwargs)
+         wrapper_payload = {
+             **kwargs,
+             **kwargs.get('google_places_configuration', {}),
+         }
+         google_places_api_wrapper = GooglePlacesAPIWrapper(**wrapper_payload)
          prefix = clean_string(toolkit_name, GooglePlacesToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
          available_tools = google_places_api_wrapper.get_available_tools()
          tools = []
alita_sdk/tools/jira/__init__.py
@@ -75,7 +75,7 @@ class JiraToolkit(BaseToolkit):
      custom_headers=(Optional[dict], Field(description="Custom headers for API requests", default=None)),
      verify_ssl=(bool, Field(description="Verify SSL", default=True)),
      additional_fields=(Optional[str], Field(description="Additional fields", default="")),
-     jira_configuration=(Optional[JiraConfiguration], Field(description="Jira Configuration", json_schema_extra={'configuration_types': ['jira']})),
+     jira_configuration=(JiraConfiguration, Field(description="Jira Configuration", json_schema_extra={'configuration_types': ['jira']})),
      pgvector_configuration=(Optional[PgVectorConfiguration], Field(default=None,
                                                                     description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
      # embedder settings
alita_sdk/tools/jira/api_wrapper.py
@@ -4,7 +4,7 @@
  import re
  import traceback
  from json import JSONDecodeError
  from traceback import format_exc
- from typing import List, Optional, Any, Dict, Generator
+ from typing import List, Optional, Any, Dict, Generator, Literal
  import os

  from atlassian import Jira
@@ -15,8 +15,9 @@ import requests
  from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
  from ..llm.img_utils import ImageDescriptionCache
+ from ..non_code_indexer_toolkit import NonCodeIndexerToolkit
  from ..utils import is_cookie_token, parse_cookie_string
- from ..utils.content_parser import parse_file_content, load_content_from_bytes
+ from ..utils.content_parser import load_file_docs
  from ...runtime.utils.utils import IndexerKeywords

  logger = logging.getLogger(__name__)
@@ -391,7 +392,7 @@ def process_search_response(jira_url, response, payload_params: Dict[str, Any] =

      return str(processed_issues)

- class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
+ class JiraApiWrapper(NonCodeIndexerToolkit):
      base_url: str
      api_version: Optional[str] = "2",
      api_key: Optional[SecretStr] = None,
@@ -442,7 +443,7 @@ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
          cls._client._update_header(header, value)

          cls.llm=values.get('llm')
-         return values
+         return super().validate_toolkit(values)

      def _parse_issues(self, issues: Dict) -> List[dict]:
          parsed = []
@@ -721,8 +722,8 @@ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
              return parsed_projects_str
          except Exception:
              stacktrace = format_exc()
-             logger.error(f"Error creating Jira issue: {stacktrace}")
-             return ToolException(f"Error creating Jira issue: {stacktrace}")
+             logger.error(f"Error listing Jira projects: {stacktrace}")
+             return ToolException(f"Error listing Jira projects: {stacktrace}")

      def get_attachments_content(self, jira_issue_key: str):
          """ Extract content of all attachments related to specified Jira issue key.
@@ -1118,103 +1119,119 @@ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
              logger.error(f"Error processing field with images: {stacktrace}")
              return f"Error processing field with images: {str(e)}"

-     def get_comments_with_image_descriptions(self, jira_issue_key: str, prompt: Optional[str] = None, context_radius: int = 500):
-         """
-         Get all comments from Jira issue and augment any images in them with textual descriptions.
-
-         This method will:
-         1. Extract all comments from the specified Jira issue
-         2. Detect images in each comment
-         3. Retrieve and process each image with an LLM, providing surrounding context
-         4. Replace image references with the generated text descriptions
+     def process_image_match(self, match, body, attachment_resolver, context_radius=500, prompt=None):
+         """Process each image reference and get its contextual description"""
+         image_ref = match.group(1)
+         full_match = match.group(0)  # The complete image reference with markers

-         Args:
-             jira_issue_key: The Jira issue key to retrieve comments from (e.g., 'TEST-1234')
-             prompt: Custom prompt for the LLM when analyzing images. If None, a default prompt will be used.
-             context_radius: Number of characters to include before and after each image for context. Default is 500.
+         logger.info(f"Processing image reference: {image_ref} (full match: {full_match})")

-         Returns:
-             The comments with image references replaced with contextual descriptions
-         """
          try:
-             # Retrieve all comments for the issue
-             comments = self._client.issue_get_comments(jira_issue_key)
-
-             if not comments or not comments.get('comments'):
-                 return f"No comments found for issue '{jira_issue_key}'"
+             # Use the AttachmentResolver to find the attachment
+             attachment = attachment_resolver.find_attachment(image_ref)
+
+             if not attachment:
+                 logger.warning(f"Could not find attachment for reference: {image_ref}")
+                 if image_ref.startswith("http://") or image_ref.startswith("https://"):
+                     content_url = image_ref
+                     image_name = image_ref.split("/")[-1]  # Extract the name from the URL
+                     response = requests.get(content_url, timeout=10)
+                     response.raise_for_status()
+                     image_data = response.content
+                 else:
+                     logger.error(f"Invalid image reference: {image_ref}")
+                     return f"[Image: {image_ref} - attachment not found]"
+             else:
+                 # Get the content URL and download the image
+                 content_url = attachment.get('content')
+                 if not content_url:
+                     logger.error(f"No content URL found in attachment: {attachment}")
+                     return f"[Image: {image_ref} - no content URL]"

-             processed_comments = []
+                 image_name = attachment.get('filename', image_ref)

-             # Create an AttachmentResolver to efficiently handle attachment lookups
-             attachment_resolver = AttachmentResolver(self._client, jira_issue_key)
+                 # Download the image data
+                 logger.info(f"Downloading image from URL: {content_url}")
+                 image_data = self._download_attachment(content_url)

-             # Regular expression to find image references in Jira markup
-             image_pattern = r'!([^!|]+)(?:\|[^!]*)?!'
+                 if not image_data:
+                     logger.error(f"Failed to download image from URL: {content_url}")
+                     return f"[Image: {image_ref} - download failed]"

-             # Process each comment
-             for comment in comments['comments']:
-                 comment_body = comment.get('body', '')
-                 if not comment_body:
-                     continue
+             # Collect surrounding content
+             context_text = self._collect_context_for_image(body, full_match, context_radius)

-                 comment_author = comment.get('author', {}).get('displayName', 'Unknown')
-                 comment_created = comment.get('created', 'Unknown date')
+             # Process with LLM (will use cache if available)
+             description = self._process_image_with_llm(image_data, image_name, context_text, prompt)
+             return f"[Image {image_name} Description: {description}]"

-                 # Function to process images in comment text
-                 def process_image_match(match):
-                     """Process each image reference and get its contextual description"""
-                     image_ref = match.group(1)
-                     full_match = match.group(0)  # The complete image reference with markers
+         except Exception as e:
+             logger.error(f"Error retrieving attachment {image_ref}: {str(e)}")
+             return f"[Image: {image_ref} - Error: {str(e)}]"

-                     logger.info(f"Processing image reference: {image_ref} (full match: {full_match})")
+     def get_processed_comments_list_with_image_description(self, jira_issue_key: str, prompt: Optional[str] = None, context_radius: int = 500):
+         # Retrieve all comments for the issue
+         comments = self._client.issue_get_comments(jira_issue_key)

-                     try:
-                         # Use the AttachmentResolver to find the attachment
-                         attachment = attachment_resolver.find_attachment(image_ref)
+         if not comments or not comments.get('comments'):
+             return []

-                         if not attachment:
-                             logger.warning(f"Could not find attachment for reference: {image_ref}")
-                             return f"[Image: {image_ref} - attachment not found]"
+         processed_comments = []

-                         # Get the content URL and download the image
-                         content_url = attachment.get('content')
-                         if not content_url:
-                             logger.error(f"No content URL found in attachment: {attachment}")
-                             return f"[Image: {image_ref} - no content URL]"
+         # Create an AttachmentResolver to efficiently handle attachment lookups
+         attachment_resolver = AttachmentResolver(self._client, jira_issue_key)

-                         image_name = attachment.get('filename', image_ref)
+         # Regular expression to find image references in Jira markup
+         image_pattern = r'!([^!|]+)(?:\|[^!]*)?!'

-                         # Collect surrounding content
-                         context_text = self._collect_context_for_image(comment_body, full_match, context_radius)
+         # Process each comment
+         for comment in comments['comments']:
+             comment_body = comment.get('body', '')
+             if not comment_body:
+                 continue

-                         # Download the image data
-                         logger.info(f"Downloading image from URL: {content_url}")
-                         image_data = self._download_attachment(content_url)
+             comment_author = comment.get('author', {}).get('displayName', 'Unknown')
+             comment_created = comment.get('created', 'Unknown date')

-                         if not image_data:
-                             logger.error(f"Failed to download image from URL: {content_url}")
-                             return f"[Image: {image_ref} - download failed]"
+             # Process the comment body by replacing image references with descriptions
+             processed_body = re.sub(image_pattern,
+                                     lambda match: self.process_image_match(match, comment_body, attachment_resolver, context_radius, prompt),
+                                     comment_body)

-                         # Process with LLM (will use cache if available)
-                         description = self._process_image_with_llm(image_data, image_name, context_text, prompt)
-                         return f"[Image {image_name} Description: {description}]"
+             # Add the processed comment to our results
+             processed_comments.append({
+                 "author": comment_author,
+                 "created": comment_created,
+                 "id": comment.get('id'),
+                 "original_content": comment_body,
+                 "processed_content": processed_body
+             })
+         return processed_comments

-                     except Exception as e:
-                         logger.error(f"Error retrieving attachment {image_ref}: {str(e)}")
-                         return f"[Image: {image_ref} - Error: {str(e)}]"
+     def get_comments_with_image_descriptions(self, jira_issue_key: str, prompt: Optional[str] = None, context_radius: int = 500):
+         """
+         Get all comments from Jira issue and augment any images in them with textual descriptions.

-                 # Process the comment body by replacing image references with descriptions
-                 processed_body = re.sub(image_pattern, process_image_match, comment_body)
+         This method will:
+         1. Extract all comments from the specified Jira issue
+         2. Detect images in each comment
+         3. Retrieve and process each image with an LLM, providing surrounding context
+         4. Replace image references with the generated text descriptions

-                 # Add the processed comment to our results
-                 processed_comments.append({
-                     "author": comment_author,
-                     "created": comment_created,
-                     "id": comment.get('id'),
-                     "original_content": comment_body,
-                     "processed_content": processed_body
-                 })
+         Args:
+             jira_issue_key: The Jira issue key to retrieve comments from (e.g., 'TEST-1234')
+             prompt: Custom prompt for the LLM when analyzing images. If None, a default prompt will be used.
+             context_radius: Number of characters to include before and after each image for context. Default is 500.

+         Returns:
+             The comments with image references replaced with contextual descriptions
+         """
+         try:
+             processed_comments = self.get_processed_comments_list_with_image_description(jira_issue_key=jira_issue_key,
+                                                                                          prompt=prompt,
+                                                                                          context_radius=context_radius)
+             if not processed_comments:
+                 return f"No comments found for issue '{jira_issue_key}'"
              # Format the output
              result = f"Comments from issue '{jira_issue_key}' with image descriptions:\n\n"
              for idx, comment in enumerate(processed_comments, 1):
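
The heart of this refactor is that the former nested process_image_match closure becomes a method, so the same image-to-description logic is shared by the new get_processed_comments_list_with_image_description and, further down, by the new _extend_data indexing hook. As a standalone reference (my own sketch, not package code), this is how the Jira image-markup regex that drives it behaves:

    import re

    # Group 1 captures the attachment reference; the optional (?:\|[^!]*)? part
    # consumes display hints such as |thumbnail or |width=200.
    image_pattern = r'!([^!|]+)(?:\|[^!]*)?!'

    body = "Before !screenshot.png|thumbnail! and !https://host/img.png! after."
    print(re.findall(image_pattern, body))
    # ['screenshot.png', 'https://host/img.png']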
@@ -1243,6 +1260,7 @@ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
          self._skipped_attachment_extensions = kwargs.get('skip_attachment_extensions', [])
          self._include_attachments = kwargs.get('include_attachments', False)
          self._included_fields = fields_to_extract.copy() if fields_to_extract else []
+         self._include_comments = kwargs.get('include_comments', True)

          try:
              # Prepare fields to extract
@@ -1285,6 +1303,18 @@ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
              logger.error(f"Error loading Jira issues: {str(e)}")
              raise ToolException(f"Unable to load Jira issues: {str(e)}")

+     def _extend_data(self, documents: Generator[Document, None, None]):
+         image_pattern = r'!([^!|]+)(?:\|[^!]*)?!'
+         for doc in documents:
+             attachment_resolver = AttachmentResolver(self._client, doc.metadata['issue_key'])
+             processed_content = re.sub(image_pattern,
+                                        lambda match: self.process_image_match(match,
+                                                                               doc.page_content,
+                                                                               attachment_resolver),
+                                        doc.page_content)
+             doc.page_content = processed_content
+             yield doc
+
      def _process_document(self, base_document: Document) -> Generator[Document, None, None]:
          """
          Process a base document to extract and index Jira issues extra fields: comments, attachments, etc..
@@ -1306,21 +1336,36 @@ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
              except Exception as e:
                  logger.error(f"Failed to download attachment {attachment['filename']} for issue {issue_key}: {str(e)}")
                  attachment_content = self._client.get(path=f"secure/attachment/{attachment['id']}/{attachment['filename']}", not_json_response=True)
-             content = load_content_from_bytes(attachment_content, ext, llm=self.llm) if ext not in '.pdf' \
-                 else parse_file_content(file_content=attachment_content, file_name=attachment['filename'], llm=self.llm, is_capture_image=True)
-             if not content:
+             content_docs = load_file_docs(file_content=attachment_content, file_name=attachment['filename'], llm=self.llm, is_capture_image=True, excel_by_sheets=True)
+             if not content_docs or isinstance(content_docs, ToolException):
                  continue
-             yield Document(page_content=content,
+             for doc in content_docs:
+                 yield Document(page_content=doc.page_content,
+                                metadata={
+                                    **doc.metadata,
+                                    'id': attachment_id,
+                                    'issue_key': issue_key,
+                                    'source': f"{self.base_url}/browse/{issue_key}",
+                                    'filename': attachment['filename'],
+                                    'created': attachment['created'],
+                                    'mimeType': attachment['mimeType'],
+                                    'author': attachment.get('author', {}).get('name'),
+                                    IndexerKeywords.PARENT.value: base_document.metadata.get('id', None),
+                                    'type': 'attachment',
+                                })
+         if self._include_comments:
+             comments = self.get_processed_comments_list_with_image_description(issue_key)
+             if comments:
+                 for comment in comments:
+                     yield Document(page_content=comment.get('processed_content'),
                            metadata={
-                               'id': attachment_id,
+                               'id': comment.get('id'),
                                'issue_key': issue_key,
                                'source': f"{self.base_url}/browse/{issue_key}",
-                               'filename': attachment['filename'],
-                               'created': attachment['created'],
-                               'mimeType': attachment['mimeType'],
-                               'author': attachment.get('author', {}).get('name'),
+                               'created': comment.get('created'),
+                               'author': comment.get('author'),
                                IndexerKeywords.PARENT.value: base_document.metadata.get('id', None),
-                               'type': 'attachment',
+                               'type': 'comment',
                            })

      def _jql_get_tickets(self, jql, fields="*all", start=0, limit=None, expand=None, validate_query=None):
@@ -1370,21 +1415,16 @@ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
          """
          try:
              # Build content starting with summary
-             content = f"{issue['fields']['summary']}\n"
+             content = f"# Summary\n{issue['fields']['summary']}\n\n"

              # Add description if present
              description = issue['fields'].get('description', '')
              if description:
-                 content += f"{description}\n"
+                 content += f"# Description\n{description}\n\n"
              else:
                  # If no description, still create document but with minimal content
                  logger.debug(f"Issue {issue.get('key', 'unknown')} has no description")

-             # Add comments if present
-             if 'comment' in issue['fields'] and issue['fields']['comment'].get('comments'):
-                 for comment in issue['fields']['comment']['comments']:
-                     content += f"{comment['body']}\n"
-
              # Add additional fields to index
              if fields_to_index:
                  for field in fields_to_index:
@@ -1395,7 +1435,7 @@ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
                          field_value = str(field_value)
                      elif isinstance(field_value, list):
                          field_value = ', '.join(str(item) for item in field_value)
-                     content += f"{field_value}\n"
+                     content += f"# {field}\n{field_value}\n\n"

              # Create metadata
              metadata = {
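
With the headings added above, the base document body becomes markdown rather than a bare concatenation of field values, which lines up with the new 'chunking_tool' index parameter defaulting to 'markdown' in the next hunk. Illustrative only (invented issue values, assuming one extra indexed field), the page_content produced by the rewritten builder would look like:

    content = (
        "# Summary\nLogin button unresponsive\n\n"
        "# Description\nClicking 'Log in' does nothing on Safari 17.\n\n"
        "# labels\nfrontend, regression\n\n"
    )

A markdown-aware splitter can then chunk on these '#' headings, so each section of an issue lands in its own chunk.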
@@ -1433,6 +1473,7 @@ class JiraApiWrapper(BaseVectorStoreToolApiWrapper):
          'skip_attachment_extensions': (Optional[List[str]], Field(
              description="List of file extensions to skip when processing attachments: i.e. ['.png', '.jpg']",
              default=[])),
+         'chunking_tool': (Literal['markdown'], Field(description="Name of chunking tool for base document", default='markdown')),
      }

      # def index_data(self,
alita_sdk/tools/qtest/__init__.py
@@ -33,7 +33,7 @@ class QtestToolkit(BaseToolkit):
      QtestToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
      m = create_model(
          name,
-         qtest_configuration=(Optional[QtestConfiguration], Field(description="QTest API token", json_schema_extra={
+         qtest_configuration=(QtestConfiguration, Field(description="QTest API token", json_schema_extra={
              'configuration_types': ['qtest']})),
          qtest_project_id=(int, Field(default=None, description="QTest project id", json_schema_extra={'toolkit_name': True,
                                                                                                        'max_toolkit_length': QtestToolkit.toolkit_max_length})),
alita_sdk/tools/rally/__init__.py
@@ -30,7 +30,7 @@ class RallyToolkit(BaseToolkit):
      name,
      name=(str, Field(description="Toolkit name", json_schema_extra={'toolkit_name': True,
                                                                      'max_toolkit_length': RallyToolkit.toolkit_max_length})),
-     rally_configuration=(Optional[RallyConfiguration], Field(description="Rally configuration", json_schema_extra={'configuration_types': ['rally']})),
+     rally_configuration=(RallyConfiguration, Field(description="Rally configuration", json_schema_extra={'configuration_types': ['rally']})),
      workspace=(Optional[str], Field(default=None, description="Rally workspace")),
      project=(Optional[str], Field(default=None, description="Rally project")),
      selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),