alita-sdk 0.3.208__py3-none-any.whl → 0.3.210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. alita_sdk/runtime/clients/artifact.py +18 -4
  2. alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py +2 -1
  3. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +3 -3
  4. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +8 -4
  5. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
  6. alita_sdk/runtime/langchain/langraph_agent.py +9 -6
  7. alita_sdk/runtime/toolkits/artifact.py +7 -3
  8. alita_sdk/runtime/toolkits/tools.py +8 -1
  9. alita_sdk/runtime/tools/application.py +2 -0
  10. alita_sdk/runtime/tools/artifact.py +65 -8
  11. alita_sdk/runtime/tools/vectorstore.py +125 -42
  12. alita_sdk/runtime/utils/utils.py +3 -0
  13. alita_sdk/tools/ado/__init__.py +8 -0
  14. alita_sdk/tools/ado/repos/repos_wrapper.py +37 -0
  15. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +0 -7
  16. alita_sdk/tools/ado/work_item/__init__.py +4 -0
  17. alita_sdk/tools/ado/work_item/ado_wrapper.py +37 -4
  18. alita_sdk/tools/aws/delta_lake/__init__.py +1 -1
  19. alita_sdk/tools/bitbucket/__init__.py +13 -1
  20. alita_sdk/tools/bitbucket/api_wrapper.py +31 -4
  21. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +31 -0
  22. alita_sdk/tools/chunkers/code/codeparser.py +18 -10
  23. alita_sdk/tools/confluence/api_wrapper.py +35 -134
  24. alita_sdk/tools/confluence/loader.py +30 -28
  25. alita_sdk/tools/elitea_base.py +112 -11
  26. alita_sdk/tools/figma/__init__.py +13 -1
  27. alita_sdk/tools/figma/api_wrapper.py +47 -3
  28. alita_sdk/tools/github/api_wrapper.py +8 -0
  29. alita_sdk/tools/github/github_client.py +18 -0
  30. alita_sdk/tools/gitlab/__init__.py +4 -0
  31. alita_sdk/tools/gitlab/api_wrapper.py +10 -0
  32. alita_sdk/tools/google/bigquery/__init__.py +1 -1
  33. alita_sdk/tools/jira/__init__.py +21 -13
  34. alita_sdk/tools/jira/api_wrapper.py +285 -5
  35. alita_sdk/tools/sharepoint/__init__.py +11 -1
  36. alita_sdk/tools/sharepoint/api_wrapper.py +23 -53
  37. alita_sdk/tools/testrail/__init__.py +4 -0
  38. alita_sdk/tools/testrail/api_wrapper.py +28 -56
  39. alita_sdk/tools/utils/content_parser.py +123 -9
  40. alita_sdk/tools/xray/__init__.py +8 -1
  41. alita_sdk/tools/xray/api_wrapper.py +505 -14
  42. alita_sdk/tools/zephyr_scale/api_wrapper.py +5 -5
  43. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/METADATA +1 -1
  44. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/RECORD +47 -47
  45. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/WHEEL +0 -0
  46. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/licenses/LICENSE +0 -0
  47. {alita_sdk-0.3.208.dist-info → alita_sdk-0.3.210.dist-info}/top_level.txt +0 -0
alita_sdk/tools/testrail/api_wrapper.py

@@ -4,13 +4,15 @@ from typing import Dict, List, Optional, Union, Any, Generator

  import pandas as pd
  from langchain_core.tools import ToolException
+ from openai import BadRequestError
  from pydantic import SecretStr, create_model, model_validator
  from pydantic.fields import Field, PrivateAttr
  from testrail_api import StatusCodeError, TestRailAPI
- from ..elitea_base import BaseVectorStoreToolApiWrapper, BaseIndexParams
+ from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
  from langchain_core.documents import Document

  from ...runtime.utils.utils import IndexerKeywords
+ from ..utils.content_parser import parse_file_content

  try:
      from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
@@ -288,20 +290,6 @@ updateCase = create_model(
      ),
  )

- # Schema for indexing TestRail data into vector store
- indexData = create_model(
-     "indexData",
-     __base__=BaseIndexParams,
-     project_id=(str, Field(description="TestRail project ID to index data from")),
-     suite_id=(Optional[str], Field(default=None, description="Optional TestRail suite ID to filter test cases")),
-     section_id=(Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
-     title_keyword=(Optional[str], Field(default=None, description="Optional keyword to filter test cases by title")),
-     progress_step=(Optional[int],
-                    Field(default=None, ge=0, le=100, description="Optional step size for progress reporting during indexing")),
-     clean_index=(Optional[bool],
-                  Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
- )
-
  SUPPORTED_KEYS = {
      "id", "title", "section_id", "template_id", "type_id", "priority_id", "milestone_id",
      "refs", "created_by", "created_on", "updated_by", "updated_on", "estimate",
@@ -316,14 +304,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
      password: Optional[SecretStr] = None,
      email: Optional[str] = None,
      _client: Optional[TestRailAPI] = PrivateAttr()  # Private attribute for the TestRail client
-     llm: Any = None
-
-     connection_string: Optional[SecretStr] = None
-     collection_name: Optional[str] = None
-     embedding_model: Optional[str] = "HuggingFaceEmbeddings"
-     embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
-     vectorstore_type: Optional[str] = "PGVector"
-

      @model_validator(mode="before")
      @classmethod
@@ -489,7 +469,8 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
              project_id=project_id, **params
          )

-         cases = extracted_cases.get("cases")
+         # support old versions of testrail_api
+         cases = extracted_cases.get("cases") if isinstance(extracted_cases, dict) else extracted_cases

          if cases is None:
              return ToolException("No test cases found in the extracted data.")
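The isinstance guard normalizes the two response shapes testrail_api has used over time: newer releases wrap results in a paginated dict under a "cases" key, while older ones return the list directly. A minimal, self-contained sketch of the same pattern:

from typing import Any, Dict, List, Union

def extract_cases(response: Union[Dict[str, Any], List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
    # Newer testrail_api: {"cases": [...], plus pagination keys}; older: a bare list.
    return response.get("cases") if isinstance(response, dict) else response

# Both shapes normalize to the same list:
assert extract_cases({"cases": [{"id": 1}]}) == [{"id": 1}]
assert extract_cases([{"id": 1}]) == [{"id": 1}]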
@@ -553,7 +534,8 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
      def _base_loader(self, project_id: str,
                       suite_id: Optional[str] = None,
                       section_id: Optional[int] = None,
-                      title_keyword: Optional[str] = None
+                      title_keyword: Optional[str] = None,
+                      **kwargs: Any
                       ) -> Generator[Document, None, None]:
          try:
              if suite_id:
@@ -576,7 +558,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
                  'title': case.get('title', ''),
                  'suite_id': suite_id or case.get('suite_id', ''),
                  'id': str(case.get('id', '')),
-                 'updated_on': case.get('updated_on') or -1,
+                 IndexerKeywords.UPDATED_ON.value: case.get('updated_on') or -1,
                  'labels': [lbl['title'] for lbl in case.get('labels', [])],
                  'type': case.get('type_id') or -1,
                  'priority': case.get('priority_id') or -1,
@@ -587,22 +569,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
                  'entity_type': 'test_case',
              })

-     def index_data(
-             self,
-             project_id: str,
-             suite_id: Optional[str] = None,
-             collection_suffix: str = "",
-             section_id: Optional[int] = None,
-             title_keyword: Optional[str] = None,
-             progress_step: Optional[int] = None,
-             clean_index: Optional[bool] = False
-     ):
-         """Load TestRail test cases into the vector store."""
-         docs = self._base_loader(project_id, suite_id, section_id, title_keyword)
-         embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
-         vs = self._init_vector_store(collection_suffix, embeddings=embedding)
-         return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
-
      def _process_document(self, document: Document) -> Generator[Document, None, None]:
          """
          Process an existing base document to extract relevant metadata for full document preparation.
@@ -625,16 +591,15 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):

          # process each attachment to extract its content
          for attachment in attachments:
-             attachment_id = attachment['id']
+             attachment_id = f"attach_{attachment['id']}"
              # add attachment id to metadata of parent
              document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
-
              # TODO: pass it to chunkers
              yield Document(page_content=self._process_attachment(attachment),
                             metadata={
                                 'project_id': base_data.get('project_id', ''),
-                                IndexerKeywords.PARENT.value: case_id,
-                                'id': attachment_id,
+                                'id': str(attachment_id),
+                                IndexerKeywords.PARENT.value: str(case_id),
                                 'filename': attachment['filename'],
                                 'filetype': attachment['filetype'],
                                 'created_on': attachment['created_on'],
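The attach_ prefix and the stringified ids establish a two-way parent/child link between a test case and its attachments. A sketch of the resulting relationship; the string values standing in for the IndexerKeywords constants are assumed, not taken from this diff:

from langchain_core.documents import Document

# Assumed stand-ins for IndexerKeywords.DEPENDENT_DOCS / IndexerKeywords.PARENT.
DEPENDENT_DOCS, PARENT = "dependent_docs", "parent"

case = Document(page_content="Login test", metadata={"id": "42"})
attachment_id = f"attach_{7}"  # prefix avoids id collisions with test-case ids

case.metadata.setdefault(DEPENDENT_DOCS, []).append(attachment_id)
attachment = Document(page_content="...parsed file content...",
                      metadata={"id": str(attachment_id),
                                PARENT: str(case.metadata["id"])})

assert attachment.metadata[PARENT] == "42"
assert attachment_id in case.metadata[DEPENDENT_DOCS]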
@@ -658,10 +623,24 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
          page_content = "This filetype is not supported."
          if attachment['filetype'] == 'txt':
              page_content = self._client.get(endpoint=f"get_attachment/{attachment['id']}")
-         # TODO: add support for other file types
-         # use utility to handle different types (tools/utils)
+         else:
+             try:
+                 attachment_path = self._client.attachments.get_attachment(attachment_id=attachment['id'], path=f"./{attachment['filename']}")
+                 page_content = parse_file_content(file_name=attachment['filename'], file_content=attachment_path.read_bytes(), llm=self.llm, is_capture_image=True)
+             except BadRequestError as ai_e:
+                 logger.error(f"Unable to parse page's content with type: {attachment['filetype']} due to AI service issues: {ai_e}")
+             except Exception as e:
+                 logger.error(f"Unable to parse page's content with type: {attachment['filetype']}: {e}")
          return page_content

+     def _index_tool_params(self):
+         return {
+             'project_id': (str, Field(description="TestRail project ID to index data from")),
+             'suite_id': (Optional[str],
+                          Field(default=None, description="Optional TestRail suite ID to filter test cases")),
+             'section_id': (Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
+         }
+
      def _to_markup(self, data: List[Dict], output_format: str) -> str:
          """
          Converts the given data into the specified format: 'json', 'csv', or 'markdown'.
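The new _index_tool_params hook replaces the hand-written indexData schema removed earlier: the toolkit now only declares its own filter fields, and the shared base class presumably assembles the full args schema from them. A sketch of how such (type, Field) tuples feed pydantic's create_model, the same mechanism the removed indexData used directly:

from typing import Optional
from pydantic import Field, create_model

index_params = {
    "project_id": (str, Field(description="TestRail project ID to index data from")),
    "suite_id": (Optional[str], Field(default=None, description="Optional suite ID filter")),
    "section_id": (Optional[int], Field(default=None, description="Optional section ID filter")),
}

# create_model accepts exactly the (type, FieldInfo) tuples _index_tool_params returns.
indexData = create_model("indexData", **index_params)
print(indexData(project_id="1").model_dump())
# {'project_id': '1', 'suite_id': None, 'section_id': None}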
@@ -689,6 +668,7 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
          if output_format == "markdown":
              return df.to_markdown(index=False)

+     @extend_with_vector_tools
      def get_available_tools(self):
          tools = [
              {
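@extend_with_vector_tools absorbs the boilerplate that the next hunk deletes from the end of get_available_tools. Its definition is not part of this diff; a plausible minimal shape, assuming it simply appends the base wrapper's vector-search tools (the _get_vector_search_tools call does appear in the removed lines below):

import functools

def extend_with_vector_tools(method):
    # Sketch only: the real decorator lives in alita_sdk.tools.elitea_base.
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        tools = method(self, *args, **kwargs)
        tools.extend(self._get_vector_search_tools())  # provided by the base wrapper
        return tools
    return wrapper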
@@ -726,14 +706,6 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
                  "ref": self.update_case,
                  "description": self.update_case.__doc__,
                  "args_schema": updateCase,
-             },
-             {
-                 "name": "index_data",
-                 "ref": self.index_data,
-                 "description": self.index_data.__doc__,
-                 "args_schema": indexData,
              }
          ]
-         # Add vector search from base
-         tools.extend(self._get_vector_search_tools())
          return tools
alita_sdk/tools/utils/content_parser.py

@@ -11,9 +11,12 @@ import pymupdf
  from langchain_core.tools import ToolException
  from transformers import BlipProcessor, BlipForConditionalGeneration
  from langchain_core.messages import HumanMessage
+ from logging import getLogger

  from ...runtime.langchain.tools.utils import bytes_to_base64

+ logger = getLogger(__name__)
+
  image_processing_prompt='''
  You are an AI model designed for analyzing images. Your task is to accurately describe the content of the given image. Depending on the type of image, follow these specific instructions:

@@ -56,13 +59,39 @@ Be as precise and thorough as possible in your responses. If something is unclear

  IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp', 'svg']

- def parse_file_content(file_name, file_content, is_capture_image: bool = False, page_number: int = None, sheet_name: str = None, llm=None):
+
+ def parse_file_content(file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
+                        sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False):
+     """Parse the content of a file based on its type and return the parsed content.
+
+     Args:
+         file_name (str): The name of the file to parse.
+         file_content (bytes): The content of the file as bytes.
+         is_capture_image (bool): Whether to capture images from the file.
+         page_number (int, optional): The specific page number to parse for PDF or PPTX files.
+         sheet_name (str, optional): The specific sheet name to parse for Excel files.
+         llm: The language model to use for image processing.
+         file_path (str, optional): The path to the file if it needs to be read from disk.
+     Returns:
+         str: The parsed content of the file.
+     Raises:
+         ToolException: If the file type is not supported or if there is an error reading the file.
+     """
+
+     if (file_path and (file_name or file_content)) or (not file_path and (not file_name or file_content is None)):
+         raise ToolException("Either (file_name and file_content) or file_path must be provided, but not both.")
+
+     if file_path:
+         file_content = file_to_bytes(file_path)
+         if file_content is None:
+             return ToolException(f"File not found or could not be read: {file_path}")
+         file_name = file_path.split('/')[-1]  # Extract file name from path
      if file_name.endswith('.txt'):
          return parse_txt(file_content)
      elif file_name.endswith('.docx'):
          return read_docx_from_bytes(file_content)
      elif file_name.endswith('.xlsx') or file_name.endswith('.xls'):
-         return parse_excel(file_content, sheet_name)
+         return parse_excel(file_content, sheet_name, excel_by_sheets)
      elif file_name.endswith('.pdf'):
          return parse_pdf(file_content, page_number, is_capture_image, llm)
      elif file_name.endswith('.pptx'):
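parse_file_content now supports two mutually exclusive call styles, enforced by the guard above. A usage sketch; the file paths are illustrative:

from alita_sdk.tools.utils.content_parser import parse_file_content

# In-memory mode: pass a name (for type detection) plus the raw bytes.
text = parse_file_content(file_name="notes.txt", file_content=b"hello")

# Disk mode: bytes are read via file_to_bytes and the name is derived from the path.
text = parse_file_content(file_path="./downloads/report.pdf", page_number=1)

# Supplying both a path and in-memory data (or neither) raises ToolException.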
@@ -80,17 +109,26 @@ def parse_txt(file_content):
      except Exception as e:
          return ToolException(f"Error decoding file content: {e}")

- def parse_excel(file_content, sheet_name = None):
+ def parse_excel(file_content, sheet_name = None, return_by_sheets: bool = False):
      try:
          excel_file = io.BytesIO(file_content)
          if sheet_name:
              return parse_sheet(excel_file, sheet_name)
          dfs = pd.read_excel(excel_file, sheet_name=sheet_name)
-         result = []
-         for sheet_name, df in dfs.items():
-             df.fillna('', inplace=True)
-             result.append(f"=== Sheet: {sheet_name} ===\n{df.to_string(index=False)}")
-         return "\n\n".join(result)
+
+         if return_by_sheets:
+             result = {}
+             for sheet_name, df in dfs.items():
+                 df.fillna('', inplace=True)
+                 result[sheet_name] = df.to_dict(orient='records')
+             return result
+         else:
+             result = []
+             for sheet_name, df in dfs.items():
+                 df.fillna('', inplace=True)
+                 string_content = df.to_string(index=False)
+                 result.append(f"====== Sheet name: {sheet_name} ======\n{string_content}")
+             return "\n\n".join(result)
      except Exception as e:
          return ToolException(f"Error reading Excel file: {e}")

@@ -165,6 +203,8 @@ def describe_image(image):
      return "\n[Picture: " + processor.decode(out[0], skip_special_tokens=True) + "]\n"

  def __perform_llm_prediction_for_image(llm, image: bytes, image_format='png', prompt=image_processing_prompt) -> str:
+     if not llm:
+         raise ToolException("LLM is not provided for image processing.")
      base64_string = bytes_to_base64(image)
      result = llm.invoke([
          HumanMessage(
@@ -176,4 +216,78 @@
              },
          ])
      ])
-     return f"\n[Image description: {result.content}]\n"
+     return f"\n[Image description: {result.content}]\n"
+
+
+ # TODO: review usage of this function alongside with functions above
+ def load_content(file_path: str, extension: str = None, loader_extra_config: dict = None, llm = None) -> str:
+     """
+     Loads the content of a file based on its extension using a configured loader.
+     """
+     try:
+         from ...runtime.langchain.document_loaders.constants import loaders_map
+
+         if not extension:
+             extension = file_path.split('.')[-1].lower()
+
+         loader_config = loaders_map.get(extension)
+         if not loader_config:
+             logger.warning(f"No loader found for file extension: {extension}. File: {file_path}")
+             return ""
+
+         loader_cls = loader_config['class']
+         loader_kwargs = loader_config['kwargs']
+
+         if loader_extra_config:
+             loader_kwargs.update(loader_extra_config)
+         if loader_config['is_multimodal_processing'] and llm:
+             loader_kwargs.update({'llm': llm})
+
+         loader = loader_cls(file_path, **loader_kwargs)
+         documents = loader.load()
+
+         page_contents = [doc.page_content for doc in documents]
+         return "\n".join(page_contents)
+     except Exception as e:
+         error_message = f"Error loading attachment: {str(e)}"
+         logger.warning(f"{error_message} for file {file_path}")
+         return ""
+
+
+ def load_content_from_bytes(file_content: bytes, extension: str = None, loader_extra_config: dict = None, llm = None) -> str:
+     """Loads the content of a file from bytes based on its extension using a configured loader."""
+
+     import tempfile
+
+     # Automatic cleanup with context manager
+     with tempfile.NamedTemporaryFile(mode='w+b', delete=True) as temp_file:
+         # Write data to temp file
+         temp_file.write(file_content)
+         temp_file.flush()  # Ensure data is written
+
+         # Get the file path for operations
+         temp_path = temp_file.name
+
+         # Perform your operations
+         return load_content(temp_path, extension, loader_extra_config, llm)
+
+
+ def file_to_bytes(filepath):
+     """
+     Reads a file and returns its content as a bytes object.
+
+     Args:
+         filepath (str): The path to the file.
+
+     Returns:
+         bytes: The content of the file as a bytes object.
+     """
+     try:
+         with open(filepath, "rb") as f:
+             file_content_bytes = f.read()
+         return file_content_bytes
+     except FileNotFoundError:
+         logger.error(f"File not found: {filepath}")
+         return None
+     except Exception as e:
+         logger.error(f"Error reading file {filepath}: {e}")
+         return None
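load_content_from_bytes bridges byte-oriented callers and the path-based loaders in loaders_map: it spills the bytes into a NamedTemporaryFile, delegates to load_content, and lets the context manager delete the file. A usage sketch; the .docx path is illustrative:

from alita_sdk.tools.utils.content_parser import file_to_bytes, load_content_from_bytes

raw = file_to_bytes("./specs/plan.docx")   # returns None (and logs) on read errors
if raw is not None:
    # Pass extension explicitly: the temp file name carries no suffix, so
    # load_content could not infer the type from the path on its own.
    text = load_content_from_bytes(raw, extension="docx")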
alita_sdk/tools/xray/__init__.py

@@ -20,7 +20,14 @@ def get_tools(tool):
          client_secret=tool['settings'].get('client_secret', None),
          limit=tool['settings'].get('limit', 20),
          verify_ssl=tool['settings'].get('verify_ssl', True),
-         toolkit_name=tool.get('toolkit_name')
+         toolkit_name=tool.get('toolkit_name'),
+
+         # indexer settings
+         connection_string=tool['settings'].get('connection_string', None),
+         collection_name=f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+         embedding_model="HuggingFaceEmbeddings",
+         embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+         vectorstore_type="PGVector"
      ).get_tools()

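The toolkit layer now injects the indexer configuration at construction time, matching the per-wrapper fields deleted from TestrailAPIWrapper in the hunks above (which presumably now live on the shared base wrapper). The collection name, in particular, is derived from the toolkit instance; a sketch with an assumed tool config dict:

tool = {"id": 17, "toolkit_name": "my_toolkit",
        "settings": {"connection_string": "postgresql+psycopg://..."}}

# One vector-store collection per toolkit instance:
collection_name = f"{tool.get('toolkit_name')}_{str(tool['id'])}"
assert collection_name == "my_toolkit_17"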