alita-sdk 0.3.204__py3-none-any.whl → 0.3.206__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. alita_sdk/runtime/clients/client.py +45 -5
  2. alita_sdk/runtime/langchain/assistant.py +22 -21
  3. alita_sdk/runtime/langchain/interfaces/llm_processor.py +1 -4
  4. alita_sdk/runtime/toolkits/application.py +5 -10
  5. alita_sdk/runtime/toolkits/tools.py +0 -1
  6. alita_sdk/runtime/tools/vectorstore.py +157 -13
  7. alita_sdk/runtime/utils/streamlit.py +33 -30
  8. alita_sdk/runtime/utils/utils.py +5 -0
  9. alita_sdk/tools/__init__.py +4 -0
  10. alita_sdk/tools/ado/repos/repos_wrapper.py +20 -13
  11. alita_sdk/tools/aws/__init__.py +7 -0
  12. alita_sdk/tools/aws/delta_lake/__init__.py +136 -0
  13. alita_sdk/tools/aws/delta_lake/api_wrapper.py +220 -0
  14. alita_sdk/tools/aws/delta_lake/schemas.py +20 -0
  15. alita_sdk/tools/aws/delta_lake/tool.py +35 -0
  16. alita_sdk/tools/bitbucket/api_wrapper.py +5 -5
  17. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +54 -29
  18. alita_sdk/tools/elitea_base.py +55 -5
  19. alita_sdk/tools/gitlab/__init__.py +22 -10
  20. alita_sdk/tools/gitlab/api_wrapper.py +278 -253
  21. alita_sdk/tools/gitlab/tools.py +354 -376
  22. alita_sdk/tools/google/__init__.py +7 -0
  23. alita_sdk/tools/google/bigquery/__init__.py +154 -0
  24. alita_sdk/tools/google/bigquery/api_wrapper.py +502 -0
  25. alita_sdk/tools/google/bigquery/schemas.py +102 -0
  26. alita_sdk/tools/google/bigquery/tool.py +34 -0
  27. alita_sdk/tools/llm/llm_utils.py +0 -6
  28. alita_sdk/tools/openapi/__init__.py +14 -3
  29. alita_sdk/tools/sharepoint/__init__.py +2 -1
  30. alita_sdk/tools/sharepoint/api_wrapper.py +71 -7
  31. alita_sdk/tools/testrail/__init__.py +9 -1
  32. alita_sdk/tools/testrail/api_wrapper.py +154 -5
  33. alita_sdk/tools/utils/content_parser.py +77 -13
  34. alita_sdk/tools/zephyr_scale/api_wrapper.py +271 -22
  35. {alita_sdk-0.3.204.dist-info → alita_sdk-0.3.206.dist-info}/METADATA +3 -1
  36. {alita_sdk-0.3.204.dist-info → alita_sdk-0.3.206.dist-info}/RECORD +39 -30
  37. alita_sdk/runtime/llms/alita.py +0 -259
  38. {alita_sdk-0.3.204.dist-info → alita_sdk-0.3.206.dist-info}/WHEEL +0 -0
  39. {alita_sdk-0.3.204.dist-info → alita_sdk-0.3.206.dist-info}/licenses/LICENSE +0 -0
  40. {alita_sdk-0.3.204.dist-info → alita_sdk-0.3.206.dist-info}/top_level.txt +0 -0

alita_sdk/tools/google/bigquery/schemas.py (new file)
@@ -0,0 +1,102 @@
+ from enum import Enum
+ from typing import Any, Dict, List, Optional, Union
+
+ from pydantic import Field, create_model
+
+
+ class ArgsSchema(Enum):
+     NoInput = create_model("NoInput")
+     GetDocuments = create_model(
+         "GetDocuments",
+         ids=(
+             Optional[List[str]],
+             Field(default=None, description="List of document IDs to retrieve."),
+         ),
+         filter=(
+             Optional[Union[Dict[str, Any], str]],
+             Field(default=None, description="Filter as dict or SQL WHERE clause."),
+         ),
+     )
+     SimilaritySearch = create_model(
+         "SimilaritySearch",
+         query=(str, Field(description="Text query to search for similar documents.")),
+         k=(int, Field(default=5, description="Number of top results to return.")),
+         filter=(
+             Optional[Union[Dict[str, Any], str]],
+             Field(default=None, description="Filter as dict or SQL WHERE clause."),
+         ),
+     )
+     BatchSearch = create_model(
+         "BatchSearch",
+         queries=(
+             Optional[List[str]],
+             Field(default=None, description="List of text queries."),
+         ),
+         embeddings=(
+             Optional[List[List[float]]],
+             Field(default=None, description="List of embedding vectors."),
+         ),
+         k=(int, Field(default=5, description="Number of top results to return.")),
+         filter=(
+             Optional[Union[Dict[str, Any], str]],
+             Field(default=None, description="Filter as dict or SQL WHERE clause."),
+         ),
+     )
+     JobStatsArgs = create_model(
+         "JobStatsArgs", job_id=(str, Field(description="BigQuery job ID."))
+     )
+     SimilaritySearchByVectorArgs = create_model(
+         "SimilaritySearchByVectorArgs",
+         embedding=(List[float], Field(description="Embedding vector.")),
+         k=(int, Field(default=5, description="Number of top results to return.")),
+     )
+     SimilaritySearchByVectorWithScoreArgs = create_model(
+         "SimilaritySearchByVectorWithScoreArgs",
+         embedding=(List[float], Field(description="Embedding vector.")),
+         filter=(
+             Optional[Union[Dict[str, Any], str]],
+             Field(default=None, description="Filter as dict or SQL WHERE clause."),
+         ),
+         k=(int, Field(default=5, description="Number of top results to return.")),
+     )
+     SimilaritySearchWithScoreArgs = create_model(
+         "SimilaritySearchWithScoreArgs",
+         query=(str, Field(description="Text query.")),
+         filter=(
+             Optional[Union[Dict[str, Any], str]],
+             Field(default=None, description="Filter as dict or SQL WHERE clause."),
+         ),
+         k=(int, Field(default=5, description="Number of top results to return.")),
+     )
+     SimilaritySearchByVectorsArgs = create_model(
+         "SimilaritySearchByVectorsArgs",
+         embeddings=(List[List[float]], Field(description="List of embedding vectors.")),
+         filter=(
+             Optional[Union[Dict[str, Any], str]],
+             Field(default=None, description="Filter as dict or SQL WHERE clause."),
+         ),
+         k=(int, Field(default=5, description="Number of top results to return.")),
+         with_scores=(bool, Field(default=False)),
+         with_embeddings=(bool, Field(default=False)),
+     )
+     ExecuteArgs = create_model(
+         "ExecuteArgs",
+         method=(str, Field(description="Name of the BigQuery client method to call.")),
+         args=(
+             Optional[List[Any]],
+             Field(default=None, description="Positional arguments for the method."),
+         ),
+         kwargs=(
+             Optional[Dict[str, Any]],
+             Field(default=None, description="Keyword arguments for the method."),
+         ),
+     )
+     CreateDeltaLakeTable = create_model(
+         "CreateDeltaLakeTable",
+         table_name=(str, Field(description="Name of the Delta Lake table to create in BigQuery.")),
+         dataset=(Optional[str], Field(default=None, description="BigQuery dataset to contain the table (defaults to self.dataset).")),
+         connection_id=(str, Field(description="Fully qualified connection ID (project.region.connection_id).")),
+         source_uris=(list, Field(description="List of GCS URIs (prefixes) for the Delta Lake table.")),
+         autodetect=(bool, Field(default=True, description="Whether to autodetect schema (default: True).")),
+         project=(Optional[str], Field(default=None, description="GCP project ID (defaults to self.project).")),
+     )
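
These schemas are plain pydantic models built with create_model and grouped in an Enum, so a tool can validate its input before touching BigQuery. A minimal sketch of that validation step (the validate_args helper below is illustrative, not part of the package):

# Illustrative only: validating raw tool arguments against one of the generated schemas.
from typing import Any, Dict
from alita_sdk.tools.google.bigquery.schemas import ArgsSchema

def validate_args(schema_member: ArgsSchema, raw_args: Dict[str, Any]):
    # Each Enum member wraps a pydantic model class; .value returns it,
    # and instantiating it validates types and applies defaults.
    model_cls = schema_member.value
    return model_cls(**raw_args)

# validate_args(ArgsSchema.SimilaritySearch, {"query": "orders by region", "k": 3})
# -> SimilaritySearch(query='orders by region', k=3, filter=None)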

alita_sdk/tools/google/bigquery/tool.py (new file)
@@ -0,0 +1,34 @@
+ from typing import Optional, Type
+
+ from langchain_core.callbacks import CallbackManagerForToolRun
+ from pydantic import BaseModel, field_validator, Field
+ from langchain_core.tools import BaseTool
+ from traceback import format_exc
+ from .api_wrapper import BigQueryApiWrapper
+
+
+ class BigQueryAction(BaseTool):
+     """Tool for interacting with the BigQuery API."""
+
+     api_wrapper: BigQueryApiWrapper = Field(default_factory=BigQueryApiWrapper)
+     name: str
+     mode: str = ""
+     description: str = ""
+     args_schema: Optional[Type[BaseModel]] = None
+
+     @field_validator('name', mode='before')
+     @classmethod
+     def remove_spaces(cls, v):
+         return v.replace(' ', '')
+
+     def _run(
+         self,
+         *args,
+         run_manager: Optional[CallbackManagerForToolRun] = None,
+         **kwargs,
+     ) -> str:
+         """Use the GitHub API to run an operation."""
+         try:
+             return self.api_wrapper.run(self.mode, *args, **kwargs)
+         except Exception as e:
+             return f"Error: {format_exc()}"
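
The tool class above forwards every call to BigQueryApiWrapper.run(mode, ...). A hedged sketch of wiring it to one of the schemas from schemas.py; the wrapper configuration and the "similarity_search" mode string are assumptions, not taken from the diff:

# Illustrative only: constructing a BigQueryAction bound to a schema.
from alita_sdk.tools.google.bigquery.api_wrapper import BigQueryApiWrapper
from alita_sdk.tools.google.bigquery.schemas import ArgsSchema
from alita_sdk.tools.google.bigquery.tool import BigQueryAction

wrapper = BigQueryApiWrapper()  # hypothetical: real usage supplies project/dataset credentials
tool = BigQueryAction(
    api_wrapper=wrapper,
    name="similarity search",            # the field_validator strips spaces -> "similaritysearch"
    mode="similarity_search",            # assumed mode name, forwarded to api_wrapper.run()
    description="Search for similar rows in the BigQuery vector table.",
    args_schema=ArgsSchema.SimilaritySearch.value,
)
# tool.run({"query": "failed login events", "k": 5})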

alita_sdk/tools/llm/llm_utils.py
@@ -10,12 +10,6 @@ def get_model(model_type: str, model_params: dict):
          return None
      if model_type in llms:
          return get_llm(model_type)(**model_params)
-     elif model_type == "Alita":
-         try:
-             from alita_sdk.llms.alita import AlitaChatModel
-         except ImportError:
-             raise RuntimeError("Alita model not found")
-         return AlitaChatModel(**model_params)
      elif model_type in chat_models:
          model = getattr(__import__("langchain_community.chat_models", fromlist=[model_type]), model_type)
          return model(**model_params)

alita_sdk/tools/openapi/__init__.py
@@ -1,12 +1,15 @@
  import json
  import re
+ import logging
  from typing import List, Any, Optional, Dict
- from langchain_core.tools import BaseTool, BaseToolkit
+ from langchain_core.tools import BaseTool, BaseToolkit, ToolException
  from requests_openapi import Operation, Client, Server
 
  from pydantic import create_model, Field
  from functools import partial
 
+ logger = logging.getLogger(__name__)
+
  name = "openapi"
 
  def get_tools(tool):
@@ -105,11 +108,19 @@ class AlitaOpenAPIToolkit(BaseToolkit):
          c.requestor.headers.update(headers)
          tools = []
          for i in tools_set:
+
              try:
+                 if not i:
+                     raise ToolException("Operation id is missing for some of declared operations.")
                  tool = c.operations[i]
+                 if not isinstance(tool, Operation):
+                     raise ToolException(f"Operation {i} is not an instance of Operation class.")
                  tools.append(create_api_tool(i, tool))
-             except KeyError:
-                 ...
+             except ToolException:
+                 raise
+             except Exception as e:
+                 logger.warning(f"Tool {i} not found in OpenAPI spec.")
+                 raise ToolException(f"Cannot create API tool ({i}): \n{e}.")
          return cls(request_session=c, tools=tools)
 
      def get_tools(self):
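
The net effect of this change is that toolkit construction now fails fast: a missing operationId or an entry that is not a requests_openapi Operation raises ToolException instead of being skipped silently. A rough sketch of handling that on the caller side (the build_openapi_toolkit helper shown here is hypothetical; see the module for the real entry point):

# Illustrative only: surfacing the new ToolException during toolkit setup.
from langchain_core.tools import ToolException

try:
    toolkit = build_openapi_toolkit(spec)  # hypothetical wrapper around AlitaOpenAPIToolkit construction
except ToolException as e:
    # e.g. "Operation id is missing for some of declared operations."
    # or   "Cannot create API tool (<operation_id>): ..."
    print(f"OpenAPI toolkit could not be built: {e}")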

alita_sdk/tools/sharepoint/__init__.py
@@ -14,7 +14,8 @@ def get_tools(tool):
          site_url=tool['settings'].get('site_url', None),
          client_id=tool['settings'].get('client_id', None),
          client_secret=tool['settings'].get('client_secret', None),
-         toolkit_name=tool.get('toolkit_name'))
+         toolkit_name=tool.get('toolkit_name'),
+         llm=tool['settings'].get('llm'))
          .get_tools())
 
 

alita_sdk/tools/sharepoint/api_wrapper.py
@@ -1,5 +1,6 @@
+ import json
  import logging
- from typing import Optional
+ from typing import Optional, List, Dict, Any
 
  from ..utils.content_parser import parse_file_content
  from langchain_core.tools import ToolException
@@ -7,7 +8,9 @@ from office365.runtime.auth.client_credential import ClientCredential
  from office365.sharepoint.client_context import ClientContext
  from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr
 
- from ..elitea_base import BaseToolApiWrapper
+ from ..elitea_base import BaseToolApiWrapper, BaseIndexParams, BaseVectorStoreToolApiWrapper
+ from ...runtime.langchain.interfaces.llm_processor import get_embeddings
+ from langchain_core.documents import Document
 
  NoInput = create_model(
      "NoInput"
@@ -29,17 +32,36 @@ ReadDocument = create_model(
      "ReadDocument",
      path=(str, Field(description="Contains the server-relative path of a document for reading.")),
      is_capture_image=(Optional[bool], Field(description="Determines is pictures in the document should be recognized.", default=False)),
-     page_number=(Optional[int], Field(description="Specifies which page to read. If it is None, then full document will be read.", default=None))
+     page_number=(Optional[int], Field(description="Specifies which page to read. If it is None, then full document will be read.", default=None)),
+     sheet_name=(Optional[str], Field(
+         description="Specifies which sheet to read. If it is None, then full document will be read.",
+         default=None))
+ )
+
+ indexData = create_model(
+     "indexData",
+     __base__=BaseIndexParams,
+     progress_step=(Optional[int], Field(default=None, ge=0, le=100,
+                                         description="Optional step size for progress reporting during indexing")),
+     clean_index=(Optional[bool], Field(default=False,
+                                        description="Optional flag to enforce clean existing index before indexing new data")),
  )
 
 
- class SharepointApiWrapper(BaseToolApiWrapper):
+ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
      site_url: str
      client_id: str = None
      client_secret: SecretStr = None
      token: SecretStr = None
      _client: Optional[ClientContext] = PrivateAttr()  # Private attribute for the office365 client
 
+     llm: Any = None
+     connection_string: Optional[SecretStr] = None
+     collection_name: Optional[str] = None
+     embedding_model: Optional[str] = "HuggingFaceEmbeddings"
+     embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
+     vectorstore_type: Optional[str] = "PGVector"
+
      @model_validator(mode='before')
      @classmethod
      def validate_toolkit(cls, values):
@@ -111,7 +133,8 @@ class SharepointApiWrapper(BaseToolApiWrapper):
                  'Path': file.properties['ServerRelativeUrl'],
                  'Created': file.properties['TimeCreated'],
                  'Modified': file.properties['TimeLastModified'],
-                 'Link': file.properties['LinkingUrl']
+                 'Link': file.properties['LinkingUrl'],
+                 'id': file.properties['UniqueId']
              }
              result.append(temp_props)
          return result if result else ToolException("Can not get files or folder is empty. Please, double check folder name and read permissions.")
@@ -119,7 +142,7 @@
              logging.error(f"Failed to load files from sharepoint: {e}")
              return ToolException("Can not get files. Please, double check folder name and read permissions.")
 
-     def read_file(self, path, is_capture_image: bool = False, page_number: int = None):
+     def read_file(self, path, is_capture_image: bool = False, page_number: int = None, sheet_name: str=None):
          """ Reads file located at the specified server-relative path. """
          try:
              file = self._client.web.get_file_by_server_relative_path(path)
@@ -130,7 +153,42 @@
          except Exception as e:
              logging.error(f"Failed to load file from SharePoint: {e}. Path: {path}. Please, double check file name and path.")
              return ToolException("File not found. Please, check file name and path.")
-         return parse_file_content(file.name, file_content, is_capture_image, page_number)
+         return parse_file_content(file_name=file.name,
+                                   file_content=file_content,
+                                   is_capture_image=is_capture_image,
+                                   page_number=page_number,
+                                   sheet_name=sheet_name,
+                                   llm=self.llm)
+
+     def _base_loader(self) -> List[Document]:
+         try:
+             all_files = self.get_files_list()
+         except Exception as e:
+             raise ToolException(f"Unable to extract files: {e}")
+
+         docs: List[Document] = []
+         for file in all_files:
+             metadata = {
+                 ("updated_at" if k == "Modified" else k): str(v)
+                 for k, v in file.items()
+             }
+             docs.append(Document(page_content="", metadata=metadata))
+         return docs
+
+     def index_data(self,
+                    collection_suffix: str = '',
+                    progress_step: int = None,
+                    clean_index: bool = False):
+         docs = self._base_loader()
+         embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+         vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+         return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+
+     def _process_document(self, document: Document) -> Document:
+         page_content = self.read_file(document.metadata['Path'], is_capture_image=True)
+
+         document.page_content = json.dumps(str(page_content))
+         return document
 
      def get_available_tools(self):
          return [
@@ -151,5 +209,11 @@
                  "description": self.read_file.__doc__,
                  "args_schema": ReadDocument,
                  "ref": self.read_file
+             },
+             {
+                 "name": "index_data",
+                 "ref": self.index_data,
+                 "description": self.index_data.__doc__,
+                 "args_schema": indexData,
              }
          ]
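
Taken together, these additions let the SharePoint wrapper build Documents from get_files_list() metadata and index them into a vector store, reading file contents lazily via _process_document(). A hedged usage sketch with placeholder credentials and connection settings:

# Illustrative only: indexing a SharePoint library into PGVector.
from alita_sdk.tools.sharepoint.api_wrapper import SharepointApiWrapper

wrapper = SharepointApiWrapper(
    site_url="https://example.sharepoint.com/sites/demo",            # placeholder
    client_id="<client-id>",                                         # placeholder
    client_secret="<client-secret>",                                 # placeholder
    llm=None,                                                        # optional LLM passed through to parse_file_content
    connection_string="postgresql+psycopg://user:pass@host/db",      # placeholder PGVector DSN
    collection_name="sharepoint_demo",
)
wrapper.index_data(collection_suffix="docs", progress_step=10, clean_index=False)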

alita_sdk/tools/testrail/__init__.py
@@ -16,7 +16,15 @@ def get_tools(tool):
          url=tool['settings']['url'],
          password=tool['settings'].get('password', None),
          email=tool['settings'].get('email', None),
-         toolkit_name=tool.get('toolkit_name')
+         toolkit_name=tool.get('toolkit_name'),
+         llm=tool['settings'].get('llm', None),
+
+         # indexer settings
+         connection_string=tool['settings'].get('connection_string', None),
+         collection_name=f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+         embedding_model="HuggingFaceEmbeddings",
+         embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+         vectorstore_type="PGVector"
      ).get_tools()
 
 

alita_sdk/tools/testrail/api_wrapper.py
@@ -1,14 +1,21 @@
  import json
  import logging
- from typing import Dict, List, Optional, Union
+ from typing import Dict, List, Optional, Union, Any, Generator
 
  import pandas as pd
  from langchain_core.tools import ToolException
  from pydantic import SecretStr, create_model, model_validator
  from pydantic.fields import Field, PrivateAttr
  from testrail_api import StatusCodeError, TestRailAPI
+ from ..elitea_base import BaseVectorStoreToolApiWrapper, BaseIndexParams
+ from langchain_core.documents import Document
 
- from ..elitea_base import BaseToolApiWrapper
+ from ...runtime.utils.utils import IndexerKeywords
+
+ try:
+     from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
+ except ImportError:
+     from alita_sdk.langchain.interfaces.llm_processor import get_embeddings
 
  logger = logging.getLogger(__name__)
 
@@ -281,6 +288,19 @@ updateCase = create_model(
      ),
  )
 
+ # Schema for indexing TestRail data into vector store
+ indexData = create_model(
+     "indexData",
+     __base__=BaseIndexParams,
+     project_id=(str, Field(description="TestRail project ID to index data from")),
+     suite_id=(Optional[str], Field(default=None, description="Optional TestRail suite ID to filter test cases")),
+     section_id=(Optional[int], Field(default=None, description="Optional section ID to filter test cases")),
+     title_keyword=(Optional[str], Field(default=None, description="Optional keyword to filter test cases by title")),
+     progress_step=(Optional[int],
+                    Field(default=None, ge=0, le=100, description="Optional step size for progress reporting during indexing")),
+     clean_index=(Optional[bool],
+                  Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
+ )
 
  SUPPORTED_KEYS = {
      "id", "title", "section_id", "template_id", "type_id", "priority_id", "milestone_id",
@@ -291,11 +311,19 @@ SUPPORTED_KEYS = {
  }
 
 
- class TestrailAPIWrapper(BaseToolApiWrapper):
+ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
      url: str
      password: Optional[SecretStr] = None,
      email: Optional[str] = None,
      _client: Optional[TestRailAPI] = PrivateAttr()  # Private attribute for the TestRail client
+     llm: Any = None
+
+     connection_string: Optional[SecretStr] = None
+     collection_name: Optional[str] = None
+     embedding_model: Optional[str] = "HuggingFaceEmbeddings"
+     embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
+     vectorstore_type: Optional[str] = "PGVector"
+
 
      @model_validator(mode="before")
      @classmethod
@@ -492,7 +520,7 @@
          you can submit and update specific fields only).
 
          :param case_id: T
-         he ID of the test case
+         He ID of the test case
          :param kwargs:
          :key title: str
              The title of the test case
@@ -522,6 +550,118 @@
              f"Test case #{case_id} has been updated at '{updated_case['updated_on']}')"
          )
 
+     def _base_loader(self, project_id: str,
+                      suite_id: Optional[str] = None,
+                      section_id: Optional[int] = None,
+                      title_keyword: Optional[str] = None
+                      ) -> Generator[Document, None, None]:
+         try:
+             if suite_id:
+                 resp = self._client.cases.get_cases(project_id=project_id, suite_id=int(suite_id))
+                 cases = resp.get('cases', [])
+             else:
+                 resp = self._client.cases.get_cases(project_id=project_id)
+                 cases = resp.get('cases', [])
+         except StatusCodeError as e:
+             raise ToolException(f"Unable to extract test cases: {e}")
+         # Apply filters
+         if section_id is not None:
+             cases = [case for case in cases if case.get('section_id') == section_id]
+         if title_keyword is not None:
+             cases = [case for case in cases if title_keyword.lower() in case.get('title', '').lower()]
+
+         for case in cases:
+             yield Document(page_content=json.dumps(case), metadata={
+                 'project_id': project_id,
+                 'title': case.get('title', ''),
+                 'suite_id': suite_id or case.get('suite_id', ''),
+                 'id': str(case.get('id', '')),
+                 'updated_on': case.get('updated_on') or -1,
+                 'labels': [lbl['title'] for lbl in case.get('labels', [])],
+                 'type': case.get('type_id') or -1,
+                 'priority': case.get('priority_id') or -1,
+                 'milestone': case.get('milestone_id') or -1,
+                 'estimate': case.get('estimate') or '',
+                 'automation_type': case.get('custom_automation_type') or -1,
+                 'section_id': case.get('section_id') or -1,
+                 'entity_type': 'test_case',
+             })
+
+     def index_data(
+             self,
+             project_id: str,
+             suite_id: Optional[str] = None,
+             collection_suffix: str = "",
+             section_id: Optional[int] = None,
+             title_keyword: Optional[str] = None,
+             progress_step: Optional[int] = None,
+             clean_index: Optional[bool] = False
+     ):
+         """Load TestRail test cases into the vector store."""
+         docs = self._base_loader(project_id, suite_id, section_id, title_keyword)
+         embedding = get_embeddings(self.embedding_model, self.embedding_model_params)
+         vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+         return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+
+     def _process_document(self, document: Document) -> Generator[Document, None, None]:
+         """
+         Process an existing base document to extract relevant metadata for full document preparation.
+         Used for late processing of documents after we ensure that the document has to be indexed to avoid
+         time-consuming operations for documents which might be useless.
+
+         Args:
+             document (Document): The base document to process.
+
+         Returns:
+             Generator[Document, None, None]: A generator yielding processed Document objects with metadata.
+         """
+         try:
+             # get base data from the document required to extract attachments and other metadata
+             base_data = json.loads(document.page_content)
+             case_id = base_data.get("id")
+
+             # get a list of attachments for the case
+             attachments = self._client.attachments.get_attachments_for_case_bulk(case_id=case_id)
+
+             # process each attachment to extract its content
+             for attachment in attachments:
+                 attachment_id = attachment['id']
+                 # add attachment id to metadata of parent
+                 document.metadata.setdefault(IndexerKeywords.DEPENDENT_DOCS.value, []).append(attachment_id)
+
+                 # TODO: pass it to chunkers
+                 yield Document(page_content=self._process_attachment(attachment),
+                                metadata={
+                                    'project_id': base_data.get('project_id', ''),
+                                    IndexerKeywords.PARENT.value: case_id,
+                                    'id': attachment_id,
+                                    'filename': attachment['filename'],
+                                    'filetype': attachment['filetype'],
+                                    'created_on': attachment['created_on'],
+                                    'entity_type': 'test_case_attachment',
+                                    'is_image': attachment['is_image'],
+                                })
+         except json.JSONDecodeError as e:
+             raise ToolException(f"Failed to decode JSON from document: {e}")
+
+     def _process_attachment(self, attachment: Dict[str, Any]) -> str:
+         """
+         Processes an attachment to extract its content.
+
+         Args:
+             attachment (Dict[str, Any]): The attachment data.
+
+         Returns:
+             str: string description of the attachment.
+         """
+
+         page_content = "This filetype is not supported."
+         if attachment['filetype'] == 'txt' :
+             page_content = self._client.get(endpoint=f"get_attachment/{attachment['id']}")
+         # TODO: add support for other file types
+         # use utility to handle different types (tools/utils)
+         return page_content
+
      def _to_markup(self, data: List[Dict], output_format: str) -> str:
          """
          Converts the given data into the specified format: 'json', 'csv', or 'markdown'.
@@ -550,7 +690,7 @@
              return df.to_markdown(index=False)
 
      def get_available_tools(self):
-         return [
+         tools = [
              {
                  "name": "get_case",
                  "ref": self.get_case,
@@ -587,4 +727,13 @@
                  "description": self.update_case.__doc__,
                  "args_schema": updateCase,
              },
+             {
+                 "name": "index_data",
+                 "ref": self.index_data,
+                 "description": self.index_data.__doc__,
+                 "args_schema": indexData,
+             }
          ]
+         # Add vector search from base
+         tools.extend(self._get_vector_search_tools())
+         return tools
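
As with the SharePoint wrapper, TestrailAPIWrapper now exposes index_data alongside the vector-search tools inherited from BaseVectorStoreToolApiWrapper: _base_loader() yields one Document per test case, and _process_document() yields dependent Documents for txt attachments linked via IndexerKeywords.PARENT. A rough usage sketch with placeholder settings:

# Illustrative only: indexing TestRail cases (and their txt attachments) into PGVector.
from alita_sdk.tools.testrail.api_wrapper import TestrailAPIWrapper

wrapper = TestrailAPIWrapper(
    url="https://example.testrail.io",                               # placeholder
    email="qa@example.com",                                          # placeholder
    password="<api-key>",                                            # placeholder
    connection_string="postgresql+psycopg://user:pass@host/db",      # placeholder PGVector DSN
    collection_name="testrail_demo",
)
wrapper.index_data(project_id="1", title_keyword="login",
                   progress_step=10, clean_index=False)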