alita-sdk 0.3.374__py3-none-any.whl → 0.3.423__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk might be problematic.

Files changed (51)
  1. alita_sdk/configurations/bitbucket.py +95 -0
  2. alita_sdk/configurations/confluence.py +96 -1
  3. alita_sdk/configurations/gitlab.py +79 -0
  4. alita_sdk/configurations/jira.py +103 -0
  5. alita_sdk/configurations/testrail.py +88 -0
  6. alita_sdk/configurations/xray.py +93 -0
  7. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  8. alita_sdk/configurations/zephyr_essential.py +75 -0
  9. alita_sdk/runtime/clients/client.py +3 -2
  10. alita_sdk/runtime/clients/sandbox_client.py +8 -0
  11. alita_sdk/runtime/langchain/assistant.py +56 -40
  12. alita_sdk/runtime/langchain/constants.py +4 -0
  13. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  14. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  15. alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
  16. alita_sdk/runtime/langchain/langraph_agent.py +92 -28
  17. alita_sdk/runtime/langchain/utils.py +24 -4
  18. alita_sdk/runtime/toolkits/application.py +8 -1
  19. alita_sdk/runtime/toolkits/tools.py +80 -49
  20. alita_sdk/runtime/tools/__init__.py +7 -2
  21. alita_sdk/runtime/tools/application.py +7 -0
  22. alita_sdk/runtime/tools/function.py +28 -23
  23. alita_sdk/runtime/tools/graph.py +10 -4
  24. alita_sdk/runtime/tools/image_generation.py +104 -8
  25. alita_sdk/runtime/tools/llm.py +146 -114
  26. alita_sdk/runtime/tools/sandbox.py +166 -63
  27. alita_sdk/runtime/tools/vectorstore.py +22 -21
  28. alita_sdk/runtime/tools/vectorstore_base.py +16 -15
  29. alita_sdk/runtime/utils/utils.py +1 -0
  30. alita_sdk/tools/__init__.py +43 -31
  31. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  32. alita_sdk/tools/base_indexer_toolkit.py +102 -93
  33. alita_sdk/tools/code_indexer_toolkit.py +15 -5
  34. alita_sdk/tools/confluence/api_wrapper.py +30 -8
  35. alita_sdk/tools/confluence/loader.py +10 -0
  36. alita_sdk/tools/elitea_base.py +22 -22
  37. alita_sdk/tools/gitlab/api_wrapper.py +8 -9
  38. alita_sdk/tools/jira/api_wrapper.py +1 -1
  39. alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
  40. alita_sdk/tools/openapi/__init__.py +10 -1
  41. alita_sdk/tools/qtest/api_wrapper.py +298 -51
  42. alita_sdk/tools/sharepoint/api_wrapper.py +104 -33
  43. alita_sdk/tools/sharepoint/authorization_helper.py +175 -1
  44. alita_sdk/tools/sharepoint/utils.py +8 -2
  45. alita_sdk/tools/utils/content_parser.py +27 -16
  46. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +38 -25
  47. {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/METADATA +1 -1
  48. {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/RECORD +51 -51
  49. {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/WHEEL +0 -0
  50. {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/licenses/LICENSE +0 -0
  51. {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/top_level.txt +0 -0
alita_sdk/tools/sharepoint/authorization_helper.py
@@ -1,7 +1,10 @@
 from datetime import datetime, timezone
+from urllib.parse import unquote, urlparse, quote
 
 import jwt
 import requests
+from botocore.response import get_response
+
 
 class SharepointAuthorizationHelper:
 
@@ -54,4 +57,175 @@ class SharepointAuthorizationHelper:
         except jwt.ExpiredSignatureError:
             return False
         except jwt.InvalidTokenError:
-            return False
+            return False
+
+    def _validate_response(self, response, required_field, error_prefix=None):
+        if response.status_code != 200:
+            raise RuntimeError(f"{error_prefix or 'Request'} failed: {response.status_code} {response.text}")
+        json_data = response.json()
+        if required_field not in json_data:
+            raise KeyError(f"'{required_field}' missing in response")
+        return json_data[required_field]
+
+    def generate_token_and_site_id(self, site_url: str) -> tuple[str, str]:
+        try:
+            parsed = urlparse(site_url)
+            domain = parsed.hostname
+            site_path = parsed.path.strip('/')
+            if not domain or not site_path:
+                raise ValueError(f"site_url missing domain or site path: {site_url}")
+            app_name = domain.split('.')[0]
+            openid_config_url = f"https://login.microsoftonline.com/{app_name}.onmicrosoft.com/v2.0/.well-known/openid-configuration"
+            response = requests.get(openid_config_url)
+            token_url = self._validate_response(response, required_field="token_endpoint", error_prefix="OpenID config")
+            token_data = {
+                "grant_type": "client_credentials",
+                "client_id": self.client_id,
+                "client_secret": self.client_secret,
+                "scope": "https://graph.microsoft.com/.default"
+            }
+            token_response = requests.post(token_url, data=token_data)
+            access_token = self._validate_response(token_response, required_field="access_token", error_prefix="Token request")
+            graph_site_url = f"https://graph.microsoft.com/v1.0/sites/{domain}:/{site_path}"
+            headers = {"Authorization": f"Bearer {access_token}"}
+            site_response = requests.get(graph_site_url, headers=headers)
+            site_id = self._validate_response(site_response, required_field="id", error_prefix="Site info")
+            return access_token, site_id
+        except Exception as e:
+            raise RuntimeError(f"Error while obtaining access_token and site_id: {e}")
+
+    def get_files_list(self, site_url: str, folder_name: str = None, limit_files: int = 100):
+        if not site_url or not site_url.startswith("https://"):
+            raise ValueError(f"Invalid site_url format: {site_url}")
+        if limit_files is not None and (not isinstance(limit_files, int) or limit_files <= 0):
+            raise ValueError(f"limit_files must be a positive integer, got: {limit_files}")
+        try:
+            access_token, site_id = self.generate_token_and_site_id(site_url)
+            headers = {"Authorization": f"Bearer {access_token}"}
+            drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
+            drives_response = requests.get(drives_url, headers=headers)
+            drives = self._validate_response(drives_response, required_field="value", error_prefix="Drives request")
+            result = []
+            def _recurse_drive(drive_id, drive_path, parent_folder, limit_files):
+                # Escape folder_name for URL safety if present
+                if parent_folder:
+                    safe_folder_name = quote(parent_folder.strip('/'), safe="/")
+                    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root:/{safe_folder_name}:/children?$top={limit_files}"
+                else:
+                    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root/children?$top={limit_files}"
+                response = requests.get(url, headers=headers)
+                if response.status_code != 200:
+                    return []
+                files_json = response.json()
+                if "value" not in files_json:
+                    return []
+                files = []
+                for file in files_json["value"]:
+                    file_name = file.get('name', '')
+                    # Build full path reflecting nested folders
+                    if parent_folder:
+                        full_path = '/' + '/'.join([drive_path.strip('/'), parent_folder.strip('/'), file_name.strip('/')])
+                    else:
+                        full_path = '/' + '/'.join([drive_path.strip('/'), file_name.strip('/')])
+                    temp_props = {
+                        'Name': file_name,
+                        'Path': full_path,
+                        'Created': file.get('createdDateTime'),
+                        'Modified': file.get('lastModifiedDateTime'),
+                        'Link': file.get('webUrl'),
+                        'id': file.get('id')
+                    }
+                    if not all([temp_props['Name'], temp_props['Path'], temp_props['id']]):
+                        continue  # skip files with missing required fields
+                    if 'folder' in file:
+                        # Recursively extract files from this folder
+                        inner_folder = parent_folder + '/' + file_name if parent_folder else file_name
+                        inner_files = _recurse_drive(drive_id, drive_path, inner_folder, limit_files)
+                        files.extend(inner_files)
+                    else:
+                        files.append(temp_props)
+                    if limit_files is not None and len(result) + len(files) >= limit_files:
+                        return files[:limit_files - len(result)]
+                return files
+            for drive in drives:
+                drive_id = drive.get("id")
+                drive_path = unquote(urlparse(drive.get("webUrl")).path) if drive.get("webUrl") else ""
+                if not drive_id:
+                    continue  # skip drives without id
+                files = _recurse_drive(drive_id, drive_path, folder_name, limit_files)
+                result.extend(files)
+                if limit_files is not None and len(result) >= limit_files:
+                    return result[:limit_files]
+            return result
+        except Exception as e:
+            raise RuntimeError(f"Error in get_files_list: {e}")
+
+    def get_file_content(self, site_url: str, path: str):
+        try:
+            access_token, site_id = self.generate_token_and_site_id(site_url)
+            headers = {"Authorization": f"Bearer {access_token}"}
+            drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
+            drives_response = requests.get(drives_url, headers=headers)
+            drives = self._validate_response(drives_response, required_field="value", error_prefix="Drives request")
+            path = path.strip('/')
+            #
+            for drive in drives:
+                drive_path = unquote(urlparse(drive.get("webUrl")).path).strip('/')
+                if not drive_path or not path.startswith(drive_path):
+                    continue
+                drive_id = drive.get("id")
+                if not drive_id:
+                    continue
+                path = path.replace(drive_path, '').strip('/')
+                safe_path = quote(path, safe="")
+                url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{safe_path}:/content"
+                response = requests.get(url, headers=headers)
+                if response.status_code == 200:
+                    return response.content
+            raise RuntimeError(f"File '{path}' not found in any private or shared documents.")
+        except Exception as e:
+            raise RuntimeError(f"Error in get_file_content: {e}")
+
+    def get_list_items(self, site_url: str, list_title: str, limit: int = 1000):
+        """Fallback Graph API method to read SharePoint list items by list title.
+
+        Returns a list of dictionaries representing list item fields.
+        """
+        if not site_url or not site_url.startswith("https://"):
+            raise ValueError(f"Invalid site_url format: {site_url}")
+        try:
+            access_token, site_id = self.generate_token_and_site_id(site_url)
+            headers = {"Authorization": f"Bearer {access_token}"}
+            lists_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists"
+            response = requests.get(lists_url, headers=headers)
+            if response.status_code != 200:
+                raise RuntimeError(f"Lists request failed: {response.status_code} {response.text}")
+            lists_json = response.json()
+            lists = lists_json.get("value", [])
+            target_list = None
+            normalized_title = list_title.strip().lower()
+            for lst in lists:
+                # displayName is the user-visible title. name can differ (internal name)
+                display_name = (lst.get("displayName") or lst.get("name") or '').strip().lower()
+                if display_name == normalized_title:
+                    target_list = lst
+                    break
+            if not target_list:
+                raise RuntimeError(f"List '{list_title}' not found via Graph API.")
+            list_id = target_list.get('id')
+            if not list_id:
+                raise RuntimeError(f"List '{list_title}' missing id field.")
+            items_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items?expand=fields&$top={limit}"
+            items_response = requests.get(items_url, headers=headers)
+            if items_response.status_code != 200:
+                raise RuntimeError(f"List items request failed: {items_response.status_code} {items_response.text}")
+            items_json = items_response.json()
+            values = items_json.get('value', [])
+            result = []
+            for item in values:
+                fields = item.get('fields', {})
+                if fields:
+                    result.append(fields)
+            return result
+        except Exception as e:
+            raise RuntimeError(f"Error in get_list_items: {e}")
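The new helper methods above wrap the Microsoft Graph client-credentials flow: token endpoint discovery via the tenant's OpenID configuration, then sites, drives, and lists lookups. A minimal call-site sketch follows; it assumes `helper` is an already-configured SharepointAuthorizationHelper whose constructor supplies the client_id/client_secret used above (construction is outside this hunk), and the site URL, folder name, and list title are hypothetical:

# Assumption: `helper` is a configured SharepointAuthorizationHelper instance.
site = "https://contoso.sharepoint.com/sites/engineering"  # hypothetical site URL

# Client-credentials token plus the resolved Graph site id.
token, site_id = helper.generate_token_and_site_id(site)

# Up to 50 files across the site's drives, recursing into sub-folders.
files = helper.get_files_list(site, folder_name="Shared Documents", limit_files=50)

# Raw bytes of the first discovered file, located by its drive-relative path.
content = helper.get_file_content(site, files[0]["Path"]) if files else None

# Field dictionaries for the items of a list, matched by display title.
items = helper.get_list_items(site, list_title="Tasks", limit=200)
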
alita_sdk/tools/sharepoint/utils.py
@@ -1,5 +1,7 @@
-from docx import Document
+import re
 from io import BytesIO
+from docx import Document
+
 
 def read_docx_from_bytes(file_content):
     """Read and return content from a .docx file using a byte stream."""
@@ -11,4 +13,8 @@ def read_docx_from_bytes(file_content):
         return '\n'.join(text)
     except Exception as e:
         print(f"Error reading .docx from bytes: {e}")
-        return ""
+        return ""
+
+
+def decode_sharepoint_string(s):
+    return re.sub(r'_x([0-9A-Fa-f]{4})_', lambda m: chr(int(m.group(1), 16)), s)
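The new decode_sharepoint_string reverses SharePoint's _xHHHH_ escaping of special characters in internal field and file names, replacing each four-digit hex code with the corresponding Unicode character. A small illustration with hypothetical input strings:

from alita_sdk.tools.sharepoint.utils import decode_sharepoint_string

decode_sharepoint_string("Project_x0020_Plan")        # -> "Project Plan"  (_x0020_ is a space)
decode_sharepoint_string("Q1_x002F_Q2_x0020_Report")  # -> "Q1/Q2 Report"  (_x002F_ is "/")
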
alita_sdk/tools/utils/content_parser.py
@@ -92,21 +92,24 @@ def parse_file_content(file_name=None, file_content=None, is_capture_image: bool
         return ToolException(
             "Not supported type of files entered. Supported types are TXT, DOCX, PDF, PPTX, XLSX and XLS only.")
 
-    if hasattr(loader, 'get_content'):
-        return loader.get_content()
-    else:
-        extension = Path(file_path if file_path else file_name).suffix
-        loader_kwargs = get_loader_kwargs(loaders_map.get(extension), file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets)
-        if file_content:
-            return load_content_from_bytes(file_content=file_content,
-                                           extension=extension,
-                                           loader_extra_config=loader_kwargs,
-                                           llm=llm)
+    try:
+        if hasattr(loader, 'get_content'):
+            return loader.get_content()
         else:
-            return load_content(file_path=file_path,
-                                extension=extension,
-                                loader_extra_config=loader_kwargs,
-                                llm=llm)
+            extension = Path(file_path if file_path else file_name).suffix
+            loader_kwargs = get_loader_kwargs(loaders_map.get(extension), file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets)
+            if file_content:
+                return load_content_from_bytes(file_content=file_content,
+                                               extension=extension,
+                                               loader_extra_config=loader_kwargs,
+                                               llm=llm)
+            else:
+                return load_content(file_path=file_path,
+                                    extension=extension,
+                                    loader_extra_config=loader_kwargs,
+                                    llm=llm)
+    except Exception as e:
+        return ToolException(f"Error reading file ({file_name or file_path}) content. Make sure these types are supported: {str(e)}")
 
 def load_file_docs(file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
                    sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False) -> List[Document] | ToolException:
@@ -153,7 +156,7 @@ def prepare_loader(file_name=None, file_content=None, is_capture_image: bool = F
 
     loader_object = loaders_map.get(extension)
     if not loader_object:
-        return None
+        loader_object = loaders_map.get('.txt')  # Default to text loader if no specific loader found
     loader_kwargs = get_loader_kwargs(loader_object, file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets, prompt)
     loader = loader_object['class'](**loader_kwargs)
     return loader
@@ -222,10 +225,18 @@ def process_document_by_type(content, extension_source: str, document: Document
             metadata={**document.metadata, 'chunk_id': 1}
         )
        return
+    #
+    chunks_counter = 0
    for chunk in chunks:
+        chunks_counter += 1
+        metadata = {**document.metadata, **chunk.metadata}
+        #
+        # ensure each chunk has a unique chunk_id
+        metadata['chunk_id'] = chunks_counter
+        #
        yield Document(
            page_content=sanitize_for_postgres(chunk.page_content),
-            metadata={**document.metadata, **chunk.metadata}
+            metadata=metadata
        )
 
 
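The last hunk above replaces the merged chunk metadata with an explicit 1-based counter, so every yielded chunk carries a unique chunk_id even when the splitter's own metadata repeats. A standalone sketch of the numbering pattern (Document comes from langchain_core; the sanitize argument stands in for the project's sanitize_for_postgres helper, which is not shown in this diff):

from langchain_core.documents import Document

def number_chunks(document, chunks, sanitize=lambda text: text):
    # Merge parent metadata with each chunk's metadata and tag a unique chunk_id.
    counter = 0
    for chunk in chunks:
        counter += 1
        metadata = {**document.metadata, **chunk.metadata, 'chunk_id': counter}
        yield Document(page_content=sanitize(chunk.page_content), metadata=metadata)
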
alita_sdk/tools/vector_adapters/VectorStoreAdapter.py
@@ -26,12 +26,12 @@ class VectorStoreAdapter(ABC):
         pass
 
     @abstractmethod
-    def get_indexed_ids(self, vectorstore_wrapper, collection_suffix: Optional[str] = '') -> List[str]:
+    def get_indexed_ids(self, vectorstore_wrapper, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from vectorstore"""
         pass
 
     @abstractmethod
-    def clean_collection(self, vectorstore_wrapper, collection_suffix: str = ''):
+    def clean_collection(self, vectorstore_wrapper, index_name: str = ''):
         """Clean the vectorstore collection by deleting all indexed data."""
         pass
 
@@ -41,7 +41,7 @@ class VectorStoreAdapter(ABC):
         pass
 
     @abstractmethod
-    def get_code_indexed_data(self, vectorstore_wrapper, collection_suffix) -> Dict[str, Dict[str, Any]]:
+    def get_code_indexed_data(self, vectorstore_wrapper, index_name) -> Dict[str, Dict[str, Any]]:
         """Get all indexed data from vectorstore for code content"""
         pass
 
@@ -51,7 +51,7 @@ class VectorStoreAdapter(ABC):
         pass
 
     @abstractmethod
-    def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
+    def get_index_meta(self, vectorstore_wrapper, index_name: str) -> List[Dict[str, Any]]:
         """Get all index_meta entries from the vector store."""
         pass
 
@@ -106,20 +106,25 @@ class PGVectorAdapter(VectorStoreAdapter):
             session.commit()
             logger.info(f"Schema '{schema_name}' has been dropped.")
 
-    def get_indexed_ids(self, vectorstore_wrapper, collection_suffix: Optional[str] = '') -> List[str]:
+    def get_indexed_ids(self, vectorstore_wrapper, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from PGVector"""
         from sqlalchemy.orm import Session
-        from sqlalchemy import func
+        from sqlalchemy import func, or_
 
         store = vectorstore_wrapper.vectorstore
         try:
             with Session(store.session_maker.bind) as session:
                 # Start building the query
                 query = session.query(store.EmbeddingStore.id)
-                # Apply filter only if collection_suffix is provided
-                if collection_suffix:
+                # Apply filter only if index_name is provided
+                if index_name:
                     query = query.filter(
-                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
+                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name,
+                        or_(
+                            func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type').is_(None),
+                            func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata,
+                                                         'type') != IndexerKeywords.INDEX_META_TYPE.value
+                        )
                     )
                 ids = query.all()
                 return [str(id_tuple[0]) for id_tuple in ids]
@@ -127,25 +132,33 @@ class PGVectorAdapter(VectorStoreAdapter):
             logger.error(f"Failed to get indexed IDs from PGVector: {str(e)}")
             return []
 
-    def clean_collection(self, vectorstore_wrapper, collection_suffix: str = ''):
+    def clean_collection(self, vectorstore_wrapper, index_name: str = ''):
         """Clean the vectorstore collection by deleting all indexed data."""
         # This logic deletes all data from the vectorstore collection without removal of collection.
         # Collection itself remains available for future indexing.
-        vectorstore_wrapper.vectorstore.delete(ids=self.get_indexed_ids(vectorstore_wrapper, collection_suffix))
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func
+
+        store = vectorstore_wrapper.vectorstore
+        with Session(store.session_maker.bind) as session:
+            session.query(store.EmbeddingStore).filter(
+                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
+            ).delete(synchronize_session=False)
+            session.commit()
 
     def is_vectorstore_type(self, vectorstore) -> bool:
         """Check if the vectorstore is a PGVector store."""
         return hasattr(vectorstore, 'session_maker') and hasattr(vectorstore, 'EmbeddingStore')
 
-    def get_indexed_data(self, vectorstore_wrapper, collection_suffix: str)-> Dict[str, Dict[str, Any]]:
-        """Get all indexed data from PGVector for non-code content per collection_suffix."""
+    def get_indexed_data(self, vectorstore_wrapper, index_name: str)-> Dict[str, Dict[str, Any]]:
+        """Get all indexed data from PGVector for non-code content per index_name."""
         from sqlalchemy.orm import Session
         from sqlalchemy import func
         from ...runtime.utils.utils import IndexerKeywords
 
         result = {}
         try:
-            vectorstore_wrapper._log_data("Retrieving already indexed data from PGVector vectorstore",
+            vectorstore_wrapper._log_tool_event("Retrieving already indexed data from PGVector vectorstore",
                                           tool_name="get_indexed_data")
             store = vectorstore_wrapper.vectorstore
             with Session(store.session_maker.bind) as session:
@@ -154,7 +167,7 @@ class PGVectorAdapter(VectorStoreAdapter):
                 store.EmbeddingStore.document,
                 store.EmbeddingStore.cmetadata
             ).filter(
-                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
+                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
             ).all()
 
             # Process the retrieved data
@@ -187,14 +200,14 @@ class PGVectorAdapter(VectorStoreAdapter):
 
         return result
 
-    def get_code_indexed_data(self, vectorstore_wrapper, collection_suffix: str) -> Dict[str, Dict[str, Any]]:
+    def get_code_indexed_data(self, vectorstore_wrapper, index_name: str) -> Dict[str, Dict[str, Any]]:
         """Get all indexed code data from PGVector per collection suffix."""
         from sqlalchemy.orm import Session
         from sqlalchemy import func
 
         result = {}
         try:
-            vectorstore_wrapper._log_data("Retrieving already indexed code data from PGVector vectorstore",
+            vectorstore_wrapper._log_tool_event(message="Retrieving already indexed code data from PGVector vectorstore",
                                           tool_name="index_code_data")
             store = vectorstore_wrapper.vectorstore
             with (Session(store.session_maker.bind) as session):
@@ -202,7 +215,7 @@ class PGVectorAdapter(VectorStoreAdapter):
                 store.EmbeddingStore.id,
                 store.EmbeddingStore.cmetadata
             ).filter(
-                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
+                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
            ).all()
 
            for db_id, meta in docs:
@@ -272,7 +285,7 @@ class PGVectorAdapter(VectorStoreAdapter):
         except Exception as e:
             logger.error(f"Failed to update collection for entry ID {entry_id}: {str(e)}")
 
-    def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
+    def get_index_meta(self, vectorstore_wrapper, index_name: str) -> List[Dict[str, Any]]:
         from sqlalchemy.orm import Session
         from sqlalchemy import func
 
@@ -285,7 +298,7 @@ class PGVectorAdapter(VectorStoreAdapter):
                 store.EmbeddingStore.cmetadata
             ).filter(
                 store.EmbeddingStore.cmetadata['type'].astext == IndexerKeywords.INDEX_META_TYPE.value,
-                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
+                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
             ).all()
             result = []
             for id, document, cmetadata in meta:
@@ -312,7 +325,7 @@ class ChromaAdapter(VectorStoreAdapter):
     def remove_collection(self, vectorstore_wrapper, collection_name: str):
         vectorstore_wrapper.vectorstore.delete_collection()
 
-    def get_indexed_ids(self, vectorstore_wrapper, collection_suffix: Optional[str] = '') -> List[str]:
+    def get_indexed_ids(self, vectorstore_wrapper, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from Chroma"""
         try:
             data = vectorstore_wrapper.vectorstore.get(include=[])  # Only get IDs, no metadata
@@ -321,9 +334,9 @@ class ChromaAdapter(VectorStoreAdapter):
             logger.error(f"Failed to get indexed IDs from Chroma: {str(e)}")
             return []
 
-    def clean_collection(self, vectorstore_wrapper, collection_suffix: str = ''):
+    def clean_collection(self, vectorstore_wrapper, index_name: str = ''):
         """Clean the vectorstore collection by deleting all indexed data."""
-        vectorstore_wrapper.vectorstore.delete(ids=self.get_indexed_ids(vectorstore_wrapper, collection_suffix))
+        vectorstore_wrapper.vectorstore.delete(ids=self.get_indexed_ids(vectorstore_wrapper, index_name))
 
     def get_indexed_data(self, vectorstore_wrapper):
         """Get all indexed data from Chroma for non-code content"""
@@ -361,7 +374,7 @@ class ChromaAdapter(VectorStoreAdapter):
 
         return result
 
-    def get_code_indexed_data(self, vectorstore_wrapper, collection_suffix) -> Dict[str, Dict[str, Any]]:
+    def get_code_indexed_data(self, vectorstore_wrapper, index_name) -> Dict[str, Dict[str, Any]]:
         """Get all indexed code data from Chroma."""
         result = {}
         try:
@@ -391,7 +404,7 @@ class ChromaAdapter(VectorStoreAdapter):
         # This is a simplified implementation - in practice, you might need more complex logic
         logger.warning("add_to_collection for Chroma is not fully implemented yet")
 
-    def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
+    def get_index_meta(self, vectorstore_wrapper, index_name: str) -> List[Dict[str, Any]]:
         logger.warning("get_index_meta for Chroma is not implemented yet")
 
 
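Across both adapters the collection_suffix parameter is renamed to index_name, and PGVector's clean_collection now issues one bulk SQL DELETE filtered on cmetadata->>'collection' instead of deleting a pre-fetched id list, while get_indexed_ids excludes index_meta entries. A hedged call-site sketch, assuming adapter and vectorstore_wrapper are already constructed and "confluence_docs" is a hypothetical index name:

# Callers now pass index_name where collection_suffix was used before.
ids = adapter.get_indexed_ids(vectorstore_wrapper, index_name="confluence_docs")
meta = adapter.get_index_meta(vectorstore_wrapper, index_name="confluence_docs")

# For PGVector this now deletes rows where cmetadata->>'collection' = 'confluence_docs'
# in a single statement rather than deleting by the ids fetched above.
adapter.clean_collection(vectorstore_wrapper, index_name="confluence_docs")
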
{alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.374
+Version: 0.3.423
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0