alita-sdk 0.3.374__py3-none-any.whl → 0.3.423__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of alita-sdk might be problematic.
Files changed (51)
  1. alita_sdk/configurations/bitbucket.py +95 -0
  2. alita_sdk/configurations/confluence.py +96 -1
  3. alita_sdk/configurations/gitlab.py +79 -0
  4. alita_sdk/configurations/jira.py +103 -0
  5. alita_sdk/configurations/testrail.py +88 -0
  6. alita_sdk/configurations/xray.py +93 -0
  7. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  8. alita_sdk/configurations/zephyr_essential.py +75 -0
  9. alita_sdk/runtime/clients/client.py +3 -2
  10. alita_sdk/runtime/clients/sandbox_client.py +8 -0
  11. alita_sdk/runtime/langchain/assistant.py +56 -40
  12. alita_sdk/runtime/langchain/constants.py +4 -0
  13. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  14. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  15. alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
  16. alita_sdk/runtime/langchain/langraph_agent.py +92 -28
  17. alita_sdk/runtime/langchain/utils.py +24 -4
  18. alita_sdk/runtime/toolkits/application.py +8 -1
  19. alita_sdk/runtime/toolkits/tools.py +80 -49
  20. alita_sdk/runtime/tools/__init__.py +7 -2
  21. alita_sdk/runtime/tools/application.py +7 -0
  22. alita_sdk/runtime/tools/function.py +28 -23
  23. alita_sdk/runtime/tools/graph.py +10 -4
  24. alita_sdk/runtime/tools/image_generation.py +104 -8
  25. alita_sdk/runtime/tools/llm.py +146 -114
  26. alita_sdk/runtime/tools/sandbox.py +166 -63
  27. alita_sdk/runtime/tools/vectorstore.py +22 -21
  28. alita_sdk/runtime/tools/vectorstore_base.py +16 -15
  29. alita_sdk/runtime/utils/utils.py +1 -0
  30. alita_sdk/tools/__init__.py +43 -31
  31. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  32. alita_sdk/tools/base_indexer_toolkit.py +102 -93
  33. alita_sdk/tools/code_indexer_toolkit.py +15 -5
  34. alita_sdk/tools/confluence/api_wrapper.py +30 -8
  35. alita_sdk/tools/confluence/loader.py +10 -0
  36. alita_sdk/tools/elitea_base.py +22 -22
  37. alita_sdk/tools/gitlab/api_wrapper.py +8 -9
  38. alita_sdk/tools/jira/api_wrapper.py +1 -1
  39. alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
  40. alita_sdk/tools/openapi/__init__.py +10 -1
  41. alita_sdk/tools/qtest/api_wrapper.py +298 -51
  42. alita_sdk/tools/sharepoint/api_wrapper.py +104 -33
  43. alita_sdk/tools/sharepoint/authorization_helper.py +175 -1
  44. alita_sdk/tools/sharepoint/utils.py +8 -2
  45. alita_sdk/tools/utils/content_parser.py +27 -16
  46. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +38 -25
  47. {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/METADATA +1 -1
  48. {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/RECORD +51 -51
  49. {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/WHEEL +0 -0
  50. {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/licenses/LICENSE +0 -0
  51. {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/tools/sandbox.py
@@ -2,21 +2,60 @@ import asyncio
 import logging
 import subprocess
 import os
-from typing import Any, Type, Optional, Dict
-from langchain_core.tools import BaseTool
-from pydantic import BaseModel, create_model
+from typing import Any, Type, Optional, Dict, List, Literal, Union
+from copy import deepcopy
+from pathlib import Path
+
+from langchain_core.tools import BaseTool, BaseToolkit
+from langchain_core.messages import ToolCall
+from pydantic import BaseModel, create_model, ConfigDict, Field
 from pydantic.fields import FieldInfo
 
 logger = logging.getLogger(__name__)
 
+name = "pyodide"
+
+
+def get_tools(tools_list: list, alita_client=None, llm=None, memory_store=None):
+    """
+    Get sandbox tools for the provided tool configurations.
+
+    Args:
+        tools_list: List of tool configurations
+        alita_client: Alita client instance for sandbox tools
+        llm: LLM client instance (unused for sandbox)
+        memory_store: Optional memory store instance (unused for sandbox)
+
+    Returns:
+        List of sandbox tools
+    """
+    all_tools = []
+
+    for tool in tools_list:
+        if tool.get('type') == 'sandbox' or tool.get('toolkit_name') == 'sandbox':
+            try:
+                toolkit_instance = SandboxToolkit.get_toolkit(
+                    stateful=tool['settings'].get('stateful', False),
+                    allow_net=tool['settings'].get('allow_net', True),
+                    alita_client=alita_client,
+                    toolkit_name=tool.get('toolkit_name', '')
+                )
+                all_tools.extend(toolkit_instance.get_tools())
+            except Exception as e:
+                logger.error(f"Error in sandbox toolkit get_tools: {e}")
+                logger.error(f"Tool config: {tool}")
+                raise
+
+    return all_tools
+
 
 def _is_deno_available() -> bool:
     """Check if Deno is available in the PATH"""
     try:
         result = subprocess.run(
-            ["deno", "--version"],
-            capture_output=True,
-            text=True,
+            ["deno", "--version"],
+            capture_output=True,
+            text=True,
             timeout=10
         )
         return result.returncode == 0
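
For orientation, a minimal sketch of how the new module-level get_tools entry point might be driven. The config keys (type, toolkit_name, settings.stateful, settings.allow_net) come from the lookups in the hunk above; the concrete values are hypothetical, and actually running it requires Deno and langchain-sandbox to be installed.

    # Hypothetical tool configuration; only the key names are taken from the diff.
    tools_list = [
        {
            "type": "sandbox",
            "toolkit_name": "sandbox",
            "settings": {"stateful": False, "allow_net": True},
        },
    ]

    # Non-sandbox entries are ignored; sandbox entries are expanded via
    # SandboxToolkit.get_toolkit() and flattened into one list of BaseTool objects.
    sandbox_tools = get_tools(tools_list, alita_client=None)
    print([t.name for t in sandbox_tools])  # expected: ['pyodide_sandbox']
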
@@ -25,43 +64,17 @@ def _is_deno_available() -> bool:
 
 
 def _setup_pyodide_cache_env() -> None:
-    """Setup Pyodide caching environment variables for performance optimization"""
+    """Setup Pyodide caching environment variables for performance optimization [NO-OP]"""
     try:
-        # Check if cache environment file exists and source it
-        cache_env_file = os.path.expanduser("~/.pyodide_cache_env")
-        if os.path.exists(cache_env_file):
-            with open(cache_env_file, 'r') as f:
-                for line in f:
-                    line = line.strip()
-                    if line.startswith('export ') and '=' in line:
-                        # Parse export VAR=value format
-                        var_assignment = line[7:]  # Remove 'export '
-                        if '=' in var_assignment:
-                            key, value = var_assignment.split('=', 1)
-                            # Remove quotes if present
-                            value = value.strip('"').strip("'")
-                            os.environ[key] = value
-                            logger.debug(f"Set Pyodide cache env: {key}={value}")
-
-        # Set default caching environment variables if not already set
-        cache_defaults = {
-            'PYODIDE_PACKAGES_PATH': os.path.expanduser('~/.cache/pyodide'),
-            'DENO_DIR': os.path.expanduser('~/.cache/deno'),
-            'PYODIDE_CACHE_DIR': os.path.expanduser('~/.cache/pyodide'),
-        }
-
-        for key, default_value in cache_defaults.items():
-            if key not in os.environ:
-                os.environ[key] = default_value
-                logger.debug(f"Set default Pyodide env: {key}={default_value}")
-
+        for key in ["SANDBOX_BASE", "DENO_DIR"]:
+            logger.info("Sandbox env: %s -> %s", key, os.environ.get(key, "n/a"))
     except Exception as e:
         logger.warning(f"Could not setup Pyodide cache environment: {e}")
 
 
 # Create input schema for the sandbox tool
 sandbox_tool_input = create_model(
-    "SandboxToolInput",
+    "SandboxToolInput",
     code=(str, FieldInfo(description="Python code to execute in the sandbox environment"))
 )
 
@@ -72,7 +85,7 @@ class PyodideSandboxTool(BaseTool):
     This tool leverages langchain-sandbox to provide a safe environment for running untrusted Python code.
     Optimized for performance with caching and stateless execution by default.
     """
-
+
     name: str = "pyodide_sandbox"
     description: str = """Execute Python code in a secure sandbox environment using Pyodide.
     This tool allows safe execution of Python code without access to the host system.
@@ -81,7 +94,7 @@ class PyodideSandboxTool(BaseTool):
     - Perform calculations or data analysis
     - Test Python algorithms
     - Run code that requires isolation from the host system
-
+
     The sandbox supports most Python standard library modules and can install additional packages.
     Note: File access and some system operations are restricted for security.
     Optimized for performance with local caching (stateless by default for faster execution).
@@ -91,14 +104,37 @@ class PyodideSandboxTool(BaseTool):
     allow_net: bool = True
     session_bytes: Optional[bytes] = None
     session_metadata: Optional[Dict] = None
-
+    alita_client: Optional[Any] = None
+
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
         self._sandbox = None
         # Setup caching environment for optimal performance
         _setup_pyodide_cache_env()
         self._initialize_sandbox()
-
+
+    def _prepare_pyodide_input(self, code: str) -> str:
+        """Prepare input for PyodideSandboxTool by injecting state and alita_client into the code block."""
+        pyodide_predata = ""
+
+        # Add alita_client if available
+        if self.alita_client:
+            try:
+                # Get the directory of the current file and construct the path to sandbox_client.py
+                current_dir = Path(__file__).parent
+                sandbox_client_path = current_dir.parent / 'clients' / 'sandbox_client.py'
+
+                with open(sandbox_client_path, 'r') as f:
+                    sandbox_client_code = f.read()
+                pyodide_predata += f"{sandbox_client_code}\n"
+                pyodide_predata += (f"alita_client = SandboxClient(base_url='{self.alita_client.base_url}',"
+                                    f"project_id={self.alita_client.project_id},"
+                                    f"auth_token='{self.alita_client.auth_token}')\n")
+            except FileNotFoundError:
+                logger.error(f"sandbox_client.py not found. Ensure the file exists.")
+
+        return f"#elitea simplified client\n{pyodide_predata}{code}"
+
     def _initialize_sandbox(self) -> None:
         """Initialize the PyodideSandbox instance with optimized settings"""
         try:
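
For clarity, the payload that _prepare_pyodide_input() ships to the sandbox when an alita_client is attached is the concatenation of three parts (the values below are hypothetical placeholders):

    # #elitea simplified client
    # <entire source of alita_sdk/runtime/clients/sandbox_client.py>
    # alita_client = SandboxClient(base_url='https://host.example',project_id=1,auth_token='<token>')
    # <the user's code, unchanged>

Worth noting: the auth token is interpolated into the generated source as a plain string literal, so anything that logs or echoes the prepared code will expose it.
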
@@ -110,12 +146,22 @@
             )
             logger.error(error_msg)
             raise RuntimeError(error_msg)
-
+
         from langchain_sandbox import PyodideSandbox
-
+
+        # Air-gapped settings
+        sandbox_base = os.environ.get("SANDBOX_BASE", os.path.expanduser('~/.cache/pyodide'))
+        sandbox_tmp = os.path.join(sandbox_base, "tmp")
+        deno_cache = os.environ.get("DENO_DIR", os.path.expanduser('~/.cache/deno'))
+
         # Configure sandbox with performance optimizations
         self._sandbox = PyodideSandbox(
             stateful=self.stateful,
+            #
+            allow_env=["SANDBOX_BASE"],
+            allow_read=[sandbox_base, sandbox_tmp, deno_cache],
+            allow_write=[sandbox_tmp, deno_cache],
+            #
             allow_net=self.allow_net,
             # Use auto node_modules_dir for better caching
             node_modules_dir="auto"
@@ -135,7 +181,7 @@
         except Exception as e:
             logger.error(f"Failed to initialize PyodideSandbox: {e}")
             raise
-
+
     def _run(self, code: str) -> str:
         """
         Synchronous version - runs the async method in a new event loop
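
The air-gapped settings above read two environment variables on the host before the Deno permission lists are built. A minimal sketch, with deployment-specific paths assumed:

    import os

    # Assumed paths. SANDBOX_BASE holds the Pyodide package cache plus a tmp/
    # workspace (readable; tmp/ also writable); DENO_DIR is Deno's module cache
    # (read/write). Both fall back to ~/.cache/pyodide and ~/.cache/deno when unset.
    os.environ.setdefault("SANDBOX_BASE", "/opt/cache/pyodide")
    os.environ.setdefault("DENO_DIR", "/opt/cache/deno")
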
@@ -144,7 +190,10 @@
         # Check if sandbox is initialized, if not try to initialize
         if self._sandbox is None:
             self._initialize_sandbox()
-
+
+        # Prepare code with state and client injection
+        prepared_code = self._prepare_pyodide_input(code)
+
         # Check if we're already in an async context
         try:
             loop = asyncio.get_running_loop()
@@ -152,11 +201,11 @@
             # We'll need to use a different approach
             import concurrent.futures
             with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(asyncio.run, self._arun(code))
+                future = executor.submit(asyncio.run, self._arun(prepared_code))
                 return future.result()
         except RuntimeError:
             # No running loop, safe to use asyncio.run
-            return asyncio.run(self._arun(code))
+            return asyncio.run(self._arun(prepared_code))
         except (ImportError, RuntimeError) as e:
             # Handle specific dependency errors gracefully
             error_msg = str(e)
@@ -169,7 +218,7 @@
         except Exception as e:
             logger.error(f"Error executing code in sandbox: {e}")
             return f"Error executing code: {str(e)}"
-
+
     async def _arun(self, code: str) -> str:
         """
         Execute Python code in the Pyodide sandbox
@@ -177,19 +226,19 @@
         try:
             if self._sandbox is None:
                 self._initialize_sandbox()
-
+
             # Execute the code with session state if available
             result = await self._sandbox.execute(
                 code,
                 session_bytes=self.session_bytes,
                 session_metadata=self.session_metadata
             )
-
+
             # Update session state for stateful execution
             if self.stateful:
                 self.session_bytes = result.session_bytes
                 self.session_metadata = result.session_metadata
-
+
             result_dict = {}
 
             if result.result is not None:
@@ -212,10 +261,10 @@
 
             result_dict["execution_info"] = execution_info
             return result_dict
-
+
         except Exception as e:
             logger.error(f"Error executing code in sandbox: {e}")
-            return f"Error executing code: {str(e)}"
+            return {"error": f"Error executing code: {str(e)}"}
 
 
 class StatefulPyodideSandboxTool(PyodideSandboxTool):
@@ -223,7 +272,7 @@ class StatefulPyodideSandboxTool(PyodideSandboxTool):
    A stateful version of the PyodideSandboxTool that maintains state between executions.
    This version preserves variables, imports, and function definitions across multiple tool calls.
    """
-
+
    name: str = "stateful_pyodide_sandbox"
    description: str = """Execute Python code in a stateful sandbox environment using Pyodide.
    This tool maintains state between executions, preserving variables, imports, and function definitions.
@@ -232,41 +281,95 @@ class StatefulPyodideSandboxTool(PyodideSandboxTool):
     - Maintain variables across multiple calls
     - Develop complex programs step by step
     - Preserve imported libraries and defined functions
-
+
     The sandbox supports most Python standard library modules and can install additional packages.
     Note: File access and some system operations are restricted for security.
     """
-
+
     def __init__(self, **kwargs: Any) -> None:
         kwargs['stateful'] = True  # Force stateful mode
         super().__init__(**kwargs)
 
 
 # Factory function for creating sandbox tools
-def create_sandbox_tool(stateful: bool = False, allow_net: bool = True) -> BaseTool:
+def create_sandbox_tool(stateful: bool = False, allow_net: bool = True, alita_client: Optional[Any] = None) -> BaseTool:
     """
     Factory function to create sandbox tools with specified configuration.
-
+
     Note: This tool requires Deno to be installed and available in PATH.
     For installation and optimization, run the bootstrap.sh script.
-
+
     Args:
         stateful: Whether to maintain state between executions (default: False for better performance)
         allow_net: Whether to allow network access (for package installation)
-
+
     Returns:
         Configured sandbox tool instance
-
+
     Raises:
         ImportError: If langchain-sandbox is not installed
         RuntimeError: If Deno is not found in PATH
-
+
     Performance Notes:
         - Stateless mode (default) is faster and avoids session state overhead
        - Run bootstrap.sh script to enable local caching and reduce initialization time
        - Cached wheels reduce package download time from ~4.76s to near-instant
     """
     if stateful:
-        return StatefulPyodideSandboxTool(allow_net=allow_net)
+        return StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client)
     else:
-        return PyodideSandboxTool(stateful=False, allow_net=allow_net)
+        return PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client)
+
+
+class SandboxToolkit(BaseToolkit):
+    tools: List[BaseTool] = []
+
+    @staticmethod
+    def toolkit_config_schema() -> Type[BaseModel]:
+        # Create sample tools to get their schemas
+        sample_tools = [
+            PyodideSandboxTool(),
+            StatefulPyodideSandboxTool()
+        ]
+        selected_tools = {x.name: x.args_schema.model_json_schema() for x in sample_tools}
+
+        return create_model(
+            'sandbox',
+            stateful=(bool, Field(default=False, description="Whether to maintain state between executions")),
+            allow_net=(bool, Field(default=True, description="Whether to allow network access for package installation")),
+            selected_tools=(List[Literal[tuple(selected_tools)]],
+                            Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
+
+            __config__=ConfigDict(json_schema_extra={
+                'metadata': {
+                    "label": "Python Sandbox",
+                    "icon_url": "sandbox.svg",
+                    "hidden": False,
+                    "categories": ["code", "execution", "internal_tool"],
+                    "extra_categories": ["python", "pyodide", "sandbox", "code execution"],
+                }
+            })
+        )
+
+    @classmethod
+    def get_toolkit(cls, stateful: bool = False, allow_net: bool = True, alita_client=None, **kwargs):
+        """
+        Get toolkit with sandbox tools.
+
+        Args:
+            stateful: Whether to maintain state between executions
+            allow_net: Whether to allow network access
+            alita_client: Alita client instance for sandbox tools
+            **kwargs: Additional arguments
+        """
+        tools = []
+
+        if stateful:
+            tools.append(StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client))
+        else:
+            tools.append(PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client))
+
+        return cls(tools=tools)
+
+    def get_tools(self):
+        return self.tools
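
A minimal usage sketch of the toolkit surface introduced above, assuming Deno and langchain-sandbox are available; the second call reusing x is the behavior the stateful tool's description promises:

    toolkit = SandboxToolkit.get_toolkit(stateful=True, allow_net=True)
    tool = toolkit.get_tools()[0]  # StatefulPyodideSandboxTool

    print(tool.invoke({"code": "x = 21"}))  # first call defines x in the session
    print(tool.invoke({"code": "x * 2"}))   # second call can reuse x (stateful=True)
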
alita_sdk/runtime/tools/vectorstore.py
@@ -207,9 +207,9 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             tool_name="_remove_collection"
         )
 
-    def _get_indexed_ids(self, collection_suffix: Optional[str] = '') -> List[str]:
+    def _get_indexed_ids(self, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from vectorstore"""
-        return self.vector_adapter.get_indexed_ids(self, collection_suffix)
+        return self.vector_adapter.get_indexed_ids(self, index_name)
 
     def list_collections(self) -> Any:
         """List all collections in the vectorstore.
@@ -233,7 +233,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             return {"collections": [], "message": "No indexed collections"}
         return cols
 
-    def _clean_collection(self, collection_suffix: str = ''):
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """
@@ -241,15 +241,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self, collection_suffix)
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
-    def _get_code_indexed_data(self, collection_suffix: str) -> Dict[str, Dict[str, Any]]:
+    def _get_code_indexed_data(self, index_name: str) -> Dict[str, Dict[str, Any]]:
         """ Get all indexed data from vectorstore for code content """
-        return self.vector_adapter.get_code_indexed_data(self, collection_suffix)
+        return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def _add_to_collection(self, entry_id, new_collection_value):
         """Add a new collection name to the `collection` key in the `metadata` column."""
@@ -258,7 +258,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     def _reduce_duplicates(
         self,
         documents: Generator[Any, None, None],
-        collection_suffix: str,
+        index_name: str,
         get_indexed_data: Callable,
         key_fn: Callable,
         compare_fn: Callable,
@@ -267,7 +267,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     ) -> List[Any]:
         """Generic duplicate reduction logic for documents."""
         self._log_data(log_msg, tool_name="index_documents")
-        indexed_data = get_indexed_data(collection_suffix)
+        indexed_data = get_indexed_data(index_name)
         indexed_keys = set(indexed_data.keys())
         if not indexed_keys:
             self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -279,14 +279,14 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         for document in documents:
             key = key_fn(document)
             key = key if isinstance(key, str) else str(key)
-            if key in indexed_keys and collection_suffix == indexed_data[key]['metadata'].get('collection'):
+            if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
                 if compare_fn(document, indexed_data[key]):
                     # Disabled addition of new collection to already indexed documents
                     # # check metadata.collection and update if needed
                     # for update_collection_id in remove_ids_fn(indexed_data, key):
                     #     self._add_to_collection(
                     #         update_collection_id,
-                    #         collection_suffix
+                    #         index_name
                     #     )
                     continue
                 final_docs.append(document)
@@ -303,10 +303,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
         return final_docs
 
-    def _reduce_code_duplicates(self, documents: Generator[Any, None, None], collection_suffix: str) -> List[Any]:
+    def _reduce_code_duplicates(self, documents: Generator[Any, None, None], index_name: str) -> List[Any]:
         return self._reduce_duplicates(
             documents,
-            collection_suffix,
+            index_name,
             self._get_code_indexed_data,
             lambda doc: doc.metadata.get('filename'),
             lambda doc, idx: (
@@ -318,7 +318,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             log_msg="Verification of code documents to index started"
         )
 
-    def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
         """ Index documents in the vectorstore.
 
         Args:
@@ -329,13 +329,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
         from ..langchain.interfaces.llm_processor import add_documents
 
-        self._log_tool_event(message=f"Starting the indexing... Parameters: {collection_suffix=}, {clean_index=}, {is_code}", tool_name="index_documents")
+        self._log_tool_event(message=f"Starting the indexing... Parameters: {index_name=}, {clean_index=}, {is_code}", tool_name="index_documents")
         # pre-process documents if needed (find duplicates, etc.)
         if clean_index:
             logger.info("Cleaning index before re-indexing all documents.")
             self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
             try:
-                self._clean_collection(collection_suffix)
+                self._clean_collection(index_name)
                 self.vectoradapter.persist()
                 self.vectoradapter.vacuum()
                 self._log_data("Previous index has been removed",
@@ -349,7 +349,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 message="Filter for duplicates",
                 tool_name="index_documents")
             # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
-            documents = self._reduce_code_duplicates(documents, collection_suffix)
+            documents = self._reduce_code_duplicates(documents, index_name)
             self._log_tool_event(
                 message="All the duplicates were filtered out. Proceeding with indexing.",
                 tool_name="index_documents")
@@ -377,13 +377,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
                              tool_name="index_documents")
 
-        # if collection_suffix is provided, add it to metadata of each document
-        if collection_suffix:
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] = collection_suffix
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{collection_suffix}"
+                    doc.metadata['collection'] += f";{index_name}"
 
         total_docs = len(documents)
         documents_count = 0
@@ -414,7 +414,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "No new documents to index."}
 
     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
alita_sdk/runtime/tools/vectorstore_base.py
@@ -216,13 +216,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             return "No indexed collections"
         return collections
 
-    def get_index_meta(self, collection_suffix: str):
-        index_metas = self.vector_adapter.get_index_meta(self, collection_suffix)
+    def get_index_meta(self, index_name: str):
+        index_metas = self.vector_adapter.get_index_meta(self, index_name)
         if len(index_metas) > 1:
             raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
         return index_metas[0] if index_metas else None
 
-    def _clean_collection(self, collection_suffix: str = ''):
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """
@@ -230,13 +230,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self, collection_suffix)
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_tool_event(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
-    def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True):
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True):
         """ Index documents in the vectorstore.
 
         Args:
@@ -245,21 +245,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             clean_index (bool): If True, clean the index before re-indexing all documents.
         """
         if clean_index:
-            self._clean_index(collection_suffix)
+            self._clean_index(index_name)
 
-        return self._save_index(list(documents), collection_suffix, progress_step)
+        return self._save_index(list(documents), index_name, progress_step)
 
-    def _clean_index(self, collection_suffix: str):
+    def _clean_index(self, index_name: str):
         logger.info("Cleaning index before re-indexing all documents.")
         self._log_tool_event("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
         try:
-            self._clean_collection(collection_suffix)
+            self._clean_collection(index_name)
             self._log_tool_event("Previous index has been removed",
                                  tool_name="index_documents")
         except Exception as e:
             logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")
 
-    def _save_index(self, documents: list[Document], collection_suffix: Optional[str] = None, progress_step: int = 20):
+    def _save_index(self, documents: list[Document], index_name: Optional[str] = None, progress_step: int = 20):
         from ..langchain.interfaces.llm_processor import add_documents
         #
         for doc in documents:
@@ -268,13 +268,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
 
         logger.debug(f"Indexing documents: {documents}")
 
-        # if collection_suffix is provided, add it to metadata of each document
-        if collection_suffix:
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] = collection_suffix
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{collection_suffix}"
+                    doc.metadata['collection'] += f";{index_name}"
 
         total_docs = len(documents)
         documents_count = 0
@@ -308,7 +308,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                 return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "no documents to index"}
 
     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
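
Both wrappers rename the collection_suffix parameter to index_name, which breaks keyword callers. A migration sketch; wrapper construction is deployment-specific, so it is left as a parameter here:

    def reindex(wrapper, docs):
        # wrapper: a configured VectorStoreWrapper or VectorStoreWrapperBase instance.
        # Before this release: wrapper.index_documents(docs, collection_suffix="release-notes")
        return wrapper.index_documents(docs, index_name="release-notes", clean_index=True)
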
alita_sdk/runtime/utils/utils.py
@@ -14,6 +14,7 @@ class IndexerKeywords(Enum):
     INDEX_META_TYPE = 'index_meta'
     INDEX_META_IN_PROGRESS = 'in_progress'
     INDEX_META_COMPLETED = 'completed'
+    INDEX_META_FAILED = 'failed'
 
 # This pattern matches characters that are NOT alphanumeric, underscores, or hyphens
 clean_string_pattern = re.compile(r'[^a-zA-Z0-9_.-]')
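
IndexerKeywords now carries a terminal 'failed' state alongside 'in_progress' and 'completed'. A hypothetical consumer-side check; the shape of the index_meta document is assumed for illustration and is not shown in this diff:

    def needs_clean_reindex(index_meta: dict) -> bool:
        # The 'status' key is an assumption about the index_meta payload.
        status = index_meta.get("metadata", {}).get("status")
        return status in (IndexerKeywords.INDEX_META_FAILED.value,       # 'failed'
                          IndexerKeywords.INDEX_META_IN_PROGRESS.value)  # stale run
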