alita-sdk 0.3.373__py3-none-any.whl → 0.3.375__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk has been flagged as possibly problematic.

@@ -207,9 +207,9 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  tool_name="_remove_collection"
  )

- def _get_indexed_ids(self, collection_suffix: Optional[str] = '') -> List[str]:
+ def _get_indexed_ids(self, index_name: Optional[str] = '') -> List[str]:
  """Get all indexed document IDs from vectorstore"""
- return self.vector_adapter.get_indexed_ids(self, collection_suffix)
+ return self.vector_adapter.get_indexed_ids(self, index_name)

  def list_collections(self) -> Any:
  """List all collections in the vectorstore.
@@ -233,7 +233,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  return {"collections": [], "message": "No indexed collections"}
  return cols

- def _clean_collection(self, collection_suffix: str = ''):
+ def _clean_collection(self, index_name: str = ''):
  """
  Clean the vectorstore collection by deleting all indexed data.
  """
@@ -241,15 +241,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  f"Cleaning collection '{self.dataset}'",
  tool_name="_clean_collection"
  )
- self.vector_adapter.clean_collection(self, collection_suffix)
+ self.vector_adapter.clean_collection(self, index_name)
  self._log_data(
  f"Collection '{self.dataset}' has been cleaned. ",
  tool_name="_clean_collection"
  )

- def _get_code_indexed_data(self, collection_suffix: str) -> Dict[str, Dict[str, Any]]:
+ def _get_code_indexed_data(self, index_name: str) -> Dict[str, Dict[str, Any]]:
  """ Get all indexed data from vectorstore for code content """
- return self.vector_adapter.get_code_indexed_data(self, collection_suffix)
+ return self.vector_adapter.get_code_indexed_data(self, index_name)

  def _add_to_collection(self, entry_id, new_collection_value):
  """Add a new collection name to the `collection` key in the `metadata` column."""
@@ -258,7 +258,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  def _reduce_duplicates(
  self,
  documents: Generator[Any, None, None],
- collection_suffix: str,
+ index_name: str,
  get_indexed_data: Callable,
  key_fn: Callable,
  compare_fn: Callable,
@@ -267,7 +267,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  ) -> List[Any]:
  """Generic duplicate reduction logic for documents."""
  self._log_data(log_msg, tool_name="index_documents")
- indexed_data = get_indexed_data(collection_suffix)
+ indexed_data = get_indexed_data(index_name)
  indexed_keys = set(indexed_data.keys())
  if not indexed_keys:
  self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -279,14 +279,14 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  for document in documents:
  key = key_fn(document)
  key = key if isinstance(key, str) else str(key)
- if key in indexed_keys and collection_suffix == indexed_data[key]['metadata'].get('collection'):
+ if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
  if compare_fn(document, indexed_data[key]):
  # Disabled addition of new collection to already indexed documents
  # # check metadata.collection and update if needed
  # for update_collection_id in remove_ids_fn(indexed_data, key):
  # self._add_to_collection(
  # update_collection_id,
- # collection_suffix
+ # index_name
  # )
  continue
  final_docs.append(document)
@@ -303,10 +303,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):

  return final_docs

- def _reduce_code_duplicates(self, documents: Generator[Any, None, None], collection_suffix: str) -> List[Any]:
+ def _reduce_code_duplicates(self, documents: Generator[Any, None, None], index_name: str) -> List[Any]:
  return self._reduce_duplicates(
  documents,
- collection_suffix,
+ index_name,
  self._get_code_indexed_data,
  lambda doc: doc.metadata.get('filename'),
  lambda doc, idx: (
@@ -318,7 +318,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  log_msg="Verification of code documents to index started"
  )

- def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
+ def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
  """ Index documents in the vectorstore.

  Args:
@@ -329,13 +329,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):

  from ..langchain.interfaces.llm_processor import add_documents

- self._log_tool_event(message=f"Starting the indexing... Parameters: {collection_suffix=}, {clean_index=}, {is_code}", tool_name="index_documents")
+ self._log_tool_event(message=f"Starting the indexing... Parameters: {index_name=}, {clean_index=}, {is_code}", tool_name="index_documents")
  # pre-process documents if needed (find duplicates, etc.)
  if clean_index:
  logger.info("Cleaning index before re-indexing all documents.")
  self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
  try:
- self._clean_collection(collection_suffix)
+ self._clean_collection(index_name)
  self.vectoradapter.persist()
  self.vectoradapter.vacuum()
  self._log_data("Previous index has been removed",
@@ -349,7 +349,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  message="Filter for duplicates",
  tool_name="index_documents")
  # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
- documents = self._reduce_code_duplicates(documents, collection_suffix)
+ documents = self._reduce_code_duplicates(documents, index_name)
  self._log_tool_event(
  message="All the duplicates were filtered out. Proceeding with indexing.",
  tool_name="index_documents")
@@ -377,13 +377,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
  tool_name="index_documents")

- # if collection_suffix is provided, add it to metadata of each document
- if collection_suffix:
+ # if index_name is provided, add it to metadata of each document
+ if index_name:
  for doc in documents:
  if not doc.metadata.get('collection'):
- doc.metadata['collection'] = collection_suffix
+ doc.metadata['collection'] = index_name
  else:
- doc.metadata['collection'] += f";{collection_suffix}"
+ doc.metadata['collection'] += f";{index_name}"

  total_docs = len(documents)
  documents_count = 0
@@ -216,13 +216,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
  return "No indexed collections"
  return collections

- def get_index_meta(self, collection_suffix: str):
- index_metas = self.vector_adapter.get_index_meta(self, collection_suffix)
+ def get_index_meta(self, index_name: str):
+ index_metas = self.vector_adapter.get_index_meta(self, index_name)
  if len(index_metas) > 1:
  raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
  return index_metas[0] if index_metas else None

- def _clean_collection(self, collection_suffix: str = ''):
+ def _clean_collection(self, index_name: str = ''):
  """
  Clean the vectorstore collection by deleting all indexed data.
  """
@@ -230,13 +230,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
  f"Cleaning collection '{self.dataset}'",
  tool_name="_clean_collection"
  )
- self.vector_adapter.clean_collection(self, collection_suffix)
+ self.vector_adapter.clean_collection(self, index_name)
  self._log_tool_event(
  f"Collection '{self.dataset}' has been cleaned. ",
  tool_name="_clean_collection"
  )

- def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True):
+ def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True):
  """ Index documents in the vectorstore.

  Args:
@@ -245,21 +245,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
  clean_index (bool): If True, clean the index before re-indexing all documents.
  """
  if clean_index:
- self._clean_index(collection_suffix)
+ self._clean_index(index_name)

- return self._save_index(list(documents), collection_suffix, progress_step)
+ return self._save_index(list(documents), index_name, progress_step)

- def _clean_index(self, collection_suffix: str):
+ def _clean_index(self, index_name: str):
  logger.info("Cleaning index before re-indexing all documents.")
  self._log_tool_event("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
  try:
- self._clean_collection(collection_suffix)
+ self._clean_collection(index_name)
  self._log_tool_event("Previous index has been removed",
  tool_name="index_documents")
  except Exception as e:
  logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")

- def _save_index(self, documents: list[Document], collection_suffix: Optional[str] = None, progress_step: int = 20):
+ def _save_index(self, documents: list[Document], index_name: Optional[str] = None, progress_step: int = 20):
  from ..langchain.interfaces.llm_processor import add_documents
  #
  for doc in documents:
@@ -268,13 +268,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):

  logger.debug(f"Indexing documents: {documents}")

- # if collection_suffix is provided, add it to metadata of each document
- if collection_suffix:
+ # if index_name is provided, add it to metadata of each document
+ if index_name:
  for doc in documents:
  if not doc.metadata.get('collection'):
- doc.metadata['collection'] = collection_suffix
+ doc.metadata['collection'] = index_name
  else:
- doc.metadata['collection'] += f";{collection_suffix}"
+ doc.metadata['collection'] += f";{index_name}"

  total_docs = len(documents)
  documents_count = 0
@@ -19,19 +19,19 @@ logger = logging.getLogger(__name__)
  # Base Vector Store Schema Models
  BaseIndexParams = create_model(
  "BaseIndexParams",
- collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
+ index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
  )

  RemoveIndexParams = create_model(
  "RemoveIndexParams",
- collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+ index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
  )

  BaseSearchParams = create_model(
  "BaseSearchParams",
  query=(str, Field(description="Query text to search in the index")),
- collection_suffix=(Optional[str], Field(
- description="Optional suffix for collection name (max 7 characters). Leave empty to search across all datasets",
+ index_name=(Optional[str], Field(
+ description="Optional index name (max 7 characters). Leave empty to search across all datasets",
  default="", max_length=7)),
  filter=(Optional[dict | str], Field(
  description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
@@ -61,7 +61,7 @@ BaseSearchParams = create_model(
  BaseStepbackSearchParams = create_model(
  "BaseStepbackSearchParams",
  query=(str, Field(description="Query text to search in the index")),
- collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+ index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
  messages=(Optional[List], Field(description="Chat messages for stepback search context", default=[])),
  filter=(Optional[dict | str], Field(
  description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
@@ -151,18 +151,18 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  yield from ()

  def index_data(self, **kwargs):
- collection_suffix = kwargs.get("collection_suffix")
+ index_name = kwargs.get("index_name")
  progress_step = kwargs.get("progress_step")
  clean_index = kwargs.get("clean_index")
  chunking_tool = kwargs.get("chunking_tool")
  chunking_config = kwargs.get("chunking_config")
  #
  if clean_index:
- self._clean_index(collection_suffix)
+ self._clean_index(index_name)
  #
- self.index_meta_init(collection_suffix, kwargs)
+ self.index_meta_init(index_name, kwargs)
  #
- self._log_tool_event(f"Indexing data into collection with suffix '{collection_suffix}'. It can take some time...")
+ self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
  self._log_tool_event(f"Loading the documents to index...{kwargs}")
  documents = self._base_loader(**kwargs)
  documents = list(documents) # consume/exhaust generator to count items
@@ -170,16 +170,16 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  documents = (doc for doc in documents)
  self._log_tool_event(f"Base documents were pre-loaded. "
  f"Search for possible document duplicates and remove them from the indexing list...")
- documents = self._reduce_duplicates(documents, collection_suffix)
+ documents = self._reduce_duplicates(documents, index_name)
  self._log_tool_event(f"Duplicates were removed. "
  f"Processing documents to collect dependencies and prepare them for indexing...")
- result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
+ result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, progress_step=progress_step)
  #
- self.index_meta_update(collection_suffix, IndexerKeywords.INDEX_META_COMPLETED.value, result)
+ self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, result)
  #
  return {"status": "ok", "message": f"successfully indexed {result} documents"}

- def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, collection_suffix: Optional[str] = None, progress_step: int = 20):
+ def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, index_name: Optional[str] = None, progress_step: int = 20):
  self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
  from ..runtime.langchain.interfaces.llm_processor import add_documents
  #
@@ -211,12 +211,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  if 'id' not in doc.metadata or 'updated_on' not in doc.metadata:
  logger.warning(f"Document is missing required metadata field 'id' or 'updated_on': {doc.metadata}")
  #
- # if collection_suffix is provided, add it to metadata of each document
- if collection_suffix:
+ # if index_name is provided, add it to metadata of each document
+ if index_name:
  if not doc.metadata.get('collection'):
- doc.metadata['collection'] = collection_suffix
+ doc.metadata['collection'] = index_name
  else:
- doc.metadata['collection'] += f";{collection_suffix}"
+ doc.metadata['collection'] += f";{index_name}"
  #
  try:
  pg_vector_add_docs_chunk.append(doc)
@@ -295,12 +295,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  def _reduce_duplicates(
  self,
  documents: Generator[Any, None, None],
- collection_suffix: str,
+ index_name: str,
  log_msg: str = "Verification of documents to index started"
  ) -> Generator[Document, None, None]:
  """Generic duplicate reduction logic for documents."""
  self._log_tool_event(log_msg, tool_name="index_documents")
- indexed_data = self._get_indexed_data(collection_suffix)
+ indexed_data = self._get_indexed_data(index_name)
  indexed_keys = set(indexed_data.keys())
  if not indexed_keys:
  self._log_tool_event("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -312,7 +312,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  for document in documents:
  key = self.key_fn(document)
  key = key if isinstance(key, str) else str(key)
- if key in indexed_keys and collection_suffix == indexed_data[key]['metadata'].get('collection'):
+ if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
  if self.compare_fn(document, indexed_data[key]):
  continue
  yield document
@@ -327,7 +327,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  )
  self.vectorstore.delete(ids=list(docs_to_remove))

- def _get_indexed_data(self, collection_suffix: str):
+ def _get_indexed_data(self, index_name: str):
  raise NotImplementedError("Subclasses must implement this method")

  def key_fn(self, document: Document):
@@ -339,20 +339,20 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  def remove_ids_fn(self, idx_data, key: str):
  raise NotImplementedError("Subclasses must implement this method")

- def remove_index(self, collection_suffix: str = ""):
+ def remove_index(self, index_name: str = ""):
  """Cleans the indexed data in the collection."""
- super()._clean_collection(collection_suffix=collection_suffix)
- return (f"Collection '{collection_suffix}' has been removed from the vector store.\n"
- f"Available collections: {self.list_collections()}") if collection_suffix \
+ super()._clean_collection(index_name=index_name)
+ return (f"Collection '{index_name}' has been removed from the vector store.\n"
+ f"Available collections: {self.list_collections()}") if index_name \
  else "All collections have been removed from the vector store."

- def _build_collection_filter(self, filter: dict | str, collection_suffix: str = "") -> dict:
+ def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
  """Builds a filter for the collection based on the provided suffix."""

  filter = filter if isinstance(filter, dict) else json.loads(filter)
- if collection_suffix:
+ if index_name:
  filter.update({"collection": {
- "$eq": collection_suffix.strip()
+ "$eq": index_name.strip()
  }})

  if filter:
@@ -375,7 +375,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):

  def search_index(self,
  query: str,
- collection_suffix: str = "",
+ index_name: str = "",
  filter: dict | str = {}, cut_off: float = 0.5,
  search_top: int = 10, reranker: dict = {},
  full_text_search: Optional[Dict[str, Any]] = None,
@@ -383,13 +383,13 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  extended_search: Optional[List[str]] = None,
  **kwargs):
  """ Searches indexed documents in the vector store."""
- # build filter on top of collection_suffix
+ # build filter on top of index_name

  available_collections = super().list_collections()
- if collection_suffix and collection_suffix not in available_collections:
- return f"Collection '{collection_suffix}' not found. Available collections: {available_collections}"
+ if index_name and index_name not in available_collections:
+ return f"Collection '{index_name}' not found. Available collections: {available_collections}"

- filter = self._build_collection_filter(filter, collection_suffix)
+ filter = self._build_collection_filter(filter, index_name)
  found_docs = super().search_documents(
  query,
  doctype=self.doctype,
@@ -406,7 +406,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  def stepback_search_index(self,
  query: str,
  messages: List[Dict[str, Any]] = [],
- collection_suffix: str = "",
+ index_name: str = "",
  filter: dict | str = {}, cut_off: float = 0.5,
  search_top: int = 10, reranker: dict = {},
  full_text_search: Optional[Dict[str, Any]] = None,
@@ -414,7 +414,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  extended_search: Optional[List[str]] = None,
  **kwargs):
  """ Searches indexed documents in the vector store."""
- filter = self._build_collection_filter(filter, collection_suffix)
+ filter = self._build_collection_filter(filter, index_name)
  found_docs = super().stepback_search(
  query,
  messages,
@@ -431,7 +431,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  def stepback_summary_index(self,
  query: str,
  messages: List[Dict[str, Any]] = [],
- collection_suffix: str = "",
+ index_name: str = "",
  filter: dict | str = {}, cut_off: float = 0.5,
  search_top: int = 10, reranker: dict = {},
  full_text_search: Optional[Dict[str, Any]] = None,
@@ -440,7 +440,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  **kwargs):
  """ Generates a summary of indexed documents using stepback technique."""

- filter = self._build_collection_filter(filter, collection_suffix)
+ filter = self._build_collection_filter(filter, index_name)
  return super().stepback_summary(
  query,
  messages,
@@ -453,12 +453,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  extended_search=extended_search
  )

- def index_meta_init(self, collection_suffix: str, index_configuration: dict[str, Any]):
- index_meta_raw = super().get_index_meta(collection_suffix)
+ def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
+ index_meta_raw = super().get_index_meta(index_name)
  from ..runtime.langchain.interfaces.llm_processor import add_documents
  created_on = time.time()
  metadata = {
- "collection": collection_suffix,
+ "collection": index_name,
  "type": IndexerKeywords.INDEX_META_TYPE.value,
  "indexed": 0,
  "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
@@ -483,11 +483,11 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  metadata["history"] = json.dumps(history)
  index_meta_ids = [index_meta_raw.get("id")]
  #
- index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata=metadata)
+ index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
  add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)

- def index_meta_update(self, collection_suffix: str, state: str, result: int):
- index_meta_raw = super().get_index_meta(collection_suffix)
+ def index_meta_update(self, index_name: str, state: str, result: int):
+ index_meta_raw = super().get_index_meta(index_name)
  from ..runtime.langchain.interfaces.llm_processor import add_documents
  #
  if index_meta_raw:
@@ -14,11 +14,11 @@ logger = logging.getLogger(__name__)


  class CodeIndexerToolkit(BaseIndexerToolkit):
- def _get_indexed_data(self, collection_suffix: str):
+ def _get_indexed_data(self, index_name: str):
  if not self.vector_adapter:
  raise ToolException("Vector adapter is not initialized. "
  "Check your configuration: embedding_model and vectorstore_type.")
- return self.vector_adapter.get_code_indexed_data(self, collection_suffix)
+ return self.vector_adapter.get_code_indexed_data(self, index_name)

  def key_fn(self, document: Document):
  return document.metadata.get('id')
@@ -1674,7 +1674,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
  description="List of file extensions to skip when processing attachments: i.e. ['*.png', '*.jpg']",
  default=[])),
  "include_comments": (Optional[bool], Field(description="Include comments.", default=False)),
- "include_labels": (Optional[bool], Field(description="Include labels.", default=True)),
+ "include_labels": (Optional[bool], Field(description="Include labels.", default=False)),
  "ocr_languages": (Optional[str], Field(description="OCR languages for processing attachments.", default='eng')),
  "keep_markdown_format": (Optional[bool], Field(description="Keep the markdown format.", default=True)),
  "keep_newlines": (Optional[bool], Field(description="Keep newlines in the content.", default=True)),