alita-sdk 0.3.379__py3-none-any.whl → 0.3.462__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk might be problematic.

Files changed (110):
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent_executor.py +144 -0
  4. alita_sdk/cli/agent_loader.py +197 -0
  5. alita_sdk/cli/agent_ui.py +166 -0
  6. alita_sdk/cli/agents.py +1069 -0
  7. alita_sdk/cli/callbacks.py +576 -0
  8. alita_sdk/cli/cli.py +159 -0
  9. alita_sdk/cli/config.py +153 -0
  10. alita_sdk/cli/formatting.py +182 -0
  11. alita_sdk/cli/mcp_loader.py +315 -0
  12. alita_sdk/cli/toolkit.py +330 -0
  13. alita_sdk/cli/toolkit_loader.py +55 -0
  14. alita_sdk/cli/tools/__init__.py +9 -0
  15. alita_sdk/cli/tools/filesystem.py +905 -0
  16. alita_sdk/configurations/bitbucket.py +95 -0
  17. alita_sdk/configurations/confluence.py +96 -1
  18. alita_sdk/configurations/gitlab.py +79 -0
  19. alita_sdk/configurations/jira.py +103 -0
  20. alita_sdk/configurations/testrail.py +88 -0
  21. alita_sdk/configurations/xray.py +93 -0
  22. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  23. alita_sdk/configurations/zephyr_essential.py +75 -0
  24. alita_sdk/runtime/clients/client.py +47 -10
  25. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  26. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  27. alita_sdk/runtime/clients/sandbox_client.py +8 -0
  28. alita_sdk/runtime/langchain/assistant.py +37 -16
  29. alita_sdk/runtime/langchain/constants.py +6 -1
  30. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  31. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  32. alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
  33. alita_sdk/runtime/langchain/langraph_agent.py +146 -31
  34. alita_sdk/runtime/langchain/utils.py +39 -7
  35. alita_sdk/runtime/models/mcp_models.py +61 -0
  36. alita_sdk/runtime/toolkits/__init__.py +24 -0
  37. alita_sdk/runtime/toolkits/application.py +8 -1
  38. alita_sdk/runtime/toolkits/artifact.py +5 -6
  39. alita_sdk/runtime/toolkits/mcp.py +895 -0
  40. alita_sdk/runtime/toolkits/tools.py +137 -56
  41. alita_sdk/runtime/tools/__init__.py +7 -2
  42. alita_sdk/runtime/tools/application.py +7 -0
  43. alita_sdk/runtime/tools/function.py +29 -25
  44. alita_sdk/runtime/tools/graph.py +10 -4
  45. alita_sdk/runtime/tools/image_generation.py +104 -8
  46. alita_sdk/runtime/tools/llm.py +204 -114
  47. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  48. alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
  49. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  50. alita_sdk/runtime/tools/sandbox.py +57 -43
  51. alita_sdk/runtime/tools/vectorstore.py +2 -1
  52. alita_sdk/runtime/tools/vectorstore_base.py +19 -3
  53. alita_sdk/runtime/utils/mcp_oauth.py +164 -0
  54. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  55. alita_sdk/runtime/utils/streamlit.py +34 -3
  56. alita_sdk/runtime/utils/toolkit_utils.py +14 -4
  57. alita_sdk/tools/__init__.py +46 -31
  58. alita_sdk/tools/ado/repos/__init__.py +1 -0
  59. alita_sdk/tools/ado/test_plan/__init__.py +1 -1
  60. alita_sdk/tools/ado/wiki/__init__.py +1 -5
  61. alita_sdk/tools/ado/work_item/__init__.py +1 -5
  62. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  63. alita_sdk/tools/base_indexer_toolkit.py +105 -43
  64. alita_sdk/tools/bitbucket/__init__.py +1 -0
  65. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  66. alita_sdk/tools/code/sonar/__init__.py +1 -1
  67. alita_sdk/tools/code_indexer_toolkit.py +13 -3
  68. alita_sdk/tools/confluence/__init__.py +2 -2
  69. alita_sdk/tools/confluence/api_wrapper.py +29 -7
  70. alita_sdk/tools/confluence/loader.py +10 -0
  71. alita_sdk/tools/github/__init__.py +2 -2
  72. alita_sdk/tools/gitlab/__init__.py +2 -1
  73. alita_sdk/tools/gitlab/api_wrapper.py +11 -7
  74. alita_sdk/tools/gitlab_org/__init__.py +1 -2
  75. alita_sdk/tools/google_places/__init__.py +2 -1
  76. alita_sdk/tools/jira/__init__.py +1 -0
  77. alita_sdk/tools/jira/api_wrapper.py +1 -1
  78. alita_sdk/tools/memory/__init__.py +1 -1
  79. alita_sdk/tools/openapi/__init__.py +10 -1
  80. alita_sdk/tools/pandas/__init__.py +1 -1
  81. alita_sdk/tools/postman/__init__.py +2 -1
  82. alita_sdk/tools/pptx/__init__.py +2 -2
  83. alita_sdk/tools/qtest/__init__.py +3 -3
  84. alita_sdk/tools/qtest/api_wrapper.py +1708 -76
  85. alita_sdk/tools/rally/__init__.py +1 -2
  86. alita_sdk/tools/report_portal/__init__.py +1 -0
  87. alita_sdk/tools/salesforce/__init__.py +1 -0
  88. alita_sdk/tools/servicenow/__init__.py +2 -3
  89. alita_sdk/tools/sharepoint/__init__.py +1 -0
  90. alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
  91. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  92. alita_sdk/tools/sharepoint/utils.py +8 -2
  93. alita_sdk/tools/slack/__init__.py +1 -0
  94. alita_sdk/tools/sql/__init__.py +2 -1
  95. alita_sdk/tools/testio/__init__.py +1 -0
  96. alita_sdk/tools/testrail/__init__.py +1 -3
  97. alita_sdk/tools/utils/content_parser.py +27 -16
  98. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +18 -5
  99. alita_sdk/tools/xray/__init__.py +2 -1
  100. alita_sdk/tools/zephyr/__init__.py +2 -1
  101. alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
  102. alita_sdk/tools/zephyr_essential/__init__.py +1 -0
  103. alita_sdk/tools/zephyr_scale/__init__.py +1 -0
  104. alita_sdk/tools/zephyr_squad/__init__.py +1 -0
  105. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
  106. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +110 -86
  107. alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
  108. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
  109. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
  110. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
alita_sdk/tools/ado/work_item/__init__.py
@@ -23,11 +23,6 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
     AzureDevOpsWorkItemsToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
     m = create_model(
         name,
-        name=(str, Field(description="Toolkit name",
-                         json_schema_extra={
-                             'toolkit_name': True,
-                             'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})
-             ),
         ado_configuration=(AdoConfiguration, Field(description="Ado Work Item configuration", json_schema_extra={'configuration_types': ['ado']})),
         limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
         selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
@@ -42,6 +37,7 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
         'metadata': {
             "label": "ADO boards",
             "icon_url": "ado-boards-icon.svg",
+            "max_length": AzureDevOpsWorkItemsToolkit.toolkit_max_length,
             "categories": ["project management"],
             "extra_categories": ["work item management", "issue tracking", "agile boards"],
             "sections": {
alita_sdk/tools/ado/work_item/ado_wrapper.py
@@ -329,11 +329,14 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
         parsed_item.update(fields_data)

         # extract relations if any
-        relations_data = work_item.relations
+        relations_data = None
+        if expand and str(expand).lower() in ("relations", "all"):
+            try:
+                relations_data = getattr(work_item, 'relations', None)
+            except KeyError:
+                relations_data = None
         if relations_data:
-            parsed_item['relations'] = []
-            for relation in relations_data:
-                parsed_item['relations'].append(relation.as_dict())
+            parsed_item['relations'] = [relation.as_dict() for relation in relations_data]

         if parse_attachments:
             # describe images in work item fields if present
@@ -344,13 +347,19 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
                 for img in images:
                     src = img.get('src')
                     if src:
-                        description = self.parse_attachment_by_url(src, image_description_prompt)
+                        description = self.parse_attachment_by_url(src, image_description_prompt=image_description_prompt)
                         img['image-description'] = description
                 parsed_item[field_name] = str(soup)
             # parse attached documents if present
-            if parsed_item['relations']:
-                for attachment in parsed_item['relations']:
-                    attachment['content'] = self.parse_attachment_by_url(attachment['url'], attachment['attributes']['name'], image_description_prompt)
+            for relation in parsed_item.get('relations', []):
+                # Only process actual file attachments
+                if relation.get('rel') == 'AttachedFile':
+                    file_name = relation.get('attributes', {}).get('name')
+                    if file_name:
+                        try:
+                            relation['content'] = self.parse_attachment_by_url(relation['url'], file_name, image_description_prompt=image_description_prompt)
+                        except Exception as att_e:
+                            logger.warning(f"Failed to parse attachment {file_name}: {att_e}")

         return parsed_item
alita_sdk/tools/base_indexer_toolkit.py
@@ -4,10 +4,10 @@ import logging
 import time
 from typing import Any, Optional, List, Dict, Generator

+from langchain_core.callbacks import dispatch_custom_event
 from langchain_core.documents import Document
 from pydantic import create_model, Field, SecretStr

-from .utils import make_json_serializable
 from .utils.content_parser import file_extension_by_chunker, process_document_by_type
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.langchain.document_loaders.constants import loaders_allowed_to_override
@@ -16,6 +16,8 @@ from ..runtime.utils.utils import IndexerKeywords

 logger = logging.getLogger(__name__)

+DEFAULT_CUT_OFF = 0.2
+
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
@@ -38,7 +40,7 @@ BaseSearchParams = create_model(
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
@@ -68,7 +70,7 @@ BaseStepbackSearchParams = create_model(
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
@@ -111,7 +113,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('collection_name')
+        collection_name = kwargs.get('collection_schema')

         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
@@ -176,11 +178,15 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 f"Processing documents to collect dependencies and prepare them for indexing...")
             self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
             #
-            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, result["count"])
+            results_count = result["count"]
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            self._emit_index_event(index_name)
             #
-            return {"status": "ok", "message": f"successfully indexed {result["count"]} documents"}
+            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
+                    else "no new documents to index"}
         except Exception as e:
             self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
+            self._emit_index_event(index_name, error=str(e))
             raise e

@@ -379,7 +385,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def search_index(self,
                      query: str,
                      index_name: str = "",
-                     filter: dict | str = {}, cut_off: float = 0.5,
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -410,7 +416,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                      query: str,
                      messages: List[Dict[str, Any]] = [],
                      index_name: str = "",
-                     filter: dict | str = {}, cut_off: float = 0.5,
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -435,7 +441,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                      query: str,
                      messages: List[Dict[str, Any]] = [],
                      index_name: str = "",
-                     filter: dict | str = {}, cut_off: float = 0.5,
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -457,37 +463,29 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         )

     def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
-        index_meta_raw = super().get_index_meta(index_name)
-        from ..runtime.langchain.interfaces.llm_processor import add_documents
-        created_on = time.time()
-        metadata = {
-            "collection": index_name,
-            "type": IndexerKeywords.INDEX_META_TYPE.value,
-            "indexed": 0,
-            "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
-            "index_configuration": index_configuration,
-            "created_on": created_on,
-            "updated_on": created_on,
-            "history": "[]",
-        }
-        index_meta_ids = None
-        #
-        if index_meta_raw:
-            history_raw = index_meta_raw.get("metadata", {}).get("history", "[]")
-            if isinstance(history_raw, str) and history_raw.strip():
-                try:
-                    history = json.loads(history_raw)
-                except (json.JSONDecodeError, TypeError):
-                    history = []
-            else:
-                history = []
-            new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
-            history.append(new_history_item)
-            metadata["history"] = json.dumps(history)
-            index_meta_ids = [index_meta_raw.get("id")]
-        #
-        index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
-        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "updated": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "task_id": None,
+                "conversation_id": None,
+            }
+            metadata["history"] = json.dumps([metadata])
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])

     def index_meta_update(self, index_name: str, state: str, result: int):
         index_meta_raw = super().get_index_meta(index_name)
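
Vector-store metadata fields generally hold scalars, which is why the run history here is kept as a single JSON string inside the metadata rather than a nested list: the in-progress snapshot is written as the last history entry, then replaced in place once the run settles. A minimal stdlib round-trip sketch of that pattern (names illustrative, not the SDK API):

import json
import time

# hypothetical snapshot of one indexing run, mirroring the keys above
run = {"collection": "docs", "indexed": 0, "state": "in_progress",
       "created_on": time.time()}

# write: serialize the history list into a single string-valued field
metadata = dict(run)
metadata["history"] = json.dumps([run])

# update: parse the string back, overwrite the last (in-progress) entry,
# and re-serialize before storing the document again
history = json.loads(metadata.pop("history", "[]") or "[]")
run = {**run, "state": "completed", "updated_on": time.time()}
history[-1] = run
metadata = {**run, "history": json.dumps(history)}
print(metadata["history"])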
@@ -495,12 +493,75 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         #
         if index_meta_raw:
             metadata = copy.deepcopy(index_meta_raw.get("metadata", {}))
-            metadata["indexed"] = result
+            metadata["indexed"] = self.get_indexed_count(index_name)
+            metadata["updated"] = result
             metadata["state"] = state
             metadata["updated_on"] = time.time()
+            #
+            history_raw = metadata.pop("history", "[]")
+            try:
+                history = json.loads(history_raw) if history_raw.strip() else []
+                # replace the last history item with updated metadata
+                if history and isinstance(history, list):
+                    history[-1] = metadata
+                else:
+                    history = [metadata]
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(f"Failed to load index history: {history_raw}. Create new with only current item.")
+                history = [metadata]
+            #
+            metadata["history"] = json.dumps(history)
             index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
             add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])

+    def _emit_index_event(self, index_name: str, error: Optional[str] = None):
+        """
+        Emit custom event for index data operation.
+
+        Args:
+            index_name: The name of the index
+            error: Error message if the operation failed, None otherwise
+        """
+        index_meta = super().get_index_meta(index_name)
+
+        if not index_meta:
+            logger.warning(
+                f"No index_meta found for index '{index_name}'. "
+                "Cannot emit index event."
+            )
+            return
+
+        metadata = index_meta.get("metadata", {})
+
+        # Determine if this is a reindex operation
+        history_raw = metadata.get("history", "[]")
+        try:
+            history = json.loads(history_raw) if history_raw.strip() else []
+            is_reindex = len(history) > 1
+        except (json.JSONDecodeError, TypeError):
+            is_reindex = False
+
+        # Build event message
+        event_data = {
+            "id": index_meta.get("id"),
+            "index_name": index_name,
+            "state": metadata.get("state"),
+            "error": error,
+            "reindex": is_reindex,
+            "indexed": metadata.get("indexed", 0),
+            "updated": metadata.get("updated", 0),
+        }
+
+        # Emit the event
+        try:
+            dispatch_custom_event("index_data_status", event_data)
+            logger.debug(
+                f"Emitted index_data_status event for index "
+                f"'{index_name}': {event_data}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to emit index_data_status event: {e}")
+
     def get_available_tools(self):
         """
         Returns the standardized vector search tools (search operations only).
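
dispatch_custom_event only works when the tool runs inside a runnable context with callbacks attached; a handler implementing on_custom_event can then surface the index_data_status payload, e.g. for progress reporting. A minimal consumer sketch (handler and invoke call are illustrative, assuming a langchain-core recent enough to ship custom events, 0.2.15+):

from typing import Any
from langchain_core.callbacks import BaseCallbackHandler

class IndexStatusHandler(BaseCallbackHandler):
    """Illustrative consumer for the index_data_status events emitted above."""

    def on_custom_event(self, name: str, data: Any, **kwargs: Any) -> None:
        if name != "index_data_status":
            return
        if data.get("error"):
            print(f"index {data['index_name']} failed: {data['error']}")
        else:
            print(f"index {data['index_name']}: state={data['state']}, "
                  f"updated={data['updated']}, indexed={data['indexed']}")

# attached via the run config when invoking a tool or agent, e.g.:
# tool.invoke(args, config={"callbacks": [IndexStatusHandler()]})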
@@ -554,6 +615,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             "mode": "list_collections",
             "ref": self.list_collections,
             "description": self.list_collections.__doc__,
-            "args_schema": create_model("ListCollectionsParams") # No parameters
+            # No parameters
+            "args_schema": create_model("ListCollectionsParams")
         },
-    ]
+    ]
alita_sdk/tools/bitbucket/__init__.py
@@ -61,6 +61,7 @@ class AlitaBitbucketToolkit(BaseToolkit):
         'metadata':
             {
                 "label": "Bitbucket", "icon_url": "bitbucket-icon.svg",
+                "max_length": AlitaBitbucketToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["bitbucket", "git", "repository", "code", "version control"],
             }
alita_sdk/tools/chunkers/sematic/proposal_chunker.py
@@ -6,7 +6,7 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain.text_splitter import TokenTextSplitter

 from typing import Optional, List
-from langchain_core.pydantic_v1 import BaseModel
+from pydantic import BaseModel
 from ..utils import tiktoken_length

 logger = getLogger(__name__)
alita_sdk/tools/code/sonar/__init__.py
@@ -29,7 +29,7 @@ class SonarToolkit(BaseToolkit):
         SonarToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            sonar_project_name=(str, Field(description="Project name of the desired repository", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': SonarToolkit.toolkit_max_length})),
+            sonar_project_name=(str, Field(description="Project name of the desired repository")),
             sonar_configuration=(SonarConfiguration, Field(description="Sonar Configuration", json_schema_extra={'configuration_types': ['sonar']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=
alita_sdk/tools/code_indexer_toolkit.py
@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator

@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         return self.vector_adapter.get_code_indexed_data(self, index_name)

     def key_fn(self, document: Document):
-        return document.metadata.get('id')
+        return document.metadata.get("filename")

     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )

     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from parse_code_files_for_db(documents)
+        yield from documents

     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
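
Loaders can hand back bytes, parsed JSON dicts, or other objects; coercing everything to str before hashing keeps the SHA-256 de-duplication key well-defined. A standalone illustration of the same coercion rules:

import hashlib
import json

def normalize(file_name: str, content) -> str:
    # mirrors the branches above, extracted for illustration
    if isinstance(content, bytes):
        return content.decode("utf-8", errors="ignore")
    if isinstance(content, dict) and file_name.endswith(".json"):
        return json.dumps(content)
    return content if isinstance(content, str) else str(content)

for name, raw in [("a.py", b"print(1)"), ("b.json", {"k": 1}), ("c.txt", 42)]:
    text = normalize(name, raw)
    print(name, hashlib.sha256(text.encode("utf-8")).hexdigest()[:12])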
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                     self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
                 self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")

-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())

     def __handle_get_files(self, path: str, branch: str):
         """
alita_sdk/tools/confluence/__init__.py
@@ -67,8 +67,7 @@ class ConfluenceToolkit(BaseToolkit):

         model = create_model(
             name,
-            space=(str, Field(description="Space", json_schema_extra={'toolkit_name': True,
-                                                                      'max_toolkit_length': ConfluenceToolkit.toolkit_max_length})),
+            space=(str, Field(description="Space")),
             cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
             limit=(int, Field(description="Pages limit per request", default=5)),
             labels=(Optional[str], Field(
@@ -95,6 +94,7 @@ class ConfluenceToolkit(BaseToolkit):
             'metadata': {
                 "label": "Confluence",
                 "icon_url": None,
+                "max_length": ConfluenceToolkit.toolkit_max_length,
                 "categories": ["documentation"],
                 "extra_categories": ["confluence", "wiki", "knowledge base", "documentation", "atlassian"]
             }
alita_sdk/tools/confluence/api_wrapper.py
@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal

 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log

 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)

     @model_validator(mode='before')
@@ -498,7 +501,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         restrictions = self.client.get_all_restrictions_for_content(page["id"])

         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
             and not restrictions["read"]["restrictions"]["user"]["results"]
             and not restrictions["read"]["restrictions"]["group"]["results"]
         )
@@ -518,18 +523,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
             ),
             before_sleep=before_sleep_log(logger, logging.WARNING),
         )(self.client.get_page_by_id)
-        page = get_page(
-            page_id=page_id, expand=f"{self.content_format.value},version"
-        )
-        if not self.include_restricted_content and not self.is_public_page(page):
-            continue
+        try:
+            page = get_page(
+                page_id=page_id, expand=f"{self.content_format.value},version"
+            )
+        except (ApiError, HTTPError) as e:
+            logger.error(f"Error fetching page with ID {page_id}: {e}")
+            page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+            # store errors
+            if self._errors is None:
+                self._errors = []
+            self._errors.append(page_content_temp)
+            return Document(page_content=page_content_temp,
+                            metadata={})
+        # TODO: update on toolkit advanced settings level as a separate feature
+        # if not self.include_restricted_content and not self.is_public_page(page):
+        #     continue
         yield self.process_page(page, skip_images)

+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "Page not found"
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content

alita_sdk/tools/confluence/loader.py
@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger

 import requests
+from langchain_core.documents import Document

 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)

+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
alita_sdk/tools/github/__init__.py
@@ -53,6 +53,7 @@ class AlitaGitHubToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitHub",
                 "icon_url": None,
+                "max_length": AlitaGitHubToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["github", "git", "repository", "code", "version control"],
             },
@@ -62,8 +63,7 @@ class AlitaGitHubToolkit(BaseToolkit):
                                                     json_schema_extra={'configuration_types': ['github']})),
             pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector configuration", default=None,
                                                                            json_schema_extra={'configuration_types': ['pgvector']})),
-            repository=(str, Field(description="Github repository", json_schema_extra={'toolkit_name': True,
-                                                                                       'max_toolkit_length': AlitaGitHubToolkit.toolkit_max_length})),
+            repository=(str, Field(description="Github repository")),
             active_branch=(Optional[str], Field(description="Active branch", default="main")),
             base_branch=(Optional[str], Field(description="Github Base branch", default="main")),
             # embedder settings
alita_sdk/tools/gitlab/__init__.py
@@ -43,7 +43,7 @@ class AlitaGitlabToolkit(BaseToolkit):
         AlitaGitlabToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            repository=(str, Field(description="GitLab repository", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AlitaGitlabToolkit.toolkit_max_length})),
+            repository=(str, Field(description="GitLab repository")),
             gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration", json_schema_extra={'configuration_types': ['gitlab']})),
             branch=(str, Field(description="Main branch", default="main")),
             # indexer settings
@@ -57,6 +57,7 @@ class AlitaGitlabToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitLab",
                 "icon_url": None,
+                "max_length": AlitaGitlabToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
             }
alita_sdk/tools/gitlab/api_wrapper.py
@@ -117,7 +117,11 @@ class GitLabAPIWrapper(CodeIndexerToolkit):

     @model_validator(mode='before')
     @classmethod
-    def validate_toolkit(cls, values: Dict) -> Dict:
+    def validate_toolkit_before(cls, values: Dict) -> Dict:
+        return super().validate_toolkit(values)
+
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:
@@ -125,17 +129,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-        values['repository'] = cls._sanitize_url(values['repository'])
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=cls._sanitize_url(values['url']),
-            private_token=values['private_token'],
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )

         g.auth()
-        cls._git = g
-        cls._active_branch = values.get('branch')
-        return super().validate_toolkit(values)
+        self._git = g
+        self._active_branch = self.branch
+        return self

     @property
     def repo_instance(self):
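
The split mirrors pydantic v2's two validation phases: a mode='before' hook still sees the raw input dict (so the parent's dict-based validate_toolkit keeps working), while the new mode='after' hook runs on the typed instance, where private_token is already a SecretStr and needs .get_secret_value(). A minimal sketch of the pattern with an illustrative model:

from pydantic import BaseModel, SecretStr, model_validator

class GitLabConfigSketch(BaseModel):
    url: str
    private_token: SecretStr

    @model_validator(mode='before')
    @classmethod
    def normalize(cls, values: dict) -> dict:
        # runs on the raw input dict, before field coercion
        values["url"] = values["url"].rstrip("/")
        return values

    @model_validator(mode='after')
    def connect(self) -> "GitLabConfigSketch":
        # runs on the typed instance: private_token is already a SecretStr
        assert self.private_token.get_secret_value()
        return self

cfg = GitLabConfigSketch(url="https://gitlab.example.com/", private_token="token")
print(cfg.url)  # https://gitlab.example.com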
alita_sdk/tools/gitlab_org/__init__.py
@@ -30,8 +30,6 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
         AlitaGitlabSpaceToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            name=(str, Field(description="Toolkit name", json_schema_extra={'toolkit_name': True,
-                                                                            'max_toolkit_length': AlitaGitlabSpaceToolkit.toolkit_max_length})),
             gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration",
                                                              json_schema_extra={
                                                                  'configuration_types': ['gitlab']})),
@@ -46,6 +44,7 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitLab Org",
                 "icon_url": None,
+                "max_length": AlitaGitlabSpaceToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
             }
alita_sdk/tools/google_places/__init__.py
@@ -30,7 +30,7 @@ class GooglePlacesToolkit(BaseToolkit):
         GooglePlacesToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            results_count=(Optional[int], Field(description="Results number to show", default=None, json_schema_extra={'toolkit_name': True, 'max_toolkit_length': GooglePlacesToolkit.toolkit_max_length})),
+            results_count=(Optional[int], Field(description="Results number to show", default=None)),
             google_places_configuration=(GooglePlacesConfiguration, Field(description="Google Places Configuration", json_schema_extra={'configuration_types': ['google_places']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=
@@ -38,6 +38,7 @@ class GooglePlacesToolkit(BaseToolkit):
             'metadata':
                 {
                     "label": "Google Places", "icon_url": "gplaces-icon.svg",
+                    "max_length": GooglePlacesToolkit.toolkit_max_length,
                     "categories": ["other"],
                     "extra_categories": ["google", "places", "maps", "location",
                                          "geolocation"],
alita_sdk/tools/jira/__init__.py
@@ -89,6 +89,7 @@ class JiraToolkit(BaseToolkit):
             'metadata': {
                 "label": "Jira",
                 "icon_url": "jira-icon.svg",
+                "max_length": JiraToolkit.toolkit_max_length,
                 "categories": ["project management"],
                 "extra_categories": ["jira", "atlassian", "issue tracking", "project management", "task management"],
             }
alita_sdk/tools/jira/api_wrapper.py
@@ -563,7 +563,7 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         Use the appropriate issue link type (e.g., "Test", "Relates", "Blocks").
         If we use "Test" linktype, the test is inward issue, the story/other issue is outward issue.."""

-        comment = "This test is linked to the story."
+        comment = f"Issue {inward_issue_key} was linked to {outward_issue_key}."
         comment_body = {"content": [{"content": [{"text": comment,"type": "text"}],"type": "paragraph"}],"type": "doc","version": 1} if self.api_version == "3" else comment
         link_data = {
             "type": {"name": f"{linktype}"},
alita_sdk/tools/memory/__init__.py
@@ -61,7 +61,7 @@ class MemoryToolkit(BaseToolkit):

         return create_model(
             'memory',
-            namespace=(str, Field(description="Memory namespace", json_schema_extra={'toolkit_name': True})),
+            namespace=(str, Field(description="Memory namespace")),
             pgvector_configuration=(PgVectorConfiguration, Field(description="PgVector Configuration",
                                                                  json_schema_extra={
                                                                      'configuration_types': ['pgvector']})),