alita-sdk 0.3.365__py3-none-any.whl → 0.3.462__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (118)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent_executor.py +144 -0
  4. alita_sdk/cli/agent_loader.py +197 -0
  5. alita_sdk/cli/agent_ui.py +166 -0
  6. alita_sdk/cli/agents.py +1069 -0
  7. alita_sdk/cli/callbacks.py +576 -0
  8. alita_sdk/cli/cli.py +159 -0
  9. alita_sdk/cli/config.py +153 -0
  10. alita_sdk/cli/formatting.py +182 -0
  11. alita_sdk/cli/mcp_loader.py +315 -0
  12. alita_sdk/cli/toolkit.py +330 -0
  13. alita_sdk/cli/toolkit_loader.py +55 -0
  14. alita_sdk/cli/tools/__init__.py +9 -0
  15. alita_sdk/cli/tools/filesystem.py +905 -0
  16. alita_sdk/configurations/bitbucket.py +95 -0
  17. alita_sdk/configurations/confluence.py +96 -1
  18. alita_sdk/configurations/gitlab.py +79 -0
  19. alita_sdk/configurations/jira.py +103 -0
  20. alita_sdk/configurations/testrail.py +88 -0
  21. alita_sdk/configurations/xray.py +93 -0
  22. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  23. alita_sdk/configurations/zephyr_essential.py +75 -0
  24. alita_sdk/runtime/clients/artifact.py +1 -1
  25. alita_sdk/runtime/clients/client.py +47 -10
  26. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  27. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  28. alita_sdk/runtime/clients/sandbox_client.py +373 -0
  29. alita_sdk/runtime/langchain/assistant.py +70 -41
  30. alita_sdk/runtime/langchain/constants.py +6 -1
  31. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  32. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  33. alita_sdk/runtime/langchain/document_loaders/constants.py +73 -100
  34. alita_sdk/runtime/langchain/langraph_agent.py +164 -38
  35. alita_sdk/runtime/langchain/utils.py +43 -7
  36. alita_sdk/runtime/models/mcp_models.py +61 -0
  37. alita_sdk/runtime/toolkits/__init__.py +24 -0
  38. alita_sdk/runtime/toolkits/application.py +8 -1
  39. alita_sdk/runtime/toolkits/artifact.py +5 -6
  40. alita_sdk/runtime/toolkits/mcp.py +895 -0
  41. alita_sdk/runtime/toolkits/tools.py +140 -50
  42. alita_sdk/runtime/tools/__init__.py +7 -2
  43. alita_sdk/runtime/tools/application.py +7 -0
  44. alita_sdk/runtime/tools/function.py +94 -5
  45. alita_sdk/runtime/tools/graph.py +10 -4
  46. alita_sdk/runtime/tools/image_generation.py +104 -8
  47. alita_sdk/runtime/tools/llm.py +204 -114
  48. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  49. alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
  50. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  51. alita_sdk/runtime/tools/sandbox.py +180 -79
  52. alita_sdk/runtime/tools/vectorstore.py +22 -21
  53. alita_sdk/runtime/tools/vectorstore_base.py +79 -26
  54. alita_sdk/runtime/utils/mcp_oauth.py +164 -0
  55. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  56. alita_sdk/runtime/utils/streamlit.py +34 -3
  57. alita_sdk/runtime/utils/toolkit_utils.py +14 -4
  58. alita_sdk/runtime/utils/utils.py +1 -0
  59. alita_sdk/tools/__init__.py +48 -31
  60. alita_sdk/tools/ado/repos/__init__.py +1 -0
  61. alita_sdk/tools/ado/test_plan/__init__.py +1 -1
  62. alita_sdk/tools/ado/wiki/__init__.py +1 -5
  63. alita_sdk/tools/ado/work_item/__init__.py +1 -5
  64. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  65. alita_sdk/tools/base_indexer_toolkit.py +194 -112
  66. alita_sdk/tools/bitbucket/__init__.py +1 -0
  67. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  68. alita_sdk/tools/code/sonar/__init__.py +1 -1
  69. alita_sdk/tools/code_indexer_toolkit.py +15 -5
  70. alita_sdk/tools/confluence/__init__.py +2 -2
  71. alita_sdk/tools/confluence/api_wrapper.py +110 -63
  72. alita_sdk/tools/confluence/loader.py +10 -0
  73. alita_sdk/tools/elitea_base.py +22 -22
  74. alita_sdk/tools/github/__init__.py +2 -2
  75. alita_sdk/tools/gitlab/__init__.py +2 -1
  76. alita_sdk/tools/gitlab/api_wrapper.py +11 -7
  77. alita_sdk/tools/gitlab_org/__init__.py +1 -2
  78. alita_sdk/tools/google_places/__init__.py +2 -1
  79. alita_sdk/tools/jira/__init__.py +1 -0
  80. alita_sdk/tools/jira/api_wrapper.py +1 -1
  81. alita_sdk/tools/memory/__init__.py +1 -1
  82. alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
  83. alita_sdk/tools/openapi/__init__.py +10 -1
  84. alita_sdk/tools/pandas/__init__.py +1 -1
  85. alita_sdk/tools/postman/__init__.py +2 -1
  86. alita_sdk/tools/postman/api_wrapper.py +18 -8
  87. alita_sdk/tools/postman/postman_analysis.py +8 -1
  88. alita_sdk/tools/pptx/__init__.py +2 -2
  89. alita_sdk/tools/qtest/__init__.py +3 -3
  90. alita_sdk/tools/qtest/api_wrapper.py +1708 -76
  91. alita_sdk/tools/rally/__init__.py +1 -2
  92. alita_sdk/tools/report_portal/__init__.py +1 -0
  93. alita_sdk/tools/salesforce/__init__.py +1 -0
  94. alita_sdk/tools/servicenow/__init__.py +2 -3
  95. alita_sdk/tools/sharepoint/__init__.py +1 -0
  96. alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
  97. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  98. alita_sdk/tools/sharepoint/utils.py +8 -2
  99. alita_sdk/tools/slack/__init__.py +1 -0
  100. alita_sdk/tools/sql/__init__.py +2 -1
  101. alita_sdk/tools/sql/api_wrapper.py +71 -23
  102. alita_sdk/tools/testio/__init__.py +1 -0
  103. alita_sdk/tools/testrail/__init__.py +1 -3
  104. alita_sdk/tools/utils/__init__.py +17 -0
  105. alita_sdk/tools/utils/content_parser.py +35 -24
  106. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +67 -21
  107. alita_sdk/tools/xray/__init__.py +2 -1
  108. alita_sdk/tools/zephyr/__init__.py +2 -1
  109. alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
  110. alita_sdk/tools/zephyr_essential/__init__.py +1 -0
  111. alita_sdk/tools/zephyr_scale/__init__.py +1 -0
  112. alita_sdk/tools/zephyr_squad/__init__.py +1 -0
  113. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
  114. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +118 -93
  115. alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
  116. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
  117. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
  118. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
--- a/alita_sdk/tools/code_indexer_toolkit.py
+++ b/alita_sdk/tools/code_indexer_toolkit.py
@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator
 
@@ -14,14 +15,14 @@ logger = logging.getLogger(__name__)
 
 
 class CodeIndexerToolkit(BaseIndexerToolkit):
-    def _get_indexed_data(self, collection_suffix: str):
+    def _get_indexed_data(self, index_name: str):
         if not self.vector_adapter:
             raise ToolException("Vector adapter is not initialized. "
                                 "Check your configuration: embedding_model and vectorstore_type.")
-        return self.vector_adapter.get_code_indexed_data(self, collection_suffix)
+        return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def key_fn(self, document: Document):
-        return document.metadata.get('id')
+        return document.metadata.get("filename")
 
     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )
 
     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from parse_code_files_for_db(documents)
+        yield from documents
 
     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                     self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
             self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
 
-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())
 
     def __handle_get_files(self, path: str, branch: str):
         """
--- a/alita_sdk/tools/confluence/__init__.py
+++ b/alita_sdk/tools/confluence/__init__.py
@@ -67,8 +67,7 @@ class ConfluenceToolkit(BaseToolkit):
 
         model = create_model(
             name,
-            space=(str, Field(description="Space", json_schema_extra={'toolkit_name': True,
-                                                                      'max_toolkit_length': ConfluenceToolkit.toolkit_max_length})),
+            space=(str, Field(description="Space")),
             cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
             limit=(int, Field(description="Pages limit per request", default=5)),
             labels=(Optional[str], Field(
@@ -95,6 +94,7 @@ class ConfluenceToolkit(BaseToolkit):
             'metadata': {
                 "label": "Confluence",
                 "icon_url": None,
+                "max_length": ConfluenceToolkit.toolkit_max_length,
                 "categories": ["documentation"],
                 "extra_categories": ["confluence", "wiki", "knowledge base", "documentation", "atlassian"]
             }
--- a/alita_sdk/tools/confluence/api_wrapper.py
+++ b/alita_sdk/tools/confluence/api_wrapper.py
@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal
 
 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log
 
 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)
 
     @model_validator(mode='before')
@@ -498,7 +501,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         restrictions = self.client.get_all_restrictions_for_content(page["id"])
 
         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
             and not restrictions["read"]["restrictions"]["user"]["results"]
             and not restrictions["read"]["restrictions"]["group"]["results"]
         )
@@ -518,18 +523,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
                 ),
                 before_sleep=before_sleep_log(logger, logging.WARNING),
             )(self.client.get_page_by_id)
-            page = get_page(
-                page_id=page_id, expand=f"{self.content_format.value},version"
-            )
-            if not self.include_restricted_content and not self.is_public_page(page):
-                continue
+            try:
+                page = get_page(
+                    page_id=page_id, expand=f"{self.content_format.value},version"
+                )
+            except (ApiError, HTTPError) as e:
+                logger.error(f"Error fetching page with ID {page_id}: {e}")
+                page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+                # store errors
+                if self._errors is None:
+                    self._errors = []
+                self._errors.append(page_content_temp)
+                return Document(page_content=page_content_temp,
+                                metadata={})
+            # TODO: update on toolkit advanced settings level as a separate feature
+            # if not self.include_restricted_content and not self.is_public_page(page):
+            #     continue
             yield self.process_page(page, skip_images)
 
+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "Page not found"
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content
 
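Reviewer note: the hunk above applies tenacity's retry(...) to self.client.get_page_by_id and, on terminal failure, records the error in self._errors instead of aborting the whole batch. A minimal sketch of the wrap-then-call pattern (standalone; flaky is illustrative):

from tenacity import retry, stop_after_attempt, wait_exponential

calls = {"n": 0}

def flaky():
    # fails twice, then succeeds
    calls["n"] += 1
    if calls["n"] < 3:
        raise IOError("transient")
    return "ok"

# retry(...) returns a decorator, so it can wrap an existing callable in place
get_with_retry = retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=0.01),
)(flaky)

assert get_with_retry() == "ok" and calls["n"] == 3

The diff for alita_sdk/tools/confluence/api_wrapper.py continues below.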
@@ -815,6 +837,10 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         from .loader import AlitaConfluenceLoader
         from copy import copy
         content_format = kwargs.get('content_format', 'view').lower()
+
+        self._index_include_attachments = kwargs.get('include_attachments', False)
+        self._include_extensions = kwargs.get('include_extensions', [])
+        self._skip_extensions = kwargs.get('skip_extensions', [])
         base_params = {
             'url': self.base_url,
             'space_key': self.space,
@@ -847,65 +873,79 @@
 
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
         try:
-            page_id = document.metadata.get('id')
-            attachments = self.client.get_attachments_from_content(page_id)
-            if not attachments or not attachments.get('results'):
-                return f"No attachments found for page ID {page_id}."
-
-            # Get attachment history for created/updated info
-            history_map = {}
-            for attachment in attachments['results']:
-                try:
-                    hist = self.client.history(attachment['id'])
-                    history_map[attachment['id']] = hist
-                except Exception as e:
-                    logger.warning(f"Failed to fetch history for attachment {attachment.get('title', '')}: {str(e)}")
-                    history_map[attachment['id']] = None
-
-            import re
-            for attachment in attachments['results']:
-                title = attachment.get('title', '')
-                file_ext = title.lower().split('.')[-1] if '.' in title else ''
-
-                media_type = attachment.get('metadata', {}).get('mediaType', '')
-                # Core metadata extraction with history
-                hist = history_map.get(attachment['id']) or {}
-                created_by = hist.get('createdBy', {}).get('displayName', '') if hist else attachment.get('creator', {}).get('displayName', '')
-                created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
-                last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
+            if self._index_include_attachments:
+                page_id = document.metadata.get('id')
+                attachments = self.client.get_attachments_from_content(page_id)
+                if not attachments or not attachments.get('results'):
+                    return f"No attachments found for page ID {page_id}."
+
+                # Get attachment history for created/updated info
+                history_map = {}
+                for attachment in attachments['results']:
+                    try:
+                        hist = self.client.history(attachment['id'])
+                        history_map[attachment['id']] = hist
+                    except Exception as e:
+                        logger.warning(f"Failed to fetch history for attachment {attachment.get('title', '')}: {str(e)}")
+                        history_map[attachment['id']] = None
+
+                import re
+                for attachment in attachments['results']:
+                    title = attachment.get('title', '')
+                    file_ext = title.lower().split('.')[-1] if '.' in title else ''
+
+                    # Re-verify extension filters
+                    # Check if file should be skipped based on skip_extensions
+                    if any(re.match(pattern.replace('*', '.*') + '$', title, re.IGNORECASE)
+                           for pattern in self._skip_extensions):
+                        continue
+
+                    # Check if file should be included based on include_extensions
+                    # If include_extensions is empty, process all files (that weren't skipped)
+                    if self._include_extensions and not (
+                            any(re.match(pattern.replace('*', '.*') + '$', title, re.IGNORECASE)
+                                for pattern in self._include_extensions)):
+                        continue
+
+                    media_type = attachment.get('metadata', {}).get('mediaType', '')
+                    # Core metadata extraction with history
+                    hist = history_map.get(attachment['id']) or {}
+                    created_by = hist.get('createdBy', {}).get('displayName', '') if hist else attachment.get('creator', {}).get('displayName', '')
+                    created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
+                    last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
+
+                    metadata = {
+                        'name': title,
+                        'size': attachment.get('extensions', {}).get('fileSize', None),
+                        'creator': created_by,
+                        'created': created_date,
+                        'updated': last_updated,
+                        'media_type': media_type,
+                        'labels': [label['name'] for label in
+                                   attachment.get('metadata', {}).get('labels', {}).get('results', [])],
+                        'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
+                            '_links', {}).get('download') else None
+                    }
 
-            metadata = {
-                'name': title,
-                'size': attachment.get('extensions', {}).get('fileSize', None),
-                'creator': created_by,
-                'created': created_date,
-                'updated': last_updated,
-                'media_type': media_type,
-                'labels': [label['name'] for label in
-                           attachment.get('metadata', {}).get('labels', {}).get('results', [])],
-                'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
-                    '_links', {}).get('download') else None
-            }
+                    download_url = self.base_url.rstrip('/') + attachment['_links']['download']
 
-            download_url = self.base_url.rstrip('/') + attachment['_links']['download']
+                    try:
+                        resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
+                        if resp.status_code == 200:
+                            content = resp.content
+                        else:
+                            content = f"[Failed to download {download_url}: HTTP status code {resp.status_code}]"
+                    except Exception as e:
+                        content = f"[Error downloading content: {str(e)}]"
 
-            try:
-                resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
-                if resp.status_code == 200:
-                    content = resp.content
+                    if isinstance(content, str):
+                        yield Document(page_content=content, metadata=metadata)
                     else:
-                    content = f"[Failed to download {download_url}: HTTP status code {resp.status_code}]"
-            except Exception as e:
-                content = f"[Error downloading content: {str(e)}]"
-
-            if isinstance(content, str):
-                yield Document(page_content=content, metadata=metadata)
-            else:
-                yield Document(page_content="", metadata={
-                    **metadata,
-                    IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
-                    IndexerKeywords.CONTENT_IN_BYTES.value: content
-                })
+                        yield Document(page_content="", metadata={
+                            **metadata,
+                            IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
+                            IndexerKeywords.CONTENT_IN_BYTES.value: content
+                        })
         except Exception as e:
             yield from ()
 
@@ -1648,8 +1688,15 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         "include_restricted_content": (Optional[bool], Field(description="Include restricted content.", default=False)),
         "include_archived_content": (Optional[bool], Field(description="Include archived content.", default=False)),
         "include_attachments": (Optional[bool], Field(description="Include attachments.", default=False)),
+        'include_extensions': (Optional[List[str]], Field(
+            description="List of file extensions to include when processing attachments: i.e. ['*.png', '*.jpg']. "
+                        "If empty, all files will be processed (except skip_extensions).",
+            default=[])),
+        'skip_extensions': (Optional[List[str]], Field(
+            description="List of file extensions to skip when processing attachments: i.e. ['*.png', '*.jpg']",
+            default=[])),
         "include_comments": (Optional[bool], Field(description="Include comments.", default=False)),
-        "include_labels": (Optional[bool], Field(description="Include labels.", default=True)),
+        "include_labels": (Optional[bool], Field(description="Include labels.", default=False)),
         "ocr_languages": (Optional[str], Field(description="OCR languages for processing attachments.", default='eng')),
         "keep_markdown_format": (Optional[bool], Field(description="Keep the markdown format.", default=True)),
         "keep_newlines": (Optional[bool], Field(description="Keep newlines in the content.", default=True)),
--- a/alita_sdk/tools/confluence/loader.py
+++ b/alita_sdk/tools/confluence/loader.py
@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger
 
 import requests
+from langchain_core.documents import Document
 
 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)
 
+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
--- a/alita_sdk/tools/elitea_base.py
+++ b/alita_sdk/tools/elitea_base.py
@@ -33,12 +33,12 @@ LoaderSchema = create_model(
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
-    collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
 )
 
 BaseCodeIndexParams = create_model(
     "BaseCodeIndexParams",
-    collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
     clean_index=(Optional[bool], Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
     progress_step=(Optional[int], Field(default=5, ge=0, le=100,
                                         description="Optional step size for progress reporting during indexing")),
@@ -50,14 +50,14 @@ BaseCodeIndexParams = create_model(
 
 RemoveIndexParams = create_model(
     "RemoveIndexParams",
-    collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
 )
 
 BaseSearchParams = create_model(
     "BaseSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    collection_suffix=(Optional[str], Field(
-        description="Optional suffix for collection name (max 7 characters). Leave empty to search across all datasets",
+    index_name=(Optional[str], Field(
+        description="Optional index name (max 7 characters). Leave empty to search across all datasets",
         default="", max_length=7)),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
@@ -87,7 +87,7 @@ BaseSearchParams = create_model(
 BaseStepbackSearchParams = create_model(
     "BaseStepbackSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
     messages=(Optional[List], Field(description="Chat messages for stepback search context", default=[])),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
@@ -324,12 +324,12 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         #
         docs = base_chunker(file_content_generator=docs, config=base_chunking_config)
         #
-        collection_suffix = kwargs.get("collection_suffix")
+        index_name = kwargs.get("index_name")
         progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         vs = self._init_vector_store()
         #
-        return vs.index_documents(docs, collection_suffix=collection_suffix, progress_step=progress_step, clean_index=clean_index)
+        return vs.index_documents(docs, index_name=index_name, progress_step=progress_step, clean_index=clean_index)
 
     def _process_documents(self, documents: List[Document]) -> Generator[Document, None, None]:
         """
@@ -399,10 +399,10 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         )
         return self._vector_store
 
-    def remove_index(self, collection_suffix: str = ""):
+    def remove_index(self, index_name: str = ""):
         """Cleans the indexed data in the collection."""
-        self._init_vector_store()._clean_collection(collection_suffix=collection_suffix)
-        return (f"Collection '{collection_suffix}' has been removed from the vector store.\n"
+        self._init_vector_store()._clean_collection(index_name=index_name)
+        return (f"Collection '{index_name}' has been removed from the vector store.\n"
                 f"Available collections: {self.list_collections()}")
 
     def list_collections(self):
@@ -410,19 +410,19 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         vectorstore_wrapper = self._init_vector_store()
         return vectorstore_wrapper.list_collections()
 
-    def _build_collection_filter(self, filter: dict | str, collection_suffix: str = "") -> dict:
+    def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
         """Builds a filter for the collection based on the provided suffix."""
 
         filter = filter if isinstance(filter, dict) else json.loads(filter)
-        if collection_suffix:
+        if index_name:
             filter.update({"collection": {
-                "$eq": collection_suffix.strip()
+                "$eq": index_name.strip()
             }})
         return filter
 
     def search_index(self,
                      query: str,
-                     collection_suffix: str = "",
+                     index_name: str = "",
                      filter: dict | str = {}, cut_off: float = 0.5,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
@@ -431,7 +431,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                      **kwargs):
         """ Searches indexed documents in the vector store."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter, collection_suffix)
+        filter = self._build_collection_filter(filter, index_name)
         found_docs = vectorstore.search_documents(
             query,
             doctype=self.doctype,
@@ -448,7 +448,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def stepback_search_index(self,
                               query: str,
                               messages: List[Dict[str, Any]] = [],
-                              collection_suffix: str = "",
+                              index_name: str = "",
                              filter: dict | str = {}, cut_off: float = 0.5,
                              search_top: int = 10, reranker: dict = {},
                              full_text_search: Optional[Dict[str, Any]] = None,
@@ -457,7 +457,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                              **kwargs):
         """ Searches indexed documents in the vector store."""
 
-        filter = self._build_collection_filter(filter, collection_suffix)
+        filter = self._build_collection_filter(filter, index_name)
         vectorstore = self._init_vector_store()
         found_docs = vectorstore.stepback_search(
             query,
@@ -475,7 +475,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def stepback_summary_index(self,
                                query: str,
                                messages: List[Dict[str, Any]] = [],
-                               collection_suffix: str = "",
+                               index_name: str = "",
                               filter: dict | str = {}, cut_off: float = 0.5,
                               search_top: int = 10, reranker: dict = {},
                               full_text_search: Optional[Dict[str, Any]] = None,
@@ -484,7 +484,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                               **kwargs):
         """ Generates a summary of indexed documents using stepback technique."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter, collection_suffix)
+        filter = self._build_collection_filter(filter, index_name)
 
         found_docs = vectorstore.stepback_summary(
             query,
@@ -655,7 +655,7 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
         return parse_code_files_for_db(file_content_generator())
 
     def index_data(self,
-                   collection_suffix: str,
+                   index_name: str,
                    branch: Optional[str] = None,
                    whitelist: Optional[List[str]] = None,
                    blacklist: Optional[List[str]] = None,
@@ -669,7 +669,7 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
         )
         vectorstore = self._init_vector_store()
         clean_index = kwargs.get('clean_index', False)
-        return vectorstore.index_documents(documents, collection_suffix=collection_suffix,
+        return vectorstore.index_documents(documents, index_name=index_name,
                                            clean_index=clean_index, is_code=True,
                                            progress_step=kwargs.get('progress_step', 5))
 
--- a/alita_sdk/tools/github/__init__.py
+++ b/alita_sdk/tools/github/__init__.py
@@ -53,6 +53,7 @@ class AlitaGitHubToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitHub",
                 "icon_url": None,
+                "max_length": AlitaGitHubToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["github", "git", "repository", "code", "version control"],
             },
@@ -62,8 +63,7 @@ class AlitaGitHubToolkit(BaseToolkit):
                                           json_schema_extra={'configuration_types': ['github']})),
         pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector configuration", default=None,
                                                                        json_schema_extra={'configuration_types': ['pgvector']})),
-        repository=(str, Field(description="Github repository", json_schema_extra={'toolkit_name': True,
-                                                                                   'max_toolkit_length': AlitaGitHubToolkit.toolkit_max_length})),
+        repository=(str, Field(description="Github repository")),
         active_branch=(Optional[str], Field(description="Active branch", default="main")),
         base_branch=(Optional[str], Field(description="Github Base branch", default="main")),
         # embedder settings
--- a/alita_sdk/tools/gitlab/__init__.py
+++ b/alita_sdk/tools/gitlab/__init__.py
@@ -43,7 +43,7 @@ class AlitaGitlabToolkit(BaseToolkit):
     AlitaGitlabToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
     return create_model(
         name,
-        repository=(str, Field(description="GitLab repository", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AlitaGitlabToolkit.toolkit_max_length})),
+        repository=(str, Field(description="GitLab repository")),
         gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration", json_schema_extra={'configuration_types': ['gitlab']})),
         branch=(str, Field(description="Main branch", default="main")),
         # indexer settings
@@ -57,6 +57,7 @@ class AlitaGitlabToolkit(BaseToolkit):
         'metadata': {
             "label": "GitLab",
             "icon_url": None,
+            "max_length": AlitaGitlabToolkit.toolkit_max_length,
             "categories": ["code repositories"],
             "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
         }
--- a/alita_sdk/tools/gitlab/api_wrapper.py
+++ b/alita_sdk/tools/gitlab/api_wrapper.py
@@ -117,7 +117,11 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
 
     @model_validator(mode='before')
     @classmethod
-    def validate_toolkit(cls, values: Dict) -> Dict:
+    def validate_toolkit_before(cls, values: Dict) -> Dict:
+        return super().validate_toolkit(values)
+
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:
@@ -125,17 +129,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-        values['repository'] = cls._sanitize_url(values['repository'])
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=cls._sanitize_url(values['url']),
-            private_token=values['private_token'],
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )
 
         g.auth()
-        cls._git = g
-        cls._active_branch = values.get('branch')
-        return super().validate_toolkit(values)
+        self._git = g
+        self._active_branch = self.branch
+        return self
 
     @property
     def repo_instance(self):
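Reviewer note: the split above follows the usual pydantic v2 two-phase pattern: a mode='before' classmethod sees the raw values dict, while a mode='after' validator runs on the constructed instance, where SecretStr fields can be unwrapped via get_secret_value(). A minimal sketch of the pattern (illustrative model, not the SDK class):

from pydantic import BaseModel, SecretStr, model_validator

class Client(BaseModel):
    url: str
    private_token: SecretStr

    @model_validator(mode='before')
    @classmethod
    def normalize(cls, values: dict) -> dict:
        # raw input: still a plain dict, secrets not yet wrapped
        values['url'] = values['url'].rstrip('/')
        return values

    @model_validator(mode='after')
    def connect(self):
        # the instance exists now; safe to read the secret and open connections
        token = self.private_token.get_secret_value()
        assert token  # e.g. authenticate a client here
        return self

Client(url='https://gitlab.example.com/', private_token='t0ken')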
--- a/alita_sdk/tools/gitlab_org/__init__.py
+++ b/alita_sdk/tools/gitlab_org/__init__.py
@@ -30,8 +30,6 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
     AlitaGitlabSpaceToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
     return create_model(
         name,
-        name=(str, Field(description="Toolkit name", json_schema_extra={'toolkit_name': True,
-                                                                        'max_toolkit_length': AlitaGitlabSpaceToolkit.toolkit_max_length})),
         gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration",
                                                          json_schema_extra={
                                                              'configuration_types': ['gitlab']})),
@@ -46,6 +44,7 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
         'metadata': {
             "label": "GitLab Org",
             "icon_url": None,
+            "max_length": AlitaGitlabSpaceToolkit.toolkit_max_length,
             "categories": ["code repositories"],
             "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
         }
--- a/alita_sdk/tools/google_places/__init__.py
+++ b/alita_sdk/tools/google_places/__init__.py
@@ -30,7 +30,7 @@ class GooglePlacesToolkit(BaseToolkit):
     GooglePlacesToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
     return create_model(
         name,
-        results_count=(Optional[int], Field(description="Results number to show", default=None, json_schema_extra={'toolkit_name': True, 'max_toolkit_length': GooglePlacesToolkit.toolkit_max_length})),
+        results_count=(Optional[int], Field(description="Results number to show", default=None)),
         google_places_configuration=(GooglePlacesConfiguration, Field(description="Google Places Configuration", json_schema_extra={'configuration_types': ['google_places']})),
         selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
         __config__=ConfigDict(json_schema_extra=
@@ -38,6 +38,7 @@ class GooglePlacesToolkit(BaseToolkit):
             'metadata':
             {
                 "label": "Google Places", "icon_url": "gplaces-icon.svg",
+                "max_length": GooglePlacesToolkit.toolkit_max_length,
                 "categories": ["other"],
                 "extra_categories": ["google", "places", "maps", "location",
                                      "geolocation"],
--- a/alita_sdk/tools/jira/__init__.py
+++ b/alita_sdk/tools/jira/__init__.py
@@ -89,6 +89,7 @@ class JiraToolkit(BaseToolkit):
             'metadata': {
                 "label": "Jira",
                 "icon_url": "jira-icon.svg",
+                "max_length": JiraToolkit.toolkit_max_length,
                 "categories": ["project management"],
                 "extra_categories": ["jira", "atlassian", "issue tracking", "project management", "task management"],
             }
--- a/alita_sdk/tools/jira/api_wrapper.py
+++ b/alita_sdk/tools/jira/api_wrapper.py
@@ -563,7 +563,7 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         Use the appropriate issue link type (e.g., "Test", "Relates", "Blocks").
         If we use "Test" linktype, the test is inward issue, the story/other issue is outward issue.."""
 
-        comment = "This test is linked to the story."
+        comment = f"Issue {inward_issue_key} was linked to {outward_issue_key}."
         comment_body = {"content": [{"content": [{"text": comment,"type": "text"}],"type": "paragraph"}],"type": "doc","version": 1} if self.api_version == "3" else comment
         link_data = {
             "type": {"name": f"{linktype}"},
--- a/alita_sdk/tools/memory/__init__.py
+++ b/alita_sdk/tools/memory/__init__.py
@@ -61,7 +61,7 @@ class MemoryToolkit(BaseToolkit):
 
     return create_model(
         'memory',
-        namespace=(str, Field(description="Memory namespace", json_schema_extra={'toolkit_name': True})),
+        namespace=(str, Field(description="Memory namespace")),
         pgvector_configuration=(PgVectorConfiguration, Field(description="PgVector Configuration",
                                                              json_schema_extra={
                                                                  'configuration_types': ['pgvector']})),