alita-sdk 0.3.351__py3-none-any.whl → 0.3.499__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3601 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1256 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +64 -8
  30. alita_sdk/community/inventory/__init__.py +224 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/visualize.py +1370 -0
  58. alita_sdk/configurations/bitbucket.py +94 -2
  59. alita_sdk/configurations/confluence.py +96 -1
  60. alita_sdk/configurations/gitlab.py +79 -0
  61. alita_sdk/configurations/jira.py +103 -0
  62. alita_sdk/configurations/testrail.py +88 -0
  63. alita_sdk/configurations/xray.py +93 -0
  64. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  65. alita_sdk/configurations/zephyr_essential.py +75 -0
  66. alita_sdk/runtime/clients/artifact.py +1 -1
  67. alita_sdk/runtime/clients/client.py +214 -42
  68. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  69. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  70. alita_sdk/runtime/clients/sandbox_client.py +373 -0
  71. alita_sdk/runtime/langchain/assistant.py +118 -30
  72. alita_sdk/runtime/langchain/constants.py +8 -1
  73. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  74. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  75. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  76. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +41 -12
  77. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
  78. alita_sdk/runtime/langchain/document_loaders/constants.py +116 -99
  79. alita_sdk/runtime/langchain/interfaces/llm_processor.py +2 -2
  80. alita_sdk/runtime/langchain/langraph_agent.py +307 -71
  81. alita_sdk/runtime/langchain/utils.py +48 -8
  82. alita_sdk/runtime/llms/preloaded.py +2 -6
  83. alita_sdk/runtime/models/mcp_models.py +61 -0
  84. alita_sdk/runtime/toolkits/__init__.py +26 -0
  85. alita_sdk/runtime/toolkits/application.py +9 -2
  86. alita_sdk/runtime/toolkits/artifact.py +18 -6
  87. alita_sdk/runtime/toolkits/datasource.py +13 -6
  88. alita_sdk/runtime/toolkits/mcp.py +780 -0
  89. alita_sdk/runtime/toolkits/planning.py +178 -0
  90. alita_sdk/runtime/toolkits/tools.py +205 -55
  91. alita_sdk/runtime/toolkits/vectorstore.py +9 -4
  92. alita_sdk/runtime/tools/__init__.py +11 -3
  93. alita_sdk/runtime/tools/application.py +7 -0
  94. alita_sdk/runtime/tools/artifact.py +225 -12
  95. alita_sdk/runtime/tools/function.py +95 -5
  96. alita_sdk/runtime/tools/graph.py +10 -4
  97. alita_sdk/runtime/tools/image_generation.py +212 -0
  98. alita_sdk/runtime/tools/llm.py +494 -102
  99. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  100. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  101. alita_sdk/runtime/tools/mcp_server_tool.py +4 -4
  102. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  103. alita_sdk/runtime/tools/planning/models.py +246 -0
  104. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  105. alita_sdk/runtime/tools/router.py +2 -1
  106. alita_sdk/runtime/tools/sandbox.py +180 -79
  107. alita_sdk/runtime/tools/vectorstore.py +22 -21
  108. alita_sdk/runtime/tools/vectorstore_base.py +125 -52
  109. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  110. alita_sdk/runtime/utils/mcp_client.py +465 -0
  111. alita_sdk/runtime/utils/mcp_oauth.py +244 -0
  112. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  113. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  114. alita_sdk/runtime/utils/streamlit.py +40 -13
  115. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  116. alita_sdk/runtime/utils/utils.py +12 -0
  117. alita_sdk/tools/__init__.py +77 -33
  118. alita_sdk/tools/ado/repos/__init__.py +7 -6
  119. alita_sdk/tools/ado/repos/repos_wrapper.py +11 -11
  120. alita_sdk/tools/ado/test_plan/__init__.py +7 -7
  121. alita_sdk/tools/ado/wiki/__init__.py +7 -11
  122. alita_sdk/tools/ado/wiki/ado_wrapper.py +89 -15
  123. alita_sdk/tools/ado/work_item/__init__.py +7 -11
  124. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  125. alita_sdk/tools/advanced_jira_mining/__init__.py +8 -7
  126. alita_sdk/tools/aws/delta_lake/__init__.py +11 -9
  127. alita_sdk/tools/azure_ai/search/__init__.py +7 -6
  128. alita_sdk/tools/base_indexer_toolkit.py +345 -70
  129. alita_sdk/tools/bitbucket/__init__.py +9 -8
  130. alita_sdk/tools/bitbucket/api_wrapper.py +50 -6
  131. alita_sdk/tools/browser/__init__.py +4 -4
  132. alita_sdk/tools/carrier/__init__.py +4 -6
  133. alita_sdk/tools/chunkers/__init__.py +3 -1
  134. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  135. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  136. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  137. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  138. alita_sdk/tools/cloud/aws/__init__.py +7 -6
  139. alita_sdk/tools/cloud/azure/__init__.py +7 -6
  140. alita_sdk/tools/cloud/gcp/__init__.py +7 -6
  141. alita_sdk/tools/cloud/k8s/__init__.py +7 -6
  142. alita_sdk/tools/code/linter/__init__.py +7 -7
  143. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  144. alita_sdk/tools/code/sonar/__init__.py +8 -7
  145. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  146. alita_sdk/tools/confluence/__init__.py +9 -8
  147. alita_sdk/tools/confluence/api_wrapper.py +171 -75
  148. alita_sdk/tools/confluence/loader.py +10 -0
  149. alita_sdk/tools/custom_open_api/__init__.py +9 -4
  150. alita_sdk/tools/elastic/__init__.py +8 -7
  151. alita_sdk/tools/elitea_base.py +492 -52
  152. alita_sdk/tools/figma/__init__.py +7 -7
  153. alita_sdk/tools/figma/api_wrapper.py +2 -1
  154. alita_sdk/tools/github/__init__.py +9 -9
  155. alita_sdk/tools/github/api_wrapper.py +9 -26
  156. alita_sdk/tools/github/github_client.py +62 -2
  157. alita_sdk/tools/gitlab/__init__.py +8 -8
  158. alita_sdk/tools/gitlab/api_wrapper.py +135 -33
  159. alita_sdk/tools/gitlab_org/__init__.py +7 -8
  160. alita_sdk/tools/google/bigquery/__init__.py +11 -12
  161. alita_sdk/tools/google_places/__init__.py +8 -7
  162. alita_sdk/tools/jira/__init__.py +9 -7
  163. alita_sdk/tools/jira/api_wrapper.py +100 -52
  164. alita_sdk/tools/keycloak/__init__.py +8 -7
  165. alita_sdk/tools/localgit/local_git.py +56 -54
  166. alita_sdk/tools/memory/__init__.py +1 -1
  167. alita_sdk/tools/non_code_indexer_toolkit.py +3 -2
  168. alita_sdk/tools/ocr/__init__.py +8 -7
  169. alita_sdk/tools/openapi/__init__.py +10 -1
  170. alita_sdk/tools/pandas/__init__.py +8 -7
  171. alita_sdk/tools/postman/__init__.py +7 -8
  172. alita_sdk/tools/postman/api_wrapper.py +19 -8
  173. alita_sdk/tools/postman/postman_analysis.py +8 -1
  174. alita_sdk/tools/pptx/__init__.py +8 -9
  175. alita_sdk/tools/qtest/__init__.py +16 -11
  176. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  177. alita_sdk/tools/rally/__init__.py +7 -8
  178. alita_sdk/tools/report_portal/__init__.py +9 -7
  179. alita_sdk/tools/salesforce/__init__.py +7 -7
  180. alita_sdk/tools/servicenow/__init__.py +10 -10
  181. alita_sdk/tools/sharepoint/__init__.py +7 -6
  182. alita_sdk/tools/sharepoint/api_wrapper.py +127 -36
  183. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  184. alita_sdk/tools/sharepoint/utils.py +8 -2
  185. alita_sdk/tools/slack/__init__.py +7 -6
  186. alita_sdk/tools/sql/__init__.py +8 -7
  187. alita_sdk/tools/sql/api_wrapper.py +71 -23
  188. alita_sdk/tools/testio/__init__.py +7 -6
  189. alita_sdk/tools/testrail/__init__.py +8 -9
  190. alita_sdk/tools/utils/__init__.py +26 -4
  191. alita_sdk/tools/utils/content_parser.py +88 -60
  192. alita_sdk/tools/utils/text_operations.py +254 -0
  193. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +76 -26
  194. alita_sdk/tools/xray/__init__.py +9 -7
  195. alita_sdk/tools/zephyr/__init__.py +7 -6
  196. alita_sdk/tools/zephyr_enterprise/__init__.py +8 -6
  197. alita_sdk/tools/zephyr_essential/__init__.py +7 -6
  198. alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
  199. alita_sdk/tools/zephyr_scale/__init__.py +7 -6
  200. alita_sdk/tools/zephyr_squad/__init__.py +7 -6
  201. {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +147 -2
  202. {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/RECORD +206 -130
  203. alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
  204. {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
  205. {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
  206. {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0
alita_sdk/tools/cloud/aws/__init__.py
@@ -6,7 +6,7 @@ from langchain_core.tools import BaseToolkit, BaseTool
 from .api_wrapper import AWSToolConfig
 from ...base.tool import BaseAction
 from ...elitea_base import filter_missconfigured_index_tools
-from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+from ...utils import clean_string, get_max_toolkit_length

 name = "aws"

@@ -22,12 +22,10 @@ def get_tools(tool):

 class AWSToolkit(BaseToolkit):
     tools: list[BaseTool] = []
-    toolkit_max_length: int = 0

     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in AWSToolConfig.model_construct().get_available_tools()}
-        AWSToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
             region=(str, Field(default="", title="Region", description="AWS region")),
@@ -54,14 +52,17 @@ class AWSToolkit(BaseToolkit):
         aws_tool_config = AWSToolConfig(**kwargs)
         available_tools = aws_tool_config.get_available_tools()
         tools = []
-        prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=aws_tool_config,
-                name=prefix + tool["name"],
-                description=tool["description"],
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)
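This hunk, and the matching hunks for the other toolkits below, apply the same change: tool names are no longer prefixed with the cleaned toolkit name and TOOLKIT_SPLITTER; instead, the toolkit name is prepended to the tool's description, which is then capped at 1000 characters. A minimal sketch of the new description handling (the `build_description` helper and sample values are illustrative only; the SDK inlines this logic inside each toolkit's `get_toolkit()` loop):

```python
from typing import Optional

# Illustrative sketch of the description handling introduced in 0.3.499;
# build_description is a hypothetical helper name, not part of the package.
def build_description(tool_description: str, toolkit_name: Optional[str]) -> str:
    description = tool_description
    if toolkit_name:
        description = f"Toolkit: {toolkit_name}\n{description}"
    return description[:1000]  # tool descriptions are capped at 1000 characters

print(build_description("List S3 buckets in the configured region.", "my_aws"))
# Toolkit: my_aws
# List S3 buckets in the configured region.
```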
alita_sdk/tools/cloud/azure/__init__.py
@@ -6,7 +6,7 @@ from pydantic import create_model, BaseModel, ConfigDict, Field, SecretStr
 from .api_wrapper import AzureApiWrapper
 from ...base.tool import BaseAction
 from ...elitea_base import filter_missconfigured_index_tools
-from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+from ...utils import clean_string, get_max_toolkit_length

 name = "azure"

@@ -23,12 +23,10 @@ def get_tools(tool):

 class AzureToolkit(BaseToolkit):
     tools: list[BaseTool] = []
-    toolkit_max_length: int = 0

     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in AzureApiWrapper.model_construct().get_available_tools()}
-        AzureToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
             subscription_id=(str, Field(default="", title="Subscription ID", description="Azure subscription ID")),
@@ -47,14 +45,17 @@ class AzureToolkit(BaseToolkit):
         azure_api_wrapper = AzureApiWrapper(**kwargs)
         available_tools = azure_api_wrapper.get_available_tools()
         tools = []
-        prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=azure_api_wrapper,
-                name=prefix + tool["name"],
-                description=tool["description"],
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)
alita_sdk/tools/cloud/gcp/__init__.py
@@ -6,7 +6,7 @@ from pydantic import create_model, BaseModel, ConfigDict, Field, SecretStr
 from .api_wrapper import GCPApiWrapper
 from ...base.tool import BaseAction
 from ...elitea_base import filter_missconfigured_index_tools
-from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+from ...utils import clean_string, get_max_toolkit_length

 name = "gcp"

@@ -20,12 +20,10 @@ def get_tools(tool):

 class GCPToolkit(BaseToolkit):
     tools: list[BaseTool] = []
-    toolkit_max_length: int = 0

     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in GCPApiWrapper.model_construct().get_available_tools()}
-        GCPToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
             api_key=(SecretStr, Field(default="", title="API key", description="GCP API key", json_schema_extra={'secret': True})),
@@ -41,14 +39,17 @@ class GCPToolkit(BaseToolkit):
         gcp_api_wrapper = GCPApiWrapper(**kwargs)
         available_tools = gcp_api_wrapper.get_available_tools()
         tools = []
-        prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=gcp_api_wrapper,
-                name=prefix + tool["name"],
-                description=tool["description"],
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)
alita_sdk/tools/cloud/k8s/__init__.py
@@ -6,7 +6,7 @@ from pydantic import create_model, BaseModel, ConfigDict, Field, SecretStr
 from .api_wrapper import KubernetesApiWrapper
 from ...base.tool import BaseAction
 from ...elitea_base import filter_missconfigured_index_tools
-from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+from ...utils import clean_string, get_max_toolkit_length

 name = "kubernetes"

@@ -21,12 +21,10 @@ def get_tools(tool):

 class KubernetesToolkit(BaseToolkit):
     tools: list[BaseTool] = []
-    toolkit_max_length: int = 0

     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in KubernetesApiWrapper.model_construct().get_available_tools()}
-        KubernetesToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
             url=(str, Field(default="", title="Cluster URL", description="The URL of the Kubernetes cluster")),
@@ -51,14 +49,17 @@ class KubernetesToolkit(BaseToolkit):
         kubernetes_api_wrapper = KubernetesApiWrapper(**kwargs)
         available_tools = kubernetes_api_wrapper.get_available_tools()
         tools = []
-        prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=kubernetes_api_wrapper,
-                name=prefix + tool["name"],
-                description=tool["description"],
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)
alita_sdk/tools/code/linter/__init__.py
@@ -5,7 +5,7 @@ from pydantic import BaseModel, create_model, Field

 from .api_wrapper import PythonLinter
 from ...base.tool import BaseAction
-from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+from ...utils import clean_string, get_max_toolkit_length

 name = "python_linter"

@@ -19,11 +19,9 @@ def get_tools(tool):

 class PythonLinterToolkit(BaseToolkit):
     tools: list[BaseTool] = []
-    toolkit_max_length: int = 0

     @staticmethod
     def toolkit_config_schema() -> BaseModel:
-        PythonLinterToolkit.toolkit_max_length = get_max_toolkit_length([])
         return create_model(
             name,
             error_codes=(str, Field(description="Error codes to be used by the linter")),
@@ -39,15 +37,17 @@ class PythonLinterToolkit(BaseToolkit):
         python_linter = PythonLinter(**kwargs)
         available_tools = python_linter.get_available_tools()
         tools = []
-        toolkit_max_length = get_max_toolkit_length(selected_tools)
-        prefix = clean_string(toolkit_name, PythonLinterToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=python_linter,
-                name=prefix + tool["name"],
-                description=tool["description"],
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)
alita_sdk/tools/code/loaders/codesearcher.py
@@ -4,8 +4,9 @@ def search_format(items):
     results = []
     for (doc, score) in items:
         res_chunk = ''
-        language = get_programming_language(get_file_extension(doc.metadata["filename"]))
-        res_chunk += doc.metadata["filename"] + " -> " + doc.metadata["method_name"] + " (score: " + str(score) + ")"
+        language = get_programming_language(get_file_extension(doc.metadata.get("filename", "unknown")))
+        method_name = doc.metadata.get("method_name", "text")
+        res_chunk += doc.metadata.get("filename", "unknown") + " -> " + method_name + " (score: " + str(score) + ")"
         res_chunk += "\n\n```" + language.value + "\n"+ doc.page_content + "\n```\n\n"
         results.append(res_chunk)
     return results
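The search formatter now reads metadata defensively: a chunk without a "filename" or "method_name" key falls back to "unknown" / "text" instead of raising KeyError. A short illustrative example of the new behavior (the Document construction and the sample score are assumptions made up for demonstration, not taken from the package):

```python
# Illustrative only: how a chunk lacking "method_name" metadata is rendered
# after this change (previously this access raised KeyError).
from langchain_core.documents import Document

doc = Document(page_content="print('hi')", metadata={"filename": "hello.py"})
method_name = doc.metadata.get("method_name", "text")
header = doc.metadata.get("filename", "unknown") + " -> " + method_name + " (score: 0.42)"
print(header)
# hello.py -> text (score: 0.42)
```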
alita_sdk/tools/code/sonar/__init__.py
@@ -5,7 +5,7 @@ from pydantic import create_model, BaseModel, ConfigDict, Field
 from .api_wrapper import SonarApiWrapper
 from ...base.tool import BaseAction
 from ...elitea_base import filter_missconfigured_index_tools
-from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+from ...utils import clean_string, get_max_toolkit_length
 from ....configurations.sonar import SonarConfiguration

 name = "sonar"
@@ -21,15 +21,13 @@ def get_tools(tool):

 class SonarToolkit(BaseToolkit):
     tools: list[BaseTool] = []
-    toolkit_max_length: int = 0

     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in SonarApiWrapper.model_construct().get_available_tools()}
-        SonarToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            sonar_project_name=(str, Field(description="Project name of the desired repository", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': SonarToolkit.toolkit_max_length})),
+            sonar_project_name=(str, Field(description="Project name of the desired repository")),
             sonar_configuration=(SonarConfiguration, Field(description="Sonar Configuration", json_schema_extra={'configuration_types': ['sonar']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=
@@ -55,14 +53,17 @@ class SonarToolkit(BaseToolkit):
         sonar_api_wrapper = SonarApiWrapper(**wrapper_payload)
         available_tools = sonar_api_wrapper.get_available_tools()
         tools = []
-        prefix = clean_string(toolkit_name, SonarToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=sonar_api_wrapper,
-                name=prefix + tool["name"],
-                description=tool["description"],
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)
alita_sdk/tools/code_indexer_toolkit.py (new file)
@@ -0,0 +1,199 @@
+import ast
+import fnmatch
+import json
+import logging
+from typing import Optional, List, Generator
+
+from langchain_core.documents import Document
+from langchain_core.tools import ToolException
+from pydantic import Field
+
+from alita_sdk.tools.base_indexer_toolkit import BaseIndexerToolkit
+
+logger = logging.getLogger(__name__)
+
+
+class CodeIndexerToolkit(BaseIndexerToolkit):
+    def _get_indexed_data(self, index_name: str):
+        self._ensure_vectorstore_initialized()
+        if not self.vector_adapter:
+            raise ToolException("Vector adapter is not initialized. "
+                                "Check your configuration: embedding_model and vectorstore_type.")
+        return self.vector_adapter.get_code_indexed_data(self, index_name)
+
+    def key_fn(self, document: Document):
+        return document.metadata.get("filename")
+
+    def compare_fn(self, document: Document, idx_data):
+        return (document.metadata.get('commit_hash') and
+                idx_data.get('commit_hashes') and
+                document.metadata.get('commit_hash') in idx_data.get('commit_hashes')
+                )
+
+    def remove_ids_fn(self, idx_data, key: str):
+        return idx_data[key]['ids']
+
+    def _base_loader(
+            self,
+            branch: Optional[str] = None,
+            whitelist: Optional[List[str]] = None,
+            blacklist: Optional[List[str]] = None,
+            **kwargs) -> Generator[Document, None, None]:
+        """Index repository files in the vector store using code parsing."""
+        yield from self.loader(
+            branch=branch,
+            whitelist=whitelist,
+            blacklist=blacklist
+        )
+
+    def _extend_data(self, documents: Generator[Document, None, None]):
+        yield from documents
+
+    def _index_tool_params(self):
+        """Return the parameters for indexing data."""
+        return {
+            "branch": (Optional[str], Field(
+                description="Branch to index files from. Defaults to active branch if None.",
+                default=None)),
+            "whitelist": (Optional[List[str]], Field(
+                description='File extensions or paths to include. Defaults to all files if None. Example: ["*.md", "*.java"]',
+                default=None)),
+            "blacklist": (Optional[List[str]], Field(
+                description='File extensions or paths to exclude. Defaults to no exclusions if None. Example: ["*.md", "*.java"]',
+                default=None)),
+        }
+
+    def loader(self,
+               branch: Optional[str] = None,
+               whitelist: Optional[List[str]] = None,
+               blacklist: Optional[List[str]] = None,
+               chunked: bool = True) -> Generator[Document, None, None]:
+        """
+        Generates Documents from files in a branch, respecting whitelist and blacklist patterns.
+
+        Parameters:
+        - branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
+        - whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
+        - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
+        - chunked (bool): If True (default), applies universal chunker based on file type.
+          If False, returns raw Documents without chunking.
+
+        Returns:
+        - generator: Yields Documents from files matching the whitelist but not the blacklist.
+          Each document has exactly the key 'filename' in metadata, which is used as an ID
+          for further operations (indexing, deduplication, and retrieval).
+
+        Example:
+        # Use 'feature-branch', include '.py' files, exclude 'test_' files
+        for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
+            print(doc.page_content)
+
+        Notes:
+        - Whitelist and blacklist use Unix shell-style wildcards.
+        - Files must match the whitelist and not the blacklist to be included.
+        - Each document MUST have exactly the key 'filename' in metadata. This key is used as an ID
+          for further operations such as indexing, deduplication, and retrieval.
+        - When chunked=True:
+            - .md files → markdown chunker (header-based splitting)
+            - .py/.js/.ts/etc → code parser (TreeSitter-based)
+            - .json files → JSON chunker
+            - other files → default text chunker
+        """
+        import hashlib
+
+        _files = self.__handle_get_files("", self.__get_branch(branch))
+        self._log_tool_event(message="Listing files in branch", tool_name="loader")
+        logger.info(f"Files in branch: {_files}")
+
+        def is_whitelisted(file_path: str) -> bool:
+            if whitelist:
+                return (any(fnmatch.fnmatch(file_path, pattern) for pattern in whitelist)
+                        or any(file_path.endswith(f'.{pattern}') for pattern in whitelist))
+            return True
+
+        def is_blacklisted(file_path: str) -> bool:
+            if blacklist:
+                return (any(fnmatch.fnmatch(file_path, pattern) for pattern in blacklist)
+                        or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
+            return False
+
+        def raw_document_generator() -> Generator[Document, None, None]:
+            """Yields raw Documents without chunking."""
+            self._log_tool_event(message="Reading the files", tool_name="loader")
+            total_files = len(_files)
+            processed = 0
+
+            for idx, file in enumerate(_files, 1):
+                if is_whitelisted(file) and not is_blacklisted(file):
+                    try:
+                        file_content = self._read_file(file, self.__get_branch(branch))
+                    except Exception as e:
+                        logger.error(f"Failed to read file {file}: {e}")
+                        continue
+
+                    if not file_content:
+                        continue
+
+                    # Ensure file content is a string
+                    if isinstance(file_content, bytes):
+                        file_content = file_content.decode("utf-8", errors="ignore")
+                    elif isinstance(file_content, dict) and file.endswith('.json'):
+                        file_content = json.dumps(file_content)
+                    elif not isinstance(file_content, str):
+                        file_content = str(file_content)
+
+                    # Hash the file content for uniqueness tracking
+                    file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
+                    processed += 1
+
+                    yield Document(
+                        page_content=file_content,
+                        metadata={
+                            'file_path': file,
+                            'filename': file,
+                            'source': file,
+                            'commit_hash': file_hash,
+                        }
+                    )
+
+                if idx % 10 == 0 or idx == total_files:
+                    self._log_tool_event(
+                        message=f"{idx} out of {total_files} files checked, {processed} matched",
+                        tool_name="loader"
+                    )
+
+            self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
+
+        if not chunked:
+            # Return raw documents without chunking
+            return raw_document_generator()
+
+        # Apply universal chunker based on file type
+        from .chunkers.universal_chunker import universal_chunker
+        return universal_chunker(raw_document_generator())
+
+    def __handle_get_files(self, path: str, branch: str):
+        """
+        Handles the retrieval of files from a specific path and branch.
+        This method should be implemented in subclasses to provide the actual file retrieval logic.
+        """
+        _files = self._get_files(path=path, branch=branch)
+        if isinstance(_files, str):
+            try:
+                # Attempt to convert the string to a list using ast.literal_eval
+                _files = ast.literal_eval(_files)
+                # Ensure that the result is actually a list of strings
+                if not isinstance(_files, list) or not all(isinstance(item, str) for item in _files):
+                    raise ValueError("The evaluated result is not a list of strings")
+            except (SyntaxError, ValueError):
+                # Handle the case where the string cannot be converted to a list
+                raise ValueError("Expected a list of strings, but got a string that cannot be converted")
+
+        # Ensure _files is a list of strings
+        if not isinstance(_files, list) or not all(isinstance(item, str) for item in _files):
+            raise ValueError("Expected a list of strings")
+        return _files
+
+    def __get_branch(self, branch):
+        return (branch or getattr(self, 'active_branch', None)
+                or getattr(self, '_active_branch', None) or getattr(self, 'branch', None))
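As the loader docstring above notes, whitelist/blacklist entries are matched in two ways: as Unix shell-style wildcards via fnmatch, or as bare extensions appended after a dot. A standalone sketch of that matching rule (the file list and the `matches` helper name are made up for illustration; the actual logic lives in the nested `is_whitelisted`/`is_blacklisted` functions above):

```python
import fnmatch

def matches(file_path: str, patterns: list[str]) -> bool:
    # Mirrors is_whitelisted/is_blacklisted: a pattern hits if it fnmatch-es
    # the path, or if the path ends with ".<pattern>" (bare extension form).
    return (any(fnmatch.fnmatch(file_path, p) for p in patterns)
            or any(file_path.endswith(f'.{p}') for p in patterns))

files = ["src/app.py", "README.md", "tests/test_app.py"]
print([f for f in files if matches(f, ["*.py"]) and not matches(f, ["*test_*"])])
# ['src/app.py']
print(matches("README.md", ["md"]))  # bare extension form also matches
# True
```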
alita_sdk/tools/confluence/__init__.py
@@ -6,7 +6,7 @@ from ..base.tool import BaseAction
 from pydantic import create_model, BaseModel, ConfigDict, Field

 from ..elitea_base import filter_missconfigured_index_tools
-from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, parse_list, check_connection_response
+from ..utils import clean_string, get_max_toolkit_length, parse_list, check_connection_response
 from ...configurations.confluence import ConfluenceConfiguration
 from ...configurations.pgvector import PgVectorConfiguration
 import requests
@@ -38,13 +38,11 @@ def get_tools(tool):

 class ConfluenceToolkit(BaseToolkit):
     tools: List[BaseTool] = []
-    toolkit_max_length: int = 0

     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in
                           ConfluenceAPIWrapper.model_construct().get_available_tools()}
-        ConfluenceToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)

         @check_connection_response
         def check_connection(self):
@@ -67,8 +65,7 @@ class ConfluenceToolkit(BaseToolkit):

         model = create_model(
             name,
-            space=(str, Field(description="Space", json_schema_extra={'toolkit_name': True,
-                                                                      'max_toolkit_length': ConfluenceToolkit.toolkit_max_length})),
+            space=(str, Field(description="Space")),
             cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
             limit=(int, Field(description="Pages limit per request", default=5)),
             labels=(Optional[str], Field(
@@ -115,17 +112,21 @@ class ConfluenceToolkit(BaseToolkit):
             **(kwargs.get('pgvector_configuration') or {}),
         }
         confluence_api_wrapper = ConfluenceAPIWrapper(**wrapper_payload)
-        prefix = clean_string(toolkit_name, ConfluenceToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         available_tools = confluence_api_wrapper.get_available_tools()
         tools = []
         for tool in available_tools:
             if selected_tools:
                 if tool["name"] not in selected_tools:
                     continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = f"Confluence space: {confluence_api_wrapper.space}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=confluence_api_wrapper,
-                name=prefix + tool["name"],
-                description=f"Confluence space: {confluence_api_wrapper.space}" + tool["description"],
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)