alita-sdk 0.3.532__py3-none-any.whl → 0.3.602__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk might be problematic.

Files changed (137)
  1. alita_sdk/cli/agent_executor.py +2 -1
  2. alita_sdk/cli/agent_loader.py +34 -4
  3. alita_sdk/cli/agents.py +433 -203
  4. alita_sdk/community/__init__.py +8 -4
  5. alita_sdk/configurations/__init__.py +1 -0
  6. alita_sdk/configurations/openapi.py +323 -0
  7. alita_sdk/runtime/clients/client.py +165 -7
  8. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  9. alita_sdk/runtime/langchain/assistant.py +61 -11
  10. alita_sdk/runtime/langchain/constants.py +419 -171
  11. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -2
  12. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  13. alita_sdk/runtime/langchain/langraph_agent.py +108 -23
  14. alita_sdk/runtime/langchain/utils.py +76 -14
  15. alita_sdk/runtime/skills/__init__.py +91 -0
  16. alita_sdk/runtime/skills/callbacks.py +498 -0
  17. alita_sdk/runtime/skills/discovery.py +540 -0
  18. alita_sdk/runtime/skills/executor.py +610 -0
  19. alita_sdk/runtime/skills/input_builder.py +371 -0
  20. alita_sdk/runtime/skills/models.py +330 -0
  21. alita_sdk/runtime/skills/registry.py +355 -0
  22. alita_sdk/runtime/skills/skill_runner.py +330 -0
  23. alita_sdk/runtime/toolkits/__init__.py +5 -0
  24. alita_sdk/runtime/toolkits/artifact.py +2 -1
  25. alita_sdk/runtime/toolkits/mcp.py +6 -3
  26. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  27. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  28. alita_sdk/runtime/toolkits/tools.py +139 -10
  29. alita_sdk/runtime/toolkits/vectorstore.py +1 -1
  30. alita_sdk/runtime/tools/__init__.py +3 -1
  31. alita_sdk/runtime/tools/artifact.py +15 -0
  32. alita_sdk/runtime/tools/data_analysis.py +183 -0
  33. alita_sdk/runtime/tools/llm.py +260 -73
  34. alita_sdk/runtime/tools/loop.py +3 -1
  35. alita_sdk/runtime/tools/loop_output.py +3 -1
  36. alita_sdk/runtime/tools/mcp_server_tool.py +6 -3
  37. alita_sdk/runtime/tools/router.py +2 -4
  38. alita_sdk/runtime/tools/sandbox.py +9 -6
  39. alita_sdk/runtime/tools/skill_router.py +776 -0
  40. alita_sdk/runtime/tools/tool.py +3 -1
  41. alita_sdk/runtime/tools/vectorstore.py +7 -2
  42. alita_sdk/runtime/tools/vectorstore_base.py +7 -2
  43. alita_sdk/runtime/utils/constants.py +5 -1
  44. alita_sdk/runtime/utils/mcp_client.py +1 -1
  45. alita_sdk/runtime/utils/mcp_sse_client.py +1 -1
  46. alita_sdk/runtime/utils/toolkit_utils.py +2 -0
  47. alita_sdk/tools/__init__.py +44 -2
  48. alita_sdk/tools/ado/repos/__init__.py +26 -8
  49. alita_sdk/tools/ado/repos/repos_wrapper.py +78 -52
  50. alita_sdk/tools/ado/test_plan/__init__.py +3 -2
  51. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  52. alita_sdk/tools/ado/utils.py +1 -18
  53. alita_sdk/tools/ado/wiki/__init__.py +2 -1
  54. alita_sdk/tools/ado/wiki/ado_wrapper.py +23 -1
  55. alita_sdk/tools/ado/work_item/__init__.py +3 -2
  56. alita_sdk/tools/ado/work_item/ado_wrapper.py +56 -3
  57. alita_sdk/tools/advanced_jira_mining/__init__.py +2 -1
  58. alita_sdk/tools/aws/delta_lake/__init__.py +2 -1
  59. alita_sdk/tools/azure_ai/search/__init__.py +2 -1
  60. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  61. alita_sdk/tools/base_indexer_toolkit.py +51 -30
  62. alita_sdk/tools/bitbucket/__init__.py +2 -1
  63. alita_sdk/tools/bitbucket/api_wrapper.py +1 -1
  64. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +3 -3
  65. alita_sdk/tools/browser/__init__.py +1 -1
  66. alita_sdk/tools/carrier/__init__.py +1 -1
  67. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  68. alita_sdk/tools/cloud/aws/__init__.py +2 -1
  69. alita_sdk/tools/cloud/azure/__init__.py +2 -1
  70. alita_sdk/tools/cloud/gcp/__init__.py +2 -1
  71. alita_sdk/tools/cloud/k8s/__init__.py +2 -1
  72. alita_sdk/tools/code/linter/__init__.py +2 -1
  73. alita_sdk/tools/code/sonar/__init__.py +2 -1
  74. alita_sdk/tools/code_indexer_toolkit.py +19 -2
  75. alita_sdk/tools/confluence/__init__.py +7 -6
  76. alita_sdk/tools/confluence/api_wrapper.py +7 -8
  77. alita_sdk/tools/confluence/loader.py +4 -2
  78. alita_sdk/tools/custom_open_api/__init__.py +2 -1
  79. alita_sdk/tools/elastic/__init__.py +2 -1
  80. alita_sdk/tools/elitea_base.py +28 -9
  81. alita_sdk/tools/figma/__init__.py +52 -6
  82. alita_sdk/tools/figma/api_wrapper.py +1158 -123
  83. alita_sdk/tools/figma/figma_client.py +73 -0
  84. alita_sdk/tools/figma/toon_tools.py +2748 -0
  85. alita_sdk/tools/github/__init__.py +2 -1
  86. alita_sdk/tools/github/github_client.py +56 -92
  87. alita_sdk/tools/github/schemas.py +4 -4
  88. alita_sdk/tools/gitlab/__init__.py +2 -1
  89. alita_sdk/tools/gitlab/api_wrapper.py +118 -38
  90. alita_sdk/tools/gitlab_org/__init__.py +2 -1
  91. alita_sdk/tools/gitlab_org/api_wrapper.py +60 -62
  92. alita_sdk/tools/google/bigquery/__init__.py +2 -1
  93. alita_sdk/tools/google_places/__init__.py +2 -1
  94. alita_sdk/tools/jira/__init__.py +2 -1
  95. alita_sdk/tools/keycloak/__init__.py +2 -1
  96. alita_sdk/tools/localgit/__init__.py +2 -1
  97. alita_sdk/tools/memory/__init__.py +1 -1
  98. alita_sdk/tools/ocr/__init__.py +2 -1
  99. alita_sdk/tools/openapi/__init__.py +490 -118
  100. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  101. alita_sdk/tools/openapi/tool.py +20 -0
  102. alita_sdk/tools/pandas/__init__.py +11 -5
  103. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  104. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  105. alita_sdk/tools/postman/__init__.py +2 -1
  106. alita_sdk/tools/pptx/__init__.py +2 -1
  107. alita_sdk/tools/qtest/__init__.py +21 -2
  108. alita_sdk/tools/qtest/api_wrapper.py +430 -13
  109. alita_sdk/tools/rally/__init__.py +2 -1
  110. alita_sdk/tools/rally/api_wrapper.py +1 -1
  111. alita_sdk/tools/report_portal/__init__.py +2 -1
  112. alita_sdk/tools/salesforce/__init__.py +2 -1
  113. alita_sdk/tools/servicenow/__init__.py +11 -10
  114. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  115. alita_sdk/tools/sharepoint/__init__.py +2 -1
  116. alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
  117. alita_sdk/tools/slack/__init__.py +3 -2
  118. alita_sdk/tools/slack/api_wrapper.py +2 -2
  119. alita_sdk/tools/sql/__init__.py +3 -2
  120. alita_sdk/tools/testio/__init__.py +2 -1
  121. alita_sdk/tools/testrail/__init__.py +2 -1
  122. alita_sdk/tools/utils/content_parser.py +77 -3
  123. alita_sdk/tools/utils/text_operations.py +163 -71
  124. alita_sdk/tools/xray/__init__.py +3 -2
  125. alita_sdk/tools/yagmail/__init__.py +2 -1
  126. alita_sdk/tools/zephyr/__init__.py +2 -1
  127. alita_sdk/tools/zephyr_enterprise/__init__.py +2 -1
  128. alita_sdk/tools/zephyr_essential/__init__.py +2 -1
  129. alita_sdk/tools/zephyr_scale/__init__.py +3 -2
  130. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  131. alita_sdk/tools/zephyr_squad/__init__.py +2 -1
  132. {alita_sdk-0.3.532.dist-info → alita_sdk-0.3.602.dist-info}/METADATA +7 -6
  133. {alita_sdk-0.3.532.dist-info → alita_sdk-0.3.602.dist-info}/RECORD +137 -119
  134. {alita_sdk-0.3.532.dist-info → alita_sdk-0.3.602.dist-info}/WHEEL +0 -0
  135. {alita_sdk-0.3.532.dist-info → alita_sdk-0.3.602.dist-info}/entry_points.txt +0 -0
  136. {alita_sdk-0.3.532.dist-info → alita_sdk-0.3.602.dist-info}/licenses/LICENSE +0 -0
  137. {alita_sdk-0.3.532.dist-info → alita_sdk-0.3.602.dist-info}/top_level.txt +0 -0
@@ -127,7 +127,29 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
             cls._core_client = connection.clients_v7_1.get_core_client()

         except Exception as e:
-            return ImportError(f"Failed to connect to Azure DevOps: {e}")
+            error_msg = str(e).lower()
+            if "expired" in error_msg or "token" in error_msg and ("invalid" in error_msg or "unauthorized" in error_msg):
+                raise ValueError(
+                    "Azure DevOps connection failed: Your access token has expired or is invalid. "
+                    "Please refresh your token in the toolkit configuration."
+                )
+            elif "401" in error_msg or "unauthorized" in error_msg:
+                raise ValueError(
+                    "Azure DevOps connection failed: Authentication failed. "
+                    "Please check your credentials in the toolkit configuration."
+                )
+            elif "404" in error_msg or "not found" in error_msg:
+                raise ValueError(
+                    "Azure DevOps connection failed: Organization or project not found. "
+                    "Please verify your organization URL and project name."
+                )
+            elif "timeout" in error_msg or "timed out" in error_msg:
+                raise ValueError(
+                    "Azure DevOps connection failed: Connection timed out. "
+                    "Please check your network connection and try again."
+                )
+            else:
+                raise ValueError(f"Azure DevOps connection failed: {e}")

         return super().validate_toolkit(values)

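Behavior note on the hunk above: the old code returned an ImportError instance rather than raising it, so callers never saw an exception; the new code raises ValueError with a categorized message. Also, because `and` binds tighter than `or` in Python, the first condition groups as `"expired" in error_msg or ("token" in error_msg and (...))`. A standalone sketch of that evaluation (function name hypothetical):

    # Mirrors the first branch's condition exactly as written in the diff.
    def is_token_error(error_msg: str) -> bool:
        # Groups as: "expired" in error_msg or ("token" in error_msg and (...))
        return "expired" in error_msg or "token" in error_msg and (
            "invalid" in error_msg or "unauthorized" in error_msg
        )

    print(is_token_error("certificate expired"))  # True: "expired" alone matches
    print(is_token_error("token was rejected"))   # False: "token" needs invalid/unauthorized
    print(is_token_error("token is invalid"))     # True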
@@ -576,9 +598,40 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
         return b"".join(content_generator)

     def _process_document(self, document: Document) -> Generator[Document, None, None]:
-        for attachment_id, file_name in document.metadata.get('attachment_ids', {}).items():
+        raw_attachment_ids = document.metadata.get('attachment_ids', {})
+
+        # Normalize attachment_ids: accept dict or JSON string, raise otherwise
+        if isinstance(raw_attachment_ids, str):
+            try:
+                loaded = json.loads(raw_attachment_ids)
+            except json.JSONDecodeError:
+                raise TypeError(
+                    f"Expected dict or JSON string for 'attachment_ids', got non-JSON string for id="
+                    f"{document.metadata.get('id')}: {raw_attachment_ids!r}"
+                )
+            if not isinstance(loaded, dict):
+                raise TypeError(
+                    f"'attachment_ids' JSON did not decode to dict for id={document.metadata.get('id')}: {loaded!r}"
+                )
+            attachment_ids = loaded
+        elif isinstance(raw_attachment_ids, dict):
+            attachment_ids = raw_attachment_ids
+        else:
+            raise TypeError(
+                f"Expected 'attachment_ids' to be dict or JSON string, got {type(raw_attachment_ids)} "
+                f"for id={document.metadata.get('id')}: {raw_attachment_ids!r}"
+            )
+
+        for attachment_id, file_name in attachment_ids.items():
             content = self.get_attachment_content(attachment_id=attachment_id)
-            yield Document(page_content="", metadata={'id': attachment_id, IndexerKeywords.CONTENT_FILE_NAME.value: file_name, IndexerKeywords.CONTENT_IN_BYTES.value: content})
+            yield Document(
+                page_content="",
+                metadata={
+                    'id': attachment_id,
+                    IndexerKeywords.CONTENT_FILE_NAME.value: file_name,
+                    IndexerKeywords.CONTENT_IN_BYTES.value: content,
+                },
+            )

     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -7,6 +7,7 @@ from .data_mining_wrapper import AdvancedJiraMiningWrapper
 from ..base.tool import BaseAction
 from ..elitea_base import filter_missconfigured_index_tools
 from ..utils import clean_string, get_max_toolkit_length
+from ...runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META

 name = "advanced_jira_mining"

@@ -78,7 +79,7 @@ class AdvancedJiraMiningToolkit(BaseToolkit):
                 name=tool["name"],
                 description=description,
                 args_schema=tool["args_schema"],
-                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+                metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
             ))
         return cls(tools=tools)

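The same metadata change repeats across nearly every toolkit below (delta_lake, azure_search, bitbucket, aws, azure, gcp, kubernetes, python_linter, sonar), so one sketch of the pattern suffices. The *_META constants come from runtime/utils/constants.py (also changed in this release); their string values are not shown in this diff, and the sample inputs are hypothetical:

    from alita_sdk.runtime.utils.constants import (
        TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META,
    )

    name = "advanced_jira_mining"      # module-level toolkit type
    toolkit_name = "my_jira_mining"    # hypothetical configured instance name
    tool = {"name": "mine_issues"}     # hypothetical tool entry

    # Before: {"toolkit_name": toolkit_name} or {} when no toolkit_name is set.
    # After: toolkit type and tool name are always recorded, and the tool name
    # survives even when toolkit_name is empty.
    metadata = (
        {TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]}
        if toolkit_name
        else {TOOL_NAME_META: tool["name"]}
    )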
@@ -9,6 +9,7 @@ from alita_sdk.configurations.delta_lake import DeltaLakeConfiguration
 from ...utils import clean_string, get_max_toolkit_length
 from .api_wrapper import DeltaLakeApiWrapper
 from .tool import DeltaLakeAction
+from ....runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META

 name = "delta_lake"

@@ -126,7 +127,7 @@ class DeltaLakeToolkit(BaseToolkit):
                     name=t["name"],
                     description=description,
                     args_schema=t["args_schema"],
-                    metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+                    metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: t["name"]} if toolkit_name else {TOOL_NAME_META: t["name"]}
                 )
             )
         return instance
@@ -9,6 +9,7 @@ from ...elitea_base import filter_missconfigured_index_tools
 from ...utils import clean_string, get_max_toolkit_length, check_connection_response
 from ....configurations.azure_search import AzureSearchConfiguration
 import requests
+from ....runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META

 logger = getLogger(__name__)

@@ -91,7 +92,7 @@ class AzureSearchToolkit(BaseToolkit):
                 name=tool["name"],
                 description=description,
                 args_schema=tool["args_schema"],
-                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+                metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
             ))
         return cls(tools=tools)

@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)

 class AzureSearchInput(BaseModel):
     search_text: str = Field(..., description="The text to search for in the Azure Search index.")
-    limit: int = Field(10, description="The number of results to return.")
+    limit: int = Field(10, description="The number of results to return.", gt=0)
     selected_fields: Optional[List[str]] = Field(None, description="The fields to retrieve from the document.")

 class AzureDocumentInput(BaseModel):
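The new `gt=0` constraint (repeated below for other limit-style fields) makes pydantic reject non-positive values at validation time instead of passing them through to the API. A minimal sketch with a stand-in model:

    from pydantic import BaseModel, Field, ValidationError

    class SearchInput(BaseModel):  # hypothetical stand-in for AzureSearchInput
        limit: int = Field(10, description="The number of results to return.", gt=0)

    SearchInput(limit=5)              # accepted
    try:
        SearchInput(limit=0)          # rejected: 0 is not greater than 0
    except ValidationError as e:
        print(e.errors()[0]["type"])  # "greater_than" (pydantic v2)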
@@ -29,12 +29,6 @@ class IndexTools(str, Enum):
     REMOVE_INDEX = "remove_index"
     LIST_COLLECTIONS = "list_collections"

-# Base Vector Store Schema Models
-BaseIndexParams = create_model(
-    "BaseIndexParams",
-    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
-)
-
 RemoveIndexParams = create_model(
     "RemoveIndexParams",
     index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
@@ -52,7 +46,7 @@ BaseSearchParams = create_model(
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
     cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
-    search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
+    search_top=(Optional[int], Field(description="Number of top results to return", default=10, gt=0)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
         default=None
@@ -82,7 +76,7 @@ BaseStepbackSearchParams = create_model(
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
     cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
-    search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
+    search_top=(Optional[int], Field(description="Number of top results to return", default=10, gt=0)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
         default=None
@@ -101,16 +95,6 @@ BaseStepbackSearchParams = create_model(
     )),
 )

-BaseIndexDataParams = create_model(
-    "indexData",
-    __base__=BaseIndexParams,
-    clean_index=(Optional[bool], Field(default=False,
-                                       description="Optional flag to enforce clean existing index before indexing new data")),
-    progress_step=(Optional[int], Field(default=10, ge=0, le=100,
-                                        description="Optional step size for progress reporting during indexing")),
-    chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default=loaders_allowed_to_override)),
-)
-

 class BaseIndexerToolkit(VectorStoreWrapperBase):
     """Base class for tool API wrappers that support vector store functionality."""
@@ -202,7 +186,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         #
         results_count = result["count"]
         # Final update should always be forced
-        self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count, update_force=True)
+        self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count, update_force=True, error=None)
         self._emit_index_event(index_name)
         #
         return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
@@ -211,8 +195,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             # Do maximum effort at least send custom event for supposed changed status
             msg = str(e)
             try:
-                # Error update should also be forced
-                self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"], update_force=True)
+                # Error update should also be forced and include the error message
+                self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"], update_force=True, error=msg)
             except Exception as ie:
                 logger.error(f"Failed to update index meta status to FAILED for index '{index_name}': {ie}")
                 msg = f"{msg}; additionally failed to update index meta status to FAILED: {ie}"
@@ -236,7 +220,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             self._log_tool_event(f"Dependent documents were processed. "
                                  f"Applying chunking tool '{chunking_tool}' if specified and preparing documents for indexing...")
             documents = self._apply_loaders_chunkers(documents, chunking_tool, chunking_config)
-            self._clean_metadata(documents)
+            documents = self._clean_metadata(documents)

             logger.debug(f"Indexing base document #{base_doc_counter}: {base_doc} and all dependent documents: {documents}")

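Why the reassignment in the hunk above matters (illustrative only): if `_clean_metadata` builds and returns new documents instead of mutating its input in place, discarding the return value silently keeps the uncleaned originals:

    def clean(docs: list[dict]) -> list[dict]:
        # Hypothetical cleaner that returns new dicts rather than mutating.
        return [{k: v for k, v in d.items() if k != "secret"} for d in docs]

    docs = [{"text": "hi", "secret": "x"}]
    clean(docs)          # bug: result discarded, docs still contains "secret"
    docs = clean(docs)   # fix: rebind to the cleaned result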
@@ -521,12 +505,14 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             "task_id": None,
             "conversation_id": None,
             "toolkit_id": self.toolkit_id,
+            # Initialize error field to keep track of the latest failure reason if any
+            "error": None,
         }
         metadata["history"] = json.dumps([metadata])
         index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
         add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])

-    def index_meta_update(self, index_name: str, state: str, result: int, update_force: bool = True, interval: Optional[float] = None):
+    def index_meta_update(self, index_name: str, state: str, result: int, update_force: bool = True, interval: Optional[float] = None, error: Optional[str] = None):
         """Update `index_meta` document with optional time-based throttling.

         Args:
@@ -538,6 +524,7 @@
             interval: Optional custom interval (in seconds) for this call when `update_force` is `False`.
                 If `None`, falls back to the value stored in `self._index_meta_config["update_interval"]`
                 if present, otherwise uses `INDEX_META_UPDATE_INTERVAL`.
+            error: Optional error message to record when the state represents a failed index.
         """
         self._ensure_vectorstore_initialized()
         if not hasattr(self, "_index_meta_last_update_time"):
@@ -576,6 +563,12 @@
         metadata["updated"] = result
         metadata["state"] = state
         metadata["updated_on"] = time.time()
+        # Attach error if provided, else clear on success
+        if error is not None:
+            metadata["error"] = error
+        elif state == IndexerKeywords.INDEX_META_COMPLETED.value:
+            # Clear previous error on successful completion
+            metadata["error"] = None
         #
         history_raw = metadata.pop("history", "[]")
         try:
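Taken together, the `error=None` / `error=msg` call sites and the block above give the `index_meta` document simple error bookkeeping: failures record a reason, and a later successful completion clears it. A simplified sketch of the resulting transitions (state strings are assumptions; the real values come from IndexerKeywords):

    COMPLETED = "completed"  # assumed stand-in for INDEX_META_COMPLETED.value

    def apply_update(metadata: dict, state: str, error: str | None = None) -> dict:
        metadata["state"] = state
        if error is not None:
            metadata["error"] = error      # record the latest failure reason
        elif state == COMPLETED:
            metadata["error"] = None       # clear stale errors on success
        return metadata

    meta = {"state": "new", "error": None}
    apply_update(meta, "failed", error="boom")  # {'state': 'failed', 'error': 'boom'}
    apply_update(meta, COMPLETED)               # {'state': 'completed', 'error': None}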
@@ -670,21 +663,49 @@
         """
         Returns the standardized vector search tools (search operations only).
         Index operations are toolkit-specific and should be added manually to each toolkit.
-
+
+        This method constructs the argument schemas for each tool, merging base parameters with any extra parameters
+        defined in the subclass. It also handles the special case for chunking tools and their configuration.
+
         Returns:
-            List of tool dictionaries with name, ref, description, and args_schema
+            list: List of tool dictionaries with name, ref, description, and args_schema.
         """
+        index_params = {
+            "index_name": (
+                str,
+                Field(description="Index name (max 7 characters)", min_length=1, max_length=7)
+            ),
+            "clean_index": (
+                Optional[bool],
+                Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")
+            ),
+            "progress_step": (
+                Optional[int],
+                Field(default=10, ge=0, le=100, description="Optional step size for progress reporting during indexing")
+            ),
+        }
+        chunking_config = (
+            Optional[dict],
+            Field(description="Chunking tool configuration", default=loaders_allowed_to_override)
+        )
+
+        index_extra_params = self._index_tool_params() or {}
+        chunking_tool = index_extra_params.pop("chunking_tool", None)
+        if chunking_tool:
+            index_params = {
+                **index_params,
+                "chunking_tool": chunking_tool,
+            }
+        index_params["chunking_config"] = chunking_config
+        index_args_schema = create_model("IndexData", **index_params, **index_extra_params)
+
         return [
             {
                 "name": IndexTools.INDEX_DATA.value,
                 "mode": IndexTools.INDEX_DATA.value,
                 "ref": self.index_data,
                 "description": "Loads data to index.",
-                "args_schema": create_model(
-                    "IndexData",
-                    __base__=BaseIndexDataParams,
-                    **self._index_tool_params() if self._index_tool_params() else {}
-                )
+                "args_schema": index_args_schema,
             },
             {
                 "name": IndexTools.SEARCH_INDEX.value,
@@ -13,6 +13,7 @@ from ..utils import clean_string, get_max_toolkit_length, check_connection_respo
 from ...configurations.bitbucket import BitbucketConfiguration
 from ...configurations.pgvector import PgVectorConfiguration
 import requests
+from ...runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META


 name = "bitbucket"

@@ -114,7 +115,7 @@ class AlitaBitbucketToolkit(BaseToolkit):
                 name=tool["name"],
                 description=description,
                 args_schema=tool["args_schema"],
-                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+                metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
             ))
         return cls(tools=tools)

@@ -57,7 +57,7 @@ SetActiveBranchModel = create_model(

 ListBranchesInRepoModel = create_model(
     "ListBranchesInRepoModel",
-    limit=(Optional[int], Field(default=20, description="Maximum number of branches to return. If not provided, all branches will be returned.")),
+    limit=(Optional[int], Field(default=20, description="Maximum number of branches to return. If not provided, all branches will be returned.", gt=0)),
     branch_wildcard=(Optional[str], Field(default=None, description="Wildcard pattern to filter branches by name. If not provided, all branches will be returned."))
 )

@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, Any, Dict, List
 from atlassian.bitbucket import Bitbucket, Cloud
 from langchain_core.tools import ToolException
 from requests import Response
-from ..ado.utils import extract_old_new_pairs
+from ..utils.text_operations import parse_old_new_markers

 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.DEBUG)
@@ -145,7 +145,7 @@ class BitbucketServerApi(BitbucketApiAbstract):
     def update_file(self, file_path: str, update_query: str, branch: str) -> str:
         file_content = self.get_file(file_path=file_path, branch=branch)
         updated_file_content = file_content
-        for old, new in extract_old_new_pairs(update_query):
+        for old, new in parse_old_new_markers(update_query):
             if not old.strip():
                 continue
             updated_file_content = updated_file_content.replace(old, new)
@@ -319,7 +319,7 @@ class BitbucketCloudApi(BitbucketApiAbstract):

         file_content = self.get_file(file_path=file_path, branch=branch)
         updated_file_content = file_content
-        for old, new in extract_old_new_pairs(file_query=update_query):
+        for old, new in parse_old_new_markers(file_query=update_query):
             if not old.strip():
                 continue
             updated_file_content = updated_file_content.replace(old, new)
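`parse_old_new_markers` (relocated from `ado.utils.extract_old_new_pairs` into the shared `utils/text_operations` module, which also grew in this release) yields `(old, new)` replacement pairs from the update query. Assuming the OLD/NEW marker convention used by this SDK's update_file tools, a usage sketch (the query and file contents are hypothetical, and the call signature is assumed from the hunks above):

    update_query = (
        "OLD <<<<\n"
        'version = "1.0"\n'
        ">>>> OLD\n"
        "NEW <<<<\n"
        'version = "2.0"\n'
        ">>>> NEW"
    )

    file_content = 'name = "demo"\nversion = "1.0"\n'
    for old, new in parse_old_new_markers(update_query):  # signature assumed
        if not old.strip():
            continue
        file_content = file_content.replace(old, new)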
@@ -128,7 +128,7 @@ class BrowserToolkit(BaseToolkit):
             if toolkit_name:
                 tool_entry.description = f"{tool_entry.description}\nToolkit: {toolkit_name}"
                 tool_entry.description = tool_entry.description[:1000]
-            tool_entry.metadata = {"toolkit_name": toolkit_name}
+            tool_entry.metadata = {"toolkit_name": toolkit_name, "toolkit_type": name}
             tools.append(tool_entry)
         return cls(tools=tools)

@@ -77,7 +77,7 @@ class AlitaCarrierToolkit(BaseToolkit):
                 if toolkit_name:
                     tool_instance.description = f"{tool_instance.description}\nToolkit: {toolkit_name}"
                     tool_instance.description = tool_instance.description[:1000]
-                tool_instance.metadata = {"toolkit_name": toolkit_name}
+                tool_instance.metadata = {"toolkit_name": toolkit_name, "toolkit_type": name}
                 tools.append(tool_instance)
                 logger.info(f"[AlitaCarrierToolkit] Successfully initialized tool '{tool_instance.name}'")
             except Exception as e:
@@ -40,25 +40,49 @@ class Treesitter(ABC):
         return TreesitterRegistry.create_treesitter(language)

     def parse(self, file_bytes: bytes) -> list[TreesitterMethodNode]:
-        """
-        Parses the given file bytes and extracts method nodes.
+        """Parses the given file bytes and extracts method nodes.
+
+        If no nodes matching the configured ``method_declaration_identifier`` are
+        found, a single fallback node spanning the entire file is returned so
+        that callers always receive at least one ``TreesitterMethodNode``.

         Args:
             file_bytes (bytes): The content of the file to be parsed.

         Returns:
-            list[TreesitterMethodNode]: A list of TreesitterMethodNode objects representing the methods in the file.
+            list[TreesitterMethodNode]: A list of TreesitterMethodNode objects
+                representing the methods in the file, or a single fallback node
+                covering the whole file when no methods are detected.
         """
         self.tree = self.parser.parse(file_bytes)
-        result = []
         methods = self._query_all_methods(self.tree.root_node)
-        for method in methods:
-            method_name = self._query_method_name(method["method"])
-            doc_comment = method["doc_comment"]
-            result.append(
-                TreesitterMethodNode(method_name, doc_comment, None, method["method"])
+
+        # Normal path: at least one method node was found.
+        if methods:
+            result: list[TreesitterMethodNode] = []
+            for method in methods:
+                method_name = self._query_method_name(method["method"])
+                doc_comment = method["doc_comment"]
+                result.append(
+                    TreesitterMethodNode(
+                        method_name, doc_comment, None, method["method"]
+                    )
+                )
+            return result
+
+        # Fallback path: no method nodes were found. Return a single node that
+        # spans the entire file so that callers can still index/summarize the
+        # content even when the language-specific patterns do not match.
+        full_source = file_bytes.decode(errors="replace")
+        fallback_node = self.tree.root_node
+        return [
+            TreesitterMethodNode(
+                name=None,
+                doc_comment=None,
+                method_source_code=full_source,
+                node=fallback_node,
             )
-        return result
+        ]

     def _query_all_methods(
         self,
@@ -71,7 +95,8 @@
             node (tree_sitter.Node): The root node to start the query from.

         Returns:
-            list: A list of dictionaries, each containing a method node and its associated doc comment (if any).
+            list: A list of dictionaries, each containing a method node and its
+                associated doc comment (if any).
         """
         methods = []
         if node.type == self.method_declaration_identifier:
@@ -88,8 +113,7 @@
         return methods

     def _query_method_name(self, node: tree_sitter.Node):
-        """
-        Queries the method name from the given syntax tree node.
+        """Queries the method name from the given syntax tree node.

         Args:
             node (tree_sitter.Node): The syntax tree node to query.
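A usage sketch of the new `parse` fallback (the `create_treesitter` factory and `Language` enum member are assumptions based on this module's registry call above, and attribute names are assumed to mirror the constructor arguments):

    source = b"x = 1\ny = 2\n"   # no method/function declarations at all

    parser = Treesitter.create_treesitter(Language.PYTHON)  # assumed API
    nodes = parser.parse(source)

    assert len(nodes) == 1                    # a single whole-file node, not []
    assert nodes[0].name is None              # fallback node has no method name
    assert "x = 1" in nodes[0].method_source_code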
@@ -7,6 +7,7 @@ from .api_wrapper import AWSToolConfig
 from ...base.tool import BaseAction
 from ...elitea_base import filter_missconfigured_index_tools
 from ...utils import clean_string, get_max_toolkit_length
+from ....runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META

 name = "aws"

@@ -64,7 +65,7 @@ class AWSToolkit(BaseToolkit):
                 name=tool["name"],
                 description=description,
                 args_schema=tool["args_schema"],
-                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+                metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
             ))
         return cls(tools=tools)

@@ -7,6 +7,7 @@ from .api_wrapper import AzureApiWrapper
 from ...base.tool import BaseAction
 from ...elitea_base import filter_missconfigured_index_tools
 from ...utils import clean_string, get_max_toolkit_length
+from ....runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META

 name = "azure"

@@ -57,7 +58,7 @@ class AzureToolkit(BaseToolkit):
                 name=tool["name"],
                 description=description,
                 args_schema=tool["args_schema"],
-                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+                metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
             ))
         return cls(tools=tools)

@@ -7,6 +7,7 @@ from .api_wrapper import GCPApiWrapper
 from ...base.tool import BaseAction
 from ...elitea_base import filter_missconfigured_index_tools
 from ...utils import clean_string, get_max_toolkit_length
+from ....runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META

 name = "gcp"

@@ -51,7 +52,7 @@ class GCPToolkit(BaseToolkit):
                 name=tool["name"],
                 description=description,
                 args_schema=tool["args_schema"],
-                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+                metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
             ))
         return cls(tools=tools)

@@ -7,6 +7,7 @@ from .api_wrapper import KubernetesApiWrapper
 from ...base.tool import BaseAction
 from ...elitea_base import filter_missconfigured_index_tools
 from ...utils import clean_string, get_max_toolkit_length
+from ....runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META

 name = "kubernetes"

@@ -61,7 +62,7 @@ class KubernetesToolkit(BaseToolkit):
                 name=tool["name"],
                 description=description,
                 args_schema=tool["args_schema"],
-                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+                metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
             ))
         return cls(tools=tools)

@@ -6,6 +6,7 @@ from pydantic import BaseModel, create_model, Field
 from .api_wrapper import PythonLinter
 from ...base.tool import BaseAction
 from ...utils import clean_string, get_max_toolkit_length
+from ....runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META

 name = "python_linter"

@@ -49,7 +50,7 @@ class PythonLinterToolkit(BaseToolkit):
                 name=tool["name"],
                 description=description,
                 args_schema=tool["args_schema"],
-                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+                metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
             ))
         return cls(tools=tools)

@@ -7,6 +7,7 @@ from ...base.tool import BaseAction
 from ...elitea_base import filter_missconfigured_index_tools
 from ...utils import clean_string, get_max_toolkit_length
 from ....configurations.sonar import SonarConfiguration
+from ....runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META

 name = "sonar"

@@ -65,7 +66,7 @@ class SonarToolkit(BaseToolkit):
                 name=tool["name"],
                 description=description,
                 args_schema=tool["args_schema"],
-                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+                metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
             ))
         return cls(tools=tools)

@@ -38,12 +38,14 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
               branch: Optional[str] = None,
               whitelist: Optional[List[str]] = None,
               blacklist: Optional[List[str]] = None,
+              chunking_config: Optional[dict] = None,
               **kwargs) -> Generator[Document, None, None]:
         """Index repository files in the vector store using code parsing."""
         yield from self.loader(
             branch=branch,
             whitelist=whitelist,
-            blacklist=blacklist
+            blacklist=blacklist,
+            chunking_config=chunking_config
         )

     def _extend_data(self, documents: Generator[Document, None, None]):
@@ -67,7 +69,8 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                branch: Optional[str] = None,
                whitelist: Optional[List[str]] = None,
                blacklist: Optional[List[str]] = None,
-               chunked: bool = True) -> Generator[Document, None, None]:
+               chunked: bool = True,
+               chunking_config: Optional[dict] = None) -> Generator[Document, None, None]:
         """
         Generates Documents from files in a branch, respecting whitelist and blacklist patterns.

@@ -77,6 +80,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
         - chunked (bool): If True (default), applies universal chunker based on file type.
                           If False, returns raw Documents without chunking.
+        - chunking_config (Optional[dict]): Chunking configuration by file extension

         Returns:
         - generator: Yields Documents from files matching the whitelist but not the blacklist.
@@ -101,6 +105,19 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         """
         import hashlib

+        # Auto-include extensions from chunking_config if whitelist is specified
+        # This allows chunking config to work without manually adding extensions to whitelist
+        if chunking_config and whitelist:
+            for ext_pattern in chunking_config.keys():
+                # Normalize extension pattern (both ".cbl" and "*.cbl" should work)
+                normalized = ext_pattern if ext_pattern.startswith('*') else f'*{ext_pattern}'
+                if normalized not in whitelist:
+                    whitelist.append(normalized)
+                    self._log_tool_event(
+                        message=f"Auto-included extension '{normalized}' from chunking_config",
+                        tool_name="loader"
+                    )
+
         _files = self.__handle_get_files("", self.__get_branch(branch))
         self._log_tool_event(message="Listing files in branch", tool_name="loader")
         logger.info(f"Files in branch: {_files}")