alita-sdk 0.3.257__py3-none-any.whl → 0.3.584__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk has been flagged as possibly problematic.
Files changed (281)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3794 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1073 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +72 -12
  30. alita_sdk/community/inventory/__init__.py +236 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  58. alita_sdk/community/inventory/visualize.py +1370 -0
  59. alita_sdk/configurations/__init__.py +11 -0
  60. alita_sdk/configurations/ado.py +148 -2
  61. alita_sdk/configurations/azure_search.py +1 -1
  62. alita_sdk/configurations/bigquery.py +1 -1
  63. alita_sdk/configurations/bitbucket.py +94 -2
  64. alita_sdk/configurations/browser.py +18 -0
  65. alita_sdk/configurations/carrier.py +19 -0
  66. alita_sdk/configurations/confluence.py +130 -1
  67. alita_sdk/configurations/delta_lake.py +1 -1
  68. alita_sdk/configurations/figma.py +76 -5
  69. alita_sdk/configurations/github.py +65 -1
  70. alita_sdk/configurations/gitlab.py +81 -0
  71. alita_sdk/configurations/google_places.py +17 -0
  72. alita_sdk/configurations/jira.py +103 -0
  73. alita_sdk/configurations/openapi.py +323 -0
  74. alita_sdk/configurations/postman.py +1 -1
  75. alita_sdk/configurations/qtest.py +72 -3
  76. alita_sdk/configurations/report_portal.py +115 -0
  77. alita_sdk/configurations/salesforce.py +19 -0
  78. alita_sdk/configurations/service_now.py +1 -12
  79. alita_sdk/configurations/sharepoint.py +167 -0
  80. alita_sdk/configurations/sonar.py +18 -0
  81. alita_sdk/configurations/sql.py +20 -0
  82. alita_sdk/configurations/testio.py +101 -0
  83. alita_sdk/configurations/testrail.py +88 -0
  84. alita_sdk/configurations/xray.py +94 -1
  85. alita_sdk/configurations/zephyr_enterprise.py +94 -1
  86. alita_sdk/configurations/zephyr_essential.py +95 -0
  87. alita_sdk/runtime/clients/artifact.py +21 -4
  88. alita_sdk/runtime/clients/client.py +458 -67
  89. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  90. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  91. alita_sdk/runtime/clients/sandbox_client.py +352 -0
  92. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  93. alita_sdk/runtime/langchain/assistant.py +183 -43
  94. alita_sdk/runtime/langchain/constants.py +647 -1
  95. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  96. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
  97. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
  98. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  99. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
  100. alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
  101. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
  102. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
  103. alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
  104. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
  105. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
  106. alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
  107. alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
  108. alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
  109. alita_sdk/runtime/langchain/langraph_agent.py +493 -105
  110. alita_sdk/runtime/langchain/utils.py +118 -8
  111. alita_sdk/runtime/llms/preloaded.py +2 -6
  112. alita_sdk/runtime/models/mcp_models.py +61 -0
  113. alita_sdk/runtime/skills/__init__.py +91 -0
  114. alita_sdk/runtime/skills/callbacks.py +498 -0
  115. alita_sdk/runtime/skills/discovery.py +540 -0
  116. alita_sdk/runtime/skills/executor.py +610 -0
  117. alita_sdk/runtime/skills/input_builder.py +371 -0
  118. alita_sdk/runtime/skills/models.py +330 -0
  119. alita_sdk/runtime/skills/registry.py +355 -0
  120. alita_sdk/runtime/skills/skill_runner.py +330 -0
  121. alita_sdk/runtime/toolkits/__init__.py +28 -0
  122. alita_sdk/runtime/toolkits/application.py +14 -4
  123. alita_sdk/runtime/toolkits/artifact.py +25 -9
  124. alita_sdk/runtime/toolkits/datasource.py +13 -6
  125. alita_sdk/runtime/toolkits/mcp.py +782 -0
  126. alita_sdk/runtime/toolkits/planning.py +178 -0
  127. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  128. alita_sdk/runtime/toolkits/subgraph.py +11 -6
  129. alita_sdk/runtime/toolkits/tools.py +314 -70
  130. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  131. alita_sdk/runtime/tools/__init__.py +24 -0
  132. alita_sdk/runtime/tools/application.py +16 -4
  133. alita_sdk/runtime/tools/artifact.py +367 -33
  134. alita_sdk/runtime/tools/data_analysis.py +183 -0
  135. alita_sdk/runtime/tools/function.py +100 -4
  136. alita_sdk/runtime/tools/graph.py +81 -0
  137. alita_sdk/runtime/tools/image_generation.py +218 -0
  138. alita_sdk/runtime/tools/llm.py +1032 -177
  139. alita_sdk/runtime/tools/loop.py +3 -1
  140. alita_sdk/runtime/tools/loop_output.py +3 -1
  141. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  142. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  143. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  144. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  145. alita_sdk/runtime/tools/planning/models.py +246 -0
  146. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  147. alita_sdk/runtime/tools/router.py +2 -1
  148. alita_sdk/runtime/tools/sandbox.py +375 -0
  149. alita_sdk/runtime/tools/skill_router.py +776 -0
  150. alita_sdk/runtime/tools/tool.py +3 -1
  151. alita_sdk/runtime/tools/vectorstore.py +69 -65
  152. alita_sdk/runtime/tools/vectorstore_base.py +163 -90
  153. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  154. alita_sdk/runtime/utils/constants.py +5 -1
  155. alita_sdk/runtime/utils/mcp_client.py +492 -0
  156. alita_sdk/runtime/utils/mcp_oauth.py +361 -0
  157. alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
  158. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  159. alita_sdk/runtime/utils/streamlit.py +41 -14
  160. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  161. alita_sdk/runtime/utils/utils.py +48 -0
  162. alita_sdk/tools/__init__.py +135 -37
  163. alita_sdk/tools/ado/__init__.py +2 -2
  164. alita_sdk/tools/ado/repos/__init__.py +16 -19
  165. alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
  166. alita_sdk/tools/ado/test_plan/__init__.py +27 -8
  167. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
  168. alita_sdk/tools/ado/wiki/__init__.py +28 -12
  169. alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
  170. alita_sdk/tools/ado/work_item/__init__.py +28 -12
  171. alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
  172. alita_sdk/tools/advanced_jira_mining/__init__.py +13 -8
  173. alita_sdk/tools/aws/delta_lake/__init__.py +15 -11
  174. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  175. alita_sdk/tools/azure_ai/search/__init__.py +14 -8
  176. alita_sdk/tools/base/tool.py +5 -1
  177. alita_sdk/tools/base_indexer_toolkit.py +454 -110
  178. alita_sdk/tools/bitbucket/__init__.py +28 -19
  179. alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
  180. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
  181. alita_sdk/tools/browser/__init__.py +41 -16
  182. alita_sdk/tools/browser/crawler.py +3 -1
  183. alita_sdk/tools/browser/utils.py +15 -6
  184. alita_sdk/tools/carrier/__init__.py +18 -17
  185. alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
  186. alita_sdk/tools/carrier/excel_reporter.py +8 -4
  187. alita_sdk/tools/chunkers/__init__.py +3 -1
  188. alita_sdk/tools/chunkers/code/codeparser.py +1 -1
  189. alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
  190. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  191. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  192. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  193. alita_sdk/tools/cloud/aws/__init__.py +12 -7
  194. alita_sdk/tools/cloud/azure/__init__.py +12 -7
  195. alita_sdk/tools/cloud/gcp/__init__.py +12 -7
  196. alita_sdk/tools/cloud/k8s/__init__.py +12 -7
  197. alita_sdk/tools/code/linter/__init__.py +10 -8
  198. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  199. alita_sdk/tools/code/sonar/__init__.py +21 -13
  200. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  201. alita_sdk/tools/confluence/__init__.py +22 -14
  202. alita_sdk/tools/confluence/api_wrapper.py +197 -58
  203. alita_sdk/tools/confluence/loader.py +14 -2
  204. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  205. alita_sdk/tools/elastic/__init__.py +11 -8
  206. alita_sdk/tools/elitea_base.py +546 -64
  207. alita_sdk/tools/figma/__init__.py +60 -11
  208. alita_sdk/tools/figma/api_wrapper.py +1400 -167
  209. alita_sdk/tools/figma/figma_client.py +73 -0
  210. alita_sdk/tools/figma/toon_tools.py +2748 -0
  211. alita_sdk/tools/github/__init__.py +18 -17
  212. alita_sdk/tools/github/api_wrapper.py +9 -26
  213. alita_sdk/tools/github/github_client.py +81 -12
  214. alita_sdk/tools/github/schemas.py +2 -1
  215. alita_sdk/tools/github/tool.py +5 -1
  216. alita_sdk/tools/gitlab/__init__.py +19 -13
  217. alita_sdk/tools/gitlab/api_wrapper.py +256 -80
  218. alita_sdk/tools/gitlab_org/__init__.py +14 -10
  219. alita_sdk/tools/google/bigquery/__init__.py +14 -13
  220. alita_sdk/tools/google/bigquery/tool.py +5 -1
  221. alita_sdk/tools/google_places/__init__.py +21 -11
  222. alita_sdk/tools/jira/__init__.py +22 -11
  223. alita_sdk/tools/jira/api_wrapper.py +315 -168
  224. alita_sdk/tools/keycloak/__init__.py +11 -8
  225. alita_sdk/tools/localgit/__init__.py +9 -3
  226. alita_sdk/tools/localgit/local_git.py +62 -54
  227. alita_sdk/tools/localgit/tool.py +5 -1
  228. alita_sdk/tools/memory/__init__.py +38 -14
  229. alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
  230. alita_sdk/tools/ocr/__init__.py +11 -8
  231. alita_sdk/tools/openapi/__init__.py +491 -106
  232. alita_sdk/tools/openapi/api_wrapper.py +1357 -0
  233. alita_sdk/tools/openapi/tool.py +20 -0
  234. alita_sdk/tools/pandas/__init__.py +20 -12
  235. alita_sdk/tools/pandas/api_wrapper.py +40 -45
  236. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  237. alita_sdk/tools/postman/__init__.py +11 -11
  238. alita_sdk/tools/postman/api_wrapper.py +19 -8
  239. alita_sdk/tools/postman/postman_analysis.py +8 -1
  240. alita_sdk/tools/pptx/__init__.py +11 -10
  241. alita_sdk/tools/qtest/__init__.py +22 -14
  242. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  243. alita_sdk/tools/rally/__init__.py +13 -10
  244. alita_sdk/tools/report_portal/__init__.py +23 -16
  245. alita_sdk/tools/salesforce/__init__.py +22 -16
  246. alita_sdk/tools/servicenow/__init__.py +21 -16
  247. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  248. alita_sdk/tools/sharepoint/__init__.py +17 -14
  249. alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
  250. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  251. alita_sdk/tools/sharepoint/utils.py +8 -2
  252. alita_sdk/tools/slack/__init__.py +13 -8
  253. alita_sdk/tools/sql/__init__.py +22 -19
  254. alita_sdk/tools/sql/api_wrapper.py +71 -23
  255. alita_sdk/tools/testio/__init__.py +21 -13
  256. alita_sdk/tools/testrail/__init__.py +13 -11
  257. alita_sdk/tools/testrail/api_wrapper.py +214 -46
  258. alita_sdk/tools/utils/__init__.py +28 -4
  259. alita_sdk/tools/utils/content_parser.py +241 -55
  260. alita_sdk/tools/utils/text_operations.py +254 -0
  261. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
  262. alita_sdk/tools/xray/__init__.py +18 -14
  263. alita_sdk/tools/xray/api_wrapper.py +58 -113
  264. alita_sdk/tools/yagmail/__init__.py +9 -3
  265. alita_sdk/tools/zephyr/__init__.py +12 -7
  266. alita_sdk/tools/zephyr_enterprise/__init__.py +16 -9
  267. alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
  268. alita_sdk/tools/zephyr_essential/__init__.py +16 -10
  269. alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
  270. alita_sdk/tools/zephyr_essential/client.py +6 -4
  271. alita_sdk/tools/zephyr_scale/__init__.py +13 -8
  272. alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
  273. alita_sdk/tools/zephyr_squad/__init__.py +12 -7
  274. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/METADATA +184 -37
  275. alita_sdk-0.3.584.dist-info/RECORD +452 -0
  276. alita_sdk-0.3.584.dist-info/entry_points.txt +2 -0
  277. alita_sdk/tools/bitbucket/tools.py +0 -304
  278. alita_sdk-0.3.257.dist-info/RECORD +0 -343
  279. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/WHEEL +0 -0
  280. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/licenses/LICENSE +0 -0
  281. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/top_level.txt +0 -0
alita_sdk/tools/elitea_base.py (+546 -64):

@@ -11,12 +11,14 @@ from pydantic import BaseModel, create_model, Field, SecretStr
 
 # from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
 from .chunkers import markdown_chunker
-from .utils import TOOLKIT_SPLITTER
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.utils.utils import IndexerKeywords
 
 logger = logging.getLogger(__name__)
 
+INDEX_TOOL_NAMES = ['index_data', 'remove_index', 'list_collections', 'search_index', 'stepback_search_index',
+                    'stepback_summary_index']
+
 LoaderSchema = create_model(
     "LoaderSchema",
     branch=(Optional[str], Field(
@@ -30,36 +32,39 @@ LoaderSchema = create_model(
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
-    collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
 )
 
 BaseCodeIndexParams = create_model(
     "BaseCodeIndexParams",
-    collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
+    clean_index=(Optional[bool], Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
+    progress_step=(Optional[int], Field(default=5, ge=0, le=100,
+                                        description="Optional step size for progress reporting during indexing")),
     branch=(Optional[str], Field(description="Branch to index files from. Defaults to active branch if None.", default=None)),
     whitelist=(Optional[List[str]], Field(description='File extensions or paths to include. Defaults to all files if None. Example: ["*.md", "*.java"]', default=None)),
     blacklist=(Optional[List[str]], Field(description='File extensions or paths to exclude. Defaults to no exclusions if None. Example: ["*.md", "*.java"]', default=None)),
-    clean_index=(Optional[bool], Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
+
 )
 
 RemoveIndexParams = create_model(
     "RemoveIndexParams",
-    collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
 )
 
 BaseSearchParams = create_model(
     "BaseSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    collection_suffix=(Optional[str], Field(
-        description="Optional suffix for collection name (max 7 characters). Leave empty to search across all datasets",
+    index_name=(Optional[str], Field(
+        description="Optional index name (max 7 characters). Leave empty to search across all datasets",
         default="", max_length=7)),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5)),
-    search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    search_top=(Optional[int], Field(description="Number of top results to return", default=10, ge=0)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
         default=None
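
The headline change in these schema models is the rename of collection_suffix to index_name (same 1-7 character budget), plus ge/le bounds on cut_off and search_top. A minimal standalone sketch, not taken from the package, of how such a create_model schema enforces the length constraint:

```python
from pydantic import Field, ValidationError, create_model

# Mirrors the BaseIndexParams definition above (illustrative only)
IndexParams = create_model(
    "IndexParams",
    index_name=(str, Field(description="Index name (max 7 characters)",
                           min_length=1, max_length=7)),
)

print(IndexParams(index_name="docs").index_name)  # 4 chars: passes
try:
    IndexParams(index_name="much_too_long")       # 13 chars: rejected
except ValidationError as err:
    print(err.errors()[0]["type"])                # string_too_long (pydantic v2)
```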
@@ -81,52 +86,130 @@ BaseSearchParams = create_model(
 BaseStepbackSearchParams = create_model(
     "BaseStepbackSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
     messages=(Optional[List], Field(description="Chat messages for stepback search context", default=[])),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5)),
-    search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
-    reranker=(Optional[dict], Field(
-        description="Reranker configuration. Can be a dictionary with reranking parameters.",
-        default={}
-    )),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    search_top=(Optional[int], Field(description="Number of top results to return", default=10, ge=0)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
         default=None
     )),
-    reranking_config=(Optional[Dict[str, Dict[str, Any]]], Field(
-        description="Reranking configuration. Can be a dictionary with reranking settings.",
-        default=None
-    )),
     extended_search=(Optional[List[str]], Field(
         description="List of additional fields to include in the search results.",
         default=None
     )),
+    reranker=(Optional[dict], Field(
+        description="Reranker configuration. Can be a dictionary with reranking parameters.",
+        default={}
+    )),
+    reranking_config=(Optional[Dict[str, Dict[str, Any]]], Field(
+        description="Reranking configuration. Can be a dictionary with reranking settings.",
+        default=None
+    )),
+
 )
 
 BaseIndexDataParams = create_model(
     "indexData",
     __base__=BaseIndexParams,
-    progress_step=(Optional[int], Field(default=5, ge=0, le=100,
-                                        description="Optional step size for progress reporting during indexing")),
     clean_index=(Optional[bool], Field(default=False,
                                        description="Optional flag to enforce clean existing index before indexing new data")),
+    progress_step=(Optional[int], Field(default=5, ge=0, le=100,
+                                        description="Optional step size for progress reporting during indexing")),
     chunking_tool=(Literal[None,'markdown', 'statistical', 'proposal'], Field(description="Name of chunking tool", default=None)),
     chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default_factory=dict)),
 )
 
+# File Operations Schema Models
+ReadFileInput = create_model(
+    "ReadFileInput",
+    file_path=(str, Field(description="Path to the file to read")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    offset=(Optional[int], Field(description="Starting line number (1-indexed, inclusive). Read from this line onwards.", default=None, ge=1)),
+    limit=(Optional[int], Field(description="Number of lines to read from offset. If None, reads to end.", default=None, ge=1)),
+    head=(Optional[int], Field(description="Read only the first N lines. Alternative to offset/limit.", default=None, ge=1)),
+    tail=(Optional[int], Field(description="Read only the last N lines. Alternative to offset/limit.", default=None, ge=1)),
+)
+
+ReadFileChunkInput = create_model(
+    "ReadFileChunkInput",
+    file_path=(str, Field(description="Path to the file to read")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    start_line=(int, Field(description="Starting line number (1-indexed, inclusive)", ge=1)),
+    end_line=(Optional[int], Field(description="Ending line number (1-indexed, inclusive). If None, reads to end.", default=None, ge=1)),
+)
+
+ReadMultipleFilesInput = create_model(
+    "ReadMultipleFilesInput",
+    file_paths=(List[str], Field(description="List of file paths to read", min_length=1)),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    offset=(Optional[int], Field(description="Starting line number for all files (1-indexed)", default=None, ge=1)),
+    limit=(Optional[int], Field(description="Number of lines to read from offset for all files", default=None, ge=1)),
+)
+
+EditFileInput = create_model(
+    "EditFileInput",
+    file_path=(str, Field(description="Path to the file to edit. Must be a text file (markdown, txt, csv, json, xml, html, yaml, etc.)")),
+    file_query=(str, Field(description="""Edit instructions with OLD/NEW markers. Format:
+OLD <<<<
+old content to replace
+>>>> OLD
+NEW <<<<
+new content
+>>>> NEW
+
+Multiple OLD/NEW pairs can be provided for multiple edits.""")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    commit_message=(Optional[str], Field(description="Commit message for the change (VCS toolkits only)", default=None)),
+)
+
+SearchFileInput = create_model(
+    "SearchFileInput",
+    file_path=(str, Field(description="Path to the file to search")),
+    pattern=(str, Field(description="Search pattern. Treated as regex by default unless is_regex=False.")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    is_regex=(bool, Field(description="Whether pattern is a regex. Default is True for flexible matching.", default=True)),
+    context_lines=(int, Field(description="Number of lines before/after match to include for context", default=2, ge=0)),
+)
 
-class BaseToolApiWrapper(BaseModel):
 
+class BaseToolApiWrapper(BaseModel):
+
+    # Optional RunnableConfig for CLI/standalone usage (allows dispatch_custom_event to work)
+    _runnable_config: Optional[Dict[str, Any]] = None
+    # toolkit id propagated from backend
+    toolkit_id: int = 0
     def get_available_tools(self):
         raise NotImplementedError("Subclasses should implement this method")
 
-    def _log_tool_event(self, message: str, tool_name: str = None):
-        """Log data and dispatch custom event for the tool"""
+    def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
+        """
+        Set the RunnableConfig for dispatching custom events.
+
+        This is required when running outside of a LangChain agent context
+        (e.g., from CLI). Without a config containing a run_id,
+        dispatch_custom_event will fail with "Unable to dispatch an adhoc event
+        without a parent run id".
+
+        Args:
+            config: A RunnableConfig dict with at least {'run_id': uuid}
+        """
+        self._runnable_config = config
+
+    def _log_tool_event(self, message: str, tool_name: str = None, config: Optional[Dict[str, Any]] = None):
+        """Log data and dispatch custom event for the tool.
+
+        Args:
+            message: The message to log
+            tool_name: Name of the tool (defaults to 'tool_progress')
+            config: Optional RunnableConfig. If not provided, uses self._runnable_config.
+                Required when running outside a LangChain agent context.
+        """
 
         try:
             from langchain_core.callbacks import dispatch_custom_event
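
set_runnable_config exists because dispatch_custom_event refuses to fire without a parent run id when no LangChain agent is driving the call (for example, the new CLI). A self-contained sketch of the config-fallback pattern, with stand-in names rather than the SDK classes:

```python
import uuid
from typing import Any, Dict, Optional

class ProgressEmitter:
    """Stand-in for BaseToolApiWrapper's event plumbing (illustrative only)."""

    def __init__(self) -> None:
        self._runnable_config: Optional[Dict[str, Any]] = None

    def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
        self._runnable_config = config

    def log_event(self, message: str,
                  config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        # Same fallback as the diff: an explicit config wins, else the instance config
        effective_config = config or self._runnable_config
        if not effective_config or "run_id" not in effective_config:
            raise RuntimeError(
                "Unable to dispatch an adhoc event without a parent run id")
        return {"message": message, "run_id": effective_config["run_id"]}

emitter = ProgressEmitter()
emitter.set_runnable_config({"run_id": uuid.uuid4()})  # CLI-style setup
print(emitter.log_event("5 files indexed"))
```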
@@ -135,6 +218,10 @@ class BaseToolApiWrapper(BaseModel):
             tool_name = 'tool_progress'
 
         logger.info(message)
+
+        # Use provided config, fall back to instance config
+        effective_config = config or self._runnable_config
+
         dispatch_custom_event(
             name="thinking_step",
             data={
@@ -142,14 +229,14 @@ class BaseToolApiWrapper(BaseModel):
                 "tool_name": tool_name,
                 "toolkit": self.__class__.__name__,
             },
+            config=effective_config,
         )
     except Exception as e:
         logger.warning(f"Failed to dispatch progress event: {str(e)}")
 
 
     def run(self, mode: str, *args: Any, **kwargs: Any):
-        if TOOLKIT_SPLITTER in mode:
-            mode = mode.rsplit(TOOLKIT_SPLITTER, maxsplit=1)[1]
+        # Mode is now the clean tool name (no prefix to remove)
        for tool in self.get_available_tools():
            if tool["name"] == mode:
                try:
@@ -158,6 +245,11 @@ class BaseToolApiWrapper(BaseModel):
                    # execution = str(execution)
                    return execution
                except Exception as e:
+                    # Re-raise McpAuthorizationRequired directly without wrapping
+                    from alita_sdk.runtime.utils.mcp_oauth import McpAuthorizationRequired
+                    if isinstance(e, McpAuthorizationRequired):
+                        raise
+
                    # Catch all tool execution exceptions and provide user-friendly error messages
                    error_type = type(e).__name__
                    error_message = str(e)
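
The addition above gives McpAuthorizationRequired a pass-through so OAuth flows are not swallowed by the generic error handler. The pattern, sketched standalone with a stand-in exception class:

```python
class AuthorizationRequired(Exception):
    """Stand-in for McpAuthorizationRequired (illustrative only)."""

def run_tool(fn, *args):
    try:
        return fn(*args)
    except AuthorizationRequired:
        raise  # control-flow exception: let the caller start the OAuth flow
    except Exception as e:
        # everything else becomes a user-friendly message
        return f"Tool failed ({type(e).__name__}): {e}"

print(run_tool(lambda: 1 / 0))  # Tool failed (ZeroDivisionError): division by zero
```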
@@ -317,12 +409,12 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         #
         docs = base_chunker(file_content_generator=docs, config=base_chunking_config)
         #
-        collection_suffix = kwargs.get("collection_suffix")
+        index_name = kwargs.get("index_name")
         progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         vs = self._init_vector_store()
         #
-        return vs.index_documents(docs, collection_suffix=collection_suffix, progress_step=progress_step, clean_index=clean_index)
+        return vs.index_documents(docs, index_name=index_name, progress_step=progress_step, clean_index=clean_index)
 
     def _process_documents(self, documents: List[Document]) -> Generator[Document, None, None]:
         """
@@ -392,10 +484,10 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         )
         return self._vector_store
 
-    def remove_index(self, collection_suffix: str = ""):
+    def remove_index(self, index_name: str = ""):
         """Cleans the indexed data in the collection."""
-        self._init_vector_store()._clean_collection(collection_suffix=collection_suffix)
-        return (f"Collection '{collection_suffix}' has been removed from the vector store.\n"
+        self._init_vector_store()._clean_collection(index_name=index_name)
+        return (f"Collection '{index_name}' has been removed from the vector store.\n"
                 f"Available collections: {self.list_collections()}")
 
     def list_collections(self):
@@ -403,19 +495,19 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         vectorstore_wrapper = self._init_vector_store()
         return vectorstore_wrapper.list_collections()
 
-    def _build_collection_filter(self, filter: dict | str, collection_suffix: str = "") -> dict:
+    def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
         """Builds a filter for the collection based on the provided suffix."""
 
         filter = filter if isinstance(filter, dict) else json.loads(filter)
-        if collection_suffix:
+        if index_name:
             filter.update({"collection": {
-                "$eq": collection_suffix.strip()
+                "$eq": index_name.strip()
             }})
         return filter
 
     def search_index(self,
                      query: str,
-                     collection_suffix: str = "",
+                     index_name: str = "",
                      filter: dict | str = {}, cut_off: float = 0.5,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
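
Renames aside, _build_collection_filter's logic is unchanged: a non-empty index_name is folded into the metadata filter as a `collection` equality clause. A standalone sketch with the same logic shows the resulting filter:

```python
import json

def build_collection_filter(filter, index_name=""):
    # Same logic as the method above, extracted for illustration
    filter = filter if isinstance(filter, dict) else json.loads(filter)
    if index_name:
        filter.update({"collection": {"$eq": index_name.strip()}})
    return filter

print(build_collection_filter('{"status": "active"}', "docs"))
# {'status': 'active', 'collection': {'$eq': 'docs'}}
```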
@@ -424,7 +516,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                      **kwargs):
         """ Searches indexed documents in the vector store."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter, collection_suffix)
+        filter = self._build_collection_filter(filter, index_name)
         found_docs = vectorstore.search_documents(
             query,
             doctype=self.doctype,
@@ -441,7 +533,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def stepback_search_index(self,
                      query: str,
                      messages: List[Dict[str, Any]] = [],
-                     collection_suffix: str = "",
+                     index_name: str = "",
                      filter: dict | str = {}, cut_off: float = 0.5,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
@@ -450,7 +542,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                      **kwargs):
         """ Searches indexed documents in the vector store."""
 
-        filter = self._build_collection_filter(filter, collection_suffix)
+        filter = self._build_collection_filter(filter, index_name)
         vectorstore = self._init_vector_store()
         found_docs = vectorstore.stepback_search(
             query,
@@ -468,7 +560,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def stepback_summary_index(self,
                      query: str,
                      messages: List[Dict[str, Any]] = [],
-                     collection_suffix: str = "",
+                     index_name: str = "",
                      filter: dict | str = {}, cut_off: float = 0.5,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
@@ -477,7 +569,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                      **kwargs):
         """ Generates a summary of indexed documents using stepback technique."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter, collection_suffix)
+        filter = self._build_collection_filter(filter, index_name)
 
         found_docs = vectorstore.stepback_summary(
             query,
@@ -547,11 +639,281 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
     def _get_files(self):
         raise NotImplementedError("Subclasses should implement this method")
 
-    def _read_file(self, file_path: str, branch: str):
+    def _read_file(
+        self,
+        file_path: str,
+        branch: str = None,
+        offset: Optional[int] = None,
+        limit: Optional[int] = None,
+        head: Optional[int] = None,
+        tail: Optional[int] = None,
+        **kwargs  # Allow subclasses to have additional parameters
+    ) -> str:
+        """
+        Read file content with optional partial read support.
+
+        Subclasses should implement this method. If they don't support partial reads,
+        they can accept **kwargs and ignore offset/limit/head/tail parameters - the base
+        class high-level methods will apply slicing client-side.
+
+        Args:
+            file_path: Path to the file
+            branch: Branch name (None for active branch)
+            offset: Starting line number (1-indexed)
+            limit: Number of lines to read from offset
+            head: Read only first N lines
+            tail: Read only last N lines
+            **kwargs: Additional toolkit-specific parameters (e.g., repo_name for GitHub)
+
+        Returns:
+            File content as string
+        """
         raise NotImplementedError("Subclasses should implement this method")
+
+    def _write_file(
+        self,
+        file_path: str,
+        content: str,
+        branch: str = None,
+        commit_message: str = None
+    ) -> str:
+        """
+        Write content to a file.
+
+        Subclasses should implement this method to enable edit_file functionality.
+        For VCS toolkits, this may involve creating or updating files with commits.
+
+        Args:
+            file_path: Path to the file
+            content: New file content
+            branch: Branch name (None for active branch)
+            commit_message: Commit message (VCS toolkits only)
+
+        Returns:
+            Success message
+        """
+        raise NotImplementedError("Subclasses should implement _write_file to enable editing")
 
     def _file_commit_hash(self, file_path: str, branch: str):
         pass
+
+    def read_file_chunk(
+        self,
+        file_path: str,
+        start_line: int,
+        end_line: Optional[int] = None,
+        branch: str = None
+    ) -> str:
+        """
+        Read a specific range of lines from a file.
+
+        Args:
+            file_path: Path to the file
+            start_line: Starting line number (1-indexed, inclusive)
+            end_line: Ending line number (1-indexed, inclusive). If None, reads to end.
+            branch: Branch name (None for active branch)
+
+        Returns:
+            File content for the specified line range
+        """
+        from .utils.text_operations import apply_line_slice
+
+        # Calculate offset and limit from start_line and end_line
+        offset = start_line
+        limit = (end_line - start_line + 1) if end_line is not None else None
+
+        # Read the file with offset/limit
+        content = self._read_file(file_path, branch, offset=offset, limit=limit)
+
+        # Apply client-side slicing if toolkit doesn't support partial reads
+        # (toolkit's _read_file will return full content if it ignores offset/limit)
+        return apply_line_slice(content, offset=offset, limit=limit)
+
+    def read_multiple_files(
+        self,
+        file_paths: List[str],
+        branch: str = None,
+        offset: Optional[int] = None,
+        limit: Optional[int] = None
+    ) -> Dict[str, str]:
+        """
+        Read multiple files in batch.
+
+        Args:
+            file_paths: List of file paths to read
+            branch: Branch name (None for active branch)
+            offset: Starting line number for all files (1-indexed)
+            limit: Number of lines to read from offset for all files
+
+        Returns:
+            Dictionary mapping file paths to their content (or error messages)
+        """
+        results = {}
+
+        for file_path in file_paths:
+            try:
+                content = self._read_file(
+                    file_path,
+                    branch,
+                    offset=offset,
+                    limit=limit
+                )
+                results[file_path] = content
+            except Exception as e:
+                results[file_path] = f"Error reading file: {str(e)}"
+                logger.error(f"Failed to read {file_path}: {e}")
+
+        return results
+
+    def search_file(
+        self,
+        file_path: str,
+        pattern: str,
+        branch: str = None,
+        is_regex: bool = True,
+        context_lines: int = 2
+    ) -> str:
+        """
+        Search for pattern in file content with context.
+
+        Args:
+            file_path: Path to the file
+            pattern: Search pattern (regex if is_regex=True, else literal)
+            branch: Branch name (None for active branch)
+            is_regex: Whether pattern is regex (default True)
+            context_lines: Lines of context before/after matches (default 2)
+
+        Returns:
+            Formatted string with search results and context
+        """
+        from .utils.text_operations import search_in_content
+
+        # Read full file content
+        content = self._read_file(file_path, branch)
+
+        # Search for pattern
+        matches = search_in_content(content, pattern, is_regex, context_lines)
+
+        if not matches:
+            return f"No matches found for pattern '{pattern}' in {file_path}"
+
+        # Format results
+        result_lines = [f"Found {len(matches)} match(es) for pattern '{pattern}' in {file_path}:\n"]
+
+        for i, match in enumerate(matches, 1):
+            result_lines.append(f"\n--- Match {i} at line {match['line_number']} ---")
+
+            # Context before
+            if match['context_before']:
+                for line in match['context_before']:
+                    result_lines.append(f"  {line}")
+
+            # Matching line (highlighted)
+            result_lines.append(f"> {match['line_content']}")
+
+            # Context after
+            if match['context_after']:
+                for line in match['context_after']:
+                    result_lines.append(f"  {line}")
+
+        return "\n".join(result_lines)
+
+    def edit_file(
+        self,
+        file_path: str,
+        file_query: str,
+        branch: str = None,
+        commit_message: str = None
+    ) -> str:
+        """
+        Edit file using OLD/NEW markers for precise replacements.
+
+        Only works with text files (markdown, txt, csv, json, xml, html, yaml, code files).
+
+        Args:
+            file_path: Path to the file to edit
+            file_query: Edit instructions with OLD/NEW markers
+            branch: Branch name (None for active branch)
+            commit_message: Commit message (VCS toolkits only)
+
+        Returns:
+            Success message or error
+
+        Raises:
+            ToolException: If file is not text-editable or edit fails
+        """
+        from .utils.text_operations import parse_old_new_markers, is_text_editable
+        from langchain_core.callbacks import dispatch_custom_event
+
+        # Validate file is text-editable
+        if not is_text_editable(file_path):
+            raise ToolException(
+                f"Cannot edit binary/document file '{file_path}'. "
+                f"Supported text formats: markdown, txt, csv, json, xml, html, yaml, code files."
+            )
+
+        # Parse OLD/NEW markers
+        edits = parse_old_new_markers(file_query)
+        if not edits:
+            raise ToolException(
+                "No OLD/NEW marker pairs found in file_query. "
+                "Format: OLD <<<< old text >>>> OLD NEW <<<< new text >>>> NEW"
+            )
+
+        # Read current file content
+        try:
+            current_content = self._read_file(file_path, branch)
+        except Exception as e:
+            raise ToolException(f"Failed to read file {file_path}: {e}")
+
+        # Apply all edits
+        updated_content = current_content
+        for old_text, new_text in edits:
+            if not old_text.strip():
+                continue
+
+            if old_text not in updated_content:
+                logger.warning(
+                    f"Old content not found in {file_path}. "
+                    f"Looking for: {old_text[:100]}..."
+                )
+                continue
+
+            updated_content = updated_content.replace(old_text, new_text)
+
+        # Check if any changes were made
+        if current_content == updated_content:
+            return (
+                f"No changes made to {file_path}. "
+                "Old content was not found or is empty. "
+                "Use read_file or search_file to verify current content."
+            )
+
+        # Write updated content
+        try:
+            result = self._write_file(file_path, updated_content, branch, commit_message)
+        except NotImplementedError:
+            raise ToolException(
+                f"Editing not supported for this toolkit. "
+                f"The _write_file method is not implemented."
+            )
+        except Exception as e:
+            raise ToolException(f"Failed to write file {file_path}: {e}")
+
+        # Dispatch file modification event
+        try:
+            dispatch_custom_event("file_modified", {
+                "message": f"File '{file_path}' edited successfully",
+                "filename": file_path,
+                "tool_name": "edit_file",
+                "toolkit": self.__class__.__name__,
+                "operation_type": "modify",
+                "edits_applied": len(edits)
+            })
+        except Exception as e:
+            logger.warning(f"Failed to dispatch file_modified event: {e}")
+
+        return result
 
     def __handle_get_files(self, path: str, branch: str):
         """
@@ -575,32 +937,46 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
             raise ValueError("Expected a list of strings")
         return _files
 
+    def __get_branch(self, branch):
+        return (branch or getattr(self, 'active_branch', None)
+                or getattr(self, '_active_branch', None) or getattr(self, 'branch', None))
+
     def loader(self,
                branch: Optional[str] = None,
                whitelist: Optional[List[str]] = None,
-               blacklist: Optional[List[str]] = None) -> str:
+               blacklist: Optional[List[str]] = None,
+               chunked: bool = True) -> Generator[Document, None, None]:
         """
-        Generates file content from a branch, respecting whitelist and blacklist patterns.
+        Generates Documents from files in a branch, respecting whitelist and blacklist patterns.
 
         Parameters:
         - branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
         - whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
         - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
+        - chunked (bool): If True (default), applies universal chunker based on file type.
+          If False, returns raw Documents without chunking.
 
         Returns:
-        - generator: Yields content from files matching the whitelist but not the blacklist.
+        - generator: Yields Documents from files matching the whitelist but not the blacklist.
 
         Example:
         # Use 'feature-branch', include '.py' files, exclude 'test_' files
-        file_generator = loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*'])
+        for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
+            print(doc.page_content)
 
         Notes:
         - Whitelist and blacklist use Unix shell-style wildcards.
         - Files must match the whitelist and not the blacklist to be included.
+        - When chunked=True:
+          - .md files → markdown chunker (header-based splitting)
+          - .py/.js/.ts/etc → code parser (TreeSitter-based)
+          - .json files → JSON chunker
+          - other files → default text chunker
         """
-        from .chunkers.code.codeparser import parse_code_files_for_db
+        from langchain_core.documents import Document
+        import hashlib
 
-        _files = self.__handle_get_files("", branch or self.active_branch or self._active_branch)
+        _files = self.__handle_get_files("", self.__get_branch(branch))
         self._log_tool_event(message="Listing files in branch", tool_name="loader")
         logger.info(f"Files in branch: {_files}")
 
@@ -616,28 +992,55 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
                     or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
             return False
 
-        def file_content_generator():
+        def raw_document_generator() -> Generator[Document, None, None]:
+            """Yields raw Documents without chunking."""
             self._log_tool_event(message="Reading the files", tool_name="loader")
-            # log the progress of file reading
             total_files = len(_files)
+            processed = 0
+
            for idx, file in enumerate(_files, 1):
                if is_whitelisted(file) and not is_blacklisted(file):
-                    # read file ONLY if it matches whitelist and does not match blacklist
-                    file_content = self._read_file(file, branch=branch or self.active_branch or self._active_branch)
-                    # hash the file content to ensure uniqueness
-                    import hashlib
+                    try:
+                        file_content = self._read_file(file, self.__get_branch(branch))
+                    except Exception as e:
+                        logger.error(f"Failed to read file {file}: {e}")
+                        continue
+
+                    if not file_content:
+                        continue
+
+                    # Hash the file content for uniqueness tracking
                    file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
-                    yield {"file_name": file,
-                           "file_content": file_content,
-                           "commit_hash": file_hash}
+                    processed += 1
+
+                    yield Document(
+                        page_content=file_content,
+                        metadata={
+                            'file_path': file,
+                            'file_name': file,
+                            'source': file,
+                            'commit_hash': file_hash,
+                        }
+                    )
+
                if idx % 10 == 0 or idx == total_files:
-                    self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
-            self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
+                    self._log_tool_event(
+                        message=f"{idx} out of {total_files} files checked, {processed} matched",
+                        tool_name="loader"
+                    )
+
+            self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
 
-        return parse_code_files_for_db(file_content_generator())
+        if not chunked:
+            # Return raw documents without chunking
+            return raw_document_generator()
+
+        # Apply universal chunker based on file type
+        from .chunkers.universal_chunker import universal_chunker
+        return universal_chunker(raw_document_generator())
 
     def index_data(self,
-                   collection_suffix: str,
+                   index_name: str,
                    branch: Optional[str] = None,
                    whitelist: Optional[List[str]] = None,
                    blacklist: Optional[List[str]] = None,
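
The reworked loader is a generator pipeline: raw Documents are produced per matching file and then optionally passed through universal_chunker. A self-contained sketch of that pipeline shape, with a trivial fixed-size chunker standing in for universal_chunker:

```python
from typing import Generator, Iterable
from langchain_core.documents import Document

def raw_documents(files: dict) -> Generator[Document, None, None]:
    # Stands in for raw_document_generator: one Document per matching file
    for path, content in files.items():
        yield Document(page_content=content, metadata={"file_path": path})

def fixed_size_chunker(docs: Iterable[Document],
                       size: int = 40) -> Generator[Document, None, None]:
    # Stands in for universal_chunker, which picks a strategy per file type
    for doc in docs:
        text = doc.page_content
        for i in range(0, len(text), size):
            yield Document(page_content=text[i:i + size], metadata=doc.metadata)

files = {"README.md": "# Title\n" + "lorem ipsum " * 10}
for chunk in fixed_size_chunker(raw_documents(files)):
    print(chunk.metadata["file_path"], repr(chunk.page_content[:20]))
```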
@@ -651,8 +1054,9 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
                    )
         vectorstore = self._init_vector_store()
         clean_index = kwargs.get('clean_index', False)
-        return vectorstore.index_documents(documents, collection_suffix=collection_suffix,
-                                           clean_index=clean_index, is_code=True)
+        return vectorstore.index_documents(documents, index_name=index_name,
+                                           clean_index=clean_index, is_code=True,
+                                           progress_step=kwargs.get('progress_step', 5))
 
     def _get_vector_search_tools(self):
         """
@@ -686,4 +1090,82 @@ def extend_with_vector_tools(method):
         #
         return tools
 
-    return wrapper
+    return wrapper
+
+
+def extend_with_file_operations(method):
+    """
+    Decorator to automatically add file operation tools to toolkits that implement
+    _read_file and _write_file methods.
+
+    Adds:
+    - read_file_chunk: Read specific line ranges
+    - read_multiple_files: Batch read files
+    - search_file: Search for patterns in files
+    - edit_file: Edit files using OLD/NEW markers
+    """
+    def wrapper(self, *args, **kwargs):
+        tools = method(self, *args, **kwargs)
+
+        # Only add file operations if toolkit has implemented the required methods
+        # Check for both _read_file and _write_file methods
+        has_file_ops = (hasattr(self, '_read_file') and callable(getattr(self, '_read_file')) and
+                        hasattr(self, '_write_file') and callable(getattr(self, '_write_file')))
+
+        if has_file_ops:
+            # Import schemas from elitea_base
+            from . import elitea_base
+
+            file_operation_tools = [
+                {
+                    "name": "read_file_chunk",
+                    "mode": "read_file_chunk",
+                    "ref": self.read_file_chunk,
+                    "description": self.read_file_chunk.__doc__,
+                    "args_schema": elitea_base.ReadFileChunkInput
+                },
+                {
+                    "name": "read_multiple_files",
+                    "mode": "read_multiple_files",
+                    "ref": self.read_multiple_files,
+                    "description": self.read_multiple_files.__doc__,
+                    "args_schema": elitea_base.ReadMultipleFilesInput
+                },
+                {
+                    "name": "search_file",
+                    "mode": "search_file",
+                    "ref": self.search_file,
+                    "description": self.search_file.__doc__,
+                    "args_schema": elitea_base.SearchFileInput
+                },
+                {
+                    "name": "edit_file",
+                    "mode": "edit_file",
+                    "ref": self.edit_file,
+                    "description": self.edit_file.__doc__,
+                    "args_schema": elitea_base.EditFileInput
+                },
+            ]
+
+            tools.extend(file_operation_tools)
+
+        return tools
+
+    return wrapper
+
+
+def filter_missconfigured_index_tools(method):
+    def wrapper(self, *args, **kwargs):
+        toolkit = method(self, *args, **kwargs)
+
+        # Validate index tools misconfiguration and exclude them if necessary
+        is_index_toolkit = any(tool.name in INDEX_TOOL_NAMES for tool in toolkit.tools)
+        is_index_configuration_missing = not (kwargs.get('embedding_model')
+                                              and kwargs.get('pgvector_configuration'))
+
+        if is_index_toolkit and is_index_configuration_missing:
+            toolkit.tools = [tool for tool in toolkit.tools if tool.name not in INDEX_TOOL_NAMES]
+
+        return toolkit
+
+    return wrapper
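
Both new decorators follow the same shape as extend_with_vector_tools: wrap a tool-listing method and post-process its result. A minimal standalone illustration of that composition pattern:

```python
def add_ping_tool(method):
    # Same shape as extend_with_file_operations: wrap, then extend the list
    def wrapper(self, *args, **kwargs):
        tools = method(self, *args, **kwargs)
        tools.append({"name": "ping", "ref": lambda: "pong"})
        return tools
    return wrapper

class DemoToolkit:
    @add_ping_tool
    def get_available_tools(self):
        return [{"name": "echo", "ref": lambda text: text}]

print([t["name"] for t in DemoToolkit().get_available_tools()])
# ['echo', 'ping']
```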