alita-sdk 0.3.462__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +258 -0
  3. alita_sdk/cli/agent_executor.py +15 -3
  4. alita_sdk/cli/agent_loader.py +56 -8
  5. alita_sdk/cli/agent_ui.py +93 -31
  6. alita_sdk/cli/agents.py +2274 -230
  7. alita_sdk/cli/callbacks.py +96 -25
  8. alita_sdk/cli/cli.py +10 -1
  9. alita_sdk/cli/config.py +162 -9
  10. alita_sdk/cli/context/__init__.py +30 -0
  11. alita_sdk/cli/context/cleanup.py +198 -0
  12. alita_sdk/cli/context/manager.py +731 -0
  13. alita_sdk/cli/context/message.py +285 -0
  14. alita_sdk/cli/context/strategies.py +289 -0
  15. alita_sdk/cli/context/token_estimation.py +127 -0
  16. alita_sdk/cli/input_handler.py +419 -0
  17. alita_sdk/cli/inventory.py +1073 -0
  18. alita_sdk/cli/testcases/__init__.py +94 -0
  19. alita_sdk/cli/testcases/data_generation.py +119 -0
  20. alita_sdk/cli/testcases/discovery.py +96 -0
  21. alita_sdk/cli/testcases/executor.py +84 -0
  22. alita_sdk/cli/testcases/logger.py +85 -0
  23. alita_sdk/cli/testcases/parser.py +172 -0
  24. alita_sdk/cli/testcases/prompts.py +91 -0
  25. alita_sdk/cli/testcases/reporting.py +125 -0
  26. alita_sdk/cli/testcases/setup.py +108 -0
  27. alita_sdk/cli/testcases/test_runner.py +282 -0
  28. alita_sdk/cli/testcases/utils.py +39 -0
  29. alita_sdk/cli/testcases/validation.py +90 -0
  30. alita_sdk/cli/testcases/workflow.py +196 -0
  31. alita_sdk/cli/toolkit.py +14 -17
  32. alita_sdk/cli/toolkit_loader.py +35 -5
  33. alita_sdk/cli/tools/__init__.py +36 -2
  34. alita_sdk/cli/tools/approval.py +224 -0
  35. alita_sdk/cli/tools/filesystem.py +910 -64
  36. alita_sdk/cli/tools/planning.py +389 -0
  37. alita_sdk/cli/tools/terminal.py +414 -0
  38. alita_sdk/community/__init__.py +72 -12
  39. alita_sdk/community/inventory/__init__.py +236 -0
  40. alita_sdk/community/inventory/config.py +257 -0
  41. alita_sdk/community/inventory/enrichment.py +2137 -0
  42. alita_sdk/community/inventory/extractors.py +1469 -0
  43. alita_sdk/community/inventory/ingestion.py +3172 -0
  44. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  45. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  46. alita_sdk/community/inventory/parsers/base.py +295 -0
  47. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  48. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  49. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  50. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  51. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  52. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  53. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  54. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  55. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  56. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  57. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  58. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  59. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  60. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  61. alita_sdk/community/inventory/patterns/loader.py +348 -0
  62. alita_sdk/community/inventory/patterns/registry.py +198 -0
  63. alita_sdk/community/inventory/presets.py +535 -0
  64. alita_sdk/community/inventory/retrieval.py +1403 -0
  65. alita_sdk/community/inventory/toolkit.py +173 -0
  66. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  67. alita_sdk/community/inventory/visualize.py +1370 -0
  68. alita_sdk/configurations/__init__.py +1 -1
  69. alita_sdk/configurations/ado.py +141 -20
  70. alita_sdk/configurations/bitbucket.py +0 -3
  71. alita_sdk/configurations/confluence.py +76 -42
  72. alita_sdk/configurations/figma.py +76 -0
  73. alita_sdk/configurations/gitlab.py +17 -5
  74. alita_sdk/configurations/openapi.py +329 -0
  75. alita_sdk/configurations/qtest.py +72 -1
  76. alita_sdk/configurations/report_portal.py +96 -0
  77. alita_sdk/configurations/sharepoint.py +148 -0
  78. alita_sdk/configurations/testio.py +83 -0
  79. alita_sdk/runtime/clients/artifact.py +3 -3
  80. alita_sdk/runtime/clients/client.py +353 -48
  81. alita_sdk/runtime/clients/sandbox_client.py +0 -21
  82. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  83. alita_sdk/runtime/langchain/assistant.py +123 -26
  84. alita_sdk/runtime/langchain/constants.py +642 -1
  85. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  86. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  87. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +6 -3
  88. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  89. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  90. alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
  91. alita_sdk/runtime/langchain/langraph_agent.py +279 -73
  92. alita_sdk/runtime/langchain/utils.py +82 -15
  93. alita_sdk/runtime/llms/preloaded.py +2 -6
  94. alita_sdk/runtime/skills/__init__.py +91 -0
  95. alita_sdk/runtime/skills/callbacks.py +498 -0
  96. alita_sdk/runtime/skills/discovery.py +540 -0
  97. alita_sdk/runtime/skills/executor.py +610 -0
  98. alita_sdk/runtime/skills/input_builder.py +371 -0
  99. alita_sdk/runtime/skills/models.py +330 -0
  100. alita_sdk/runtime/skills/registry.py +355 -0
  101. alita_sdk/runtime/skills/skill_runner.py +330 -0
  102. alita_sdk/runtime/toolkits/__init__.py +7 -0
  103. alita_sdk/runtime/toolkits/application.py +21 -9
  104. alita_sdk/runtime/toolkits/artifact.py +15 -5
  105. alita_sdk/runtime/toolkits/datasource.py +13 -6
  106. alita_sdk/runtime/toolkits/mcp.py +139 -251
  107. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  108. alita_sdk/runtime/toolkits/planning.py +178 -0
  109. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  110. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  111. alita_sdk/runtime/toolkits/tools.py +238 -32
  112. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  113. alita_sdk/runtime/tools/__init__.py +3 -1
  114. alita_sdk/runtime/tools/application.py +20 -6
  115. alita_sdk/runtime/tools/artifact.py +511 -28
  116. alita_sdk/runtime/tools/data_analysis.py +183 -0
  117. alita_sdk/runtime/tools/function.py +43 -15
  118. alita_sdk/runtime/tools/image_generation.py +50 -44
  119. alita_sdk/runtime/tools/llm.py +852 -67
  120. alita_sdk/runtime/tools/loop.py +3 -1
  121. alita_sdk/runtime/tools/loop_output.py +3 -1
  122. alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
  123. alita_sdk/runtime/tools/mcp_server_tool.py +7 -6
  124. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  125. alita_sdk/runtime/tools/planning/models.py +246 -0
  126. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  127. alita_sdk/runtime/tools/router.py +2 -4
  128. alita_sdk/runtime/tools/sandbox.py +9 -6
  129. alita_sdk/runtime/tools/skill_router.py +776 -0
  130. alita_sdk/runtime/tools/tool.py +3 -1
  131. alita_sdk/runtime/tools/vectorstore.py +7 -2
  132. alita_sdk/runtime/tools/vectorstore_base.py +51 -11
  133. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  134. alita_sdk/runtime/utils/constants.py +5 -1
  135. alita_sdk/runtime/utils/mcp_client.py +492 -0
  136. alita_sdk/runtime/utils/mcp_oauth.py +202 -5
  137. alita_sdk/runtime/utils/mcp_sse_client.py +36 -7
  138. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  139. alita_sdk/runtime/utils/serialization.py +155 -0
  140. alita_sdk/runtime/utils/streamlit.py +6 -10
  141. alita_sdk/runtime/utils/toolkit_utils.py +16 -5
  142. alita_sdk/runtime/utils/utils.py +36 -0
  143. alita_sdk/tools/__init__.py +113 -29
  144. alita_sdk/tools/ado/repos/__init__.py +51 -33
  145. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  146. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  147. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  148. alita_sdk/tools/ado/utils.py +1 -18
  149. alita_sdk/tools/ado/wiki/__init__.py +25 -8
  150. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  151. alita_sdk/tools/ado/work_item/__init__.py +26 -9
  152. alita_sdk/tools/ado/work_item/ado_wrapper.py +56 -3
  153. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  154. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  155. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  156. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  157. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  158. alita_sdk/tools/base/tool.py +5 -1
  159. alita_sdk/tools/base_indexer_toolkit.py +170 -45
  160. alita_sdk/tools/bitbucket/__init__.py +17 -12
  161. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  162. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  163. alita_sdk/tools/browser/__init__.py +5 -4
  164. alita_sdk/tools/carrier/__init__.py +5 -6
  165. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  166. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  167. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  168. alita_sdk/tools/chunkers/__init__.py +3 -1
  169. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  170. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  171. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  172. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  173. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  174. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  175. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  176. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  177. alita_sdk/tools/code/linter/__init__.py +10 -8
  178. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  179. alita_sdk/tools/code/sonar/__init__.py +10 -7
  180. alita_sdk/tools/code_indexer_toolkit.py +73 -23
  181. alita_sdk/tools/confluence/__init__.py +21 -15
  182. alita_sdk/tools/confluence/api_wrapper.py +78 -23
  183. alita_sdk/tools/confluence/loader.py +4 -2
  184. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  185. alita_sdk/tools/elastic/__init__.py +11 -8
  186. alita_sdk/tools/elitea_base.py +493 -30
  187. alita_sdk/tools/figma/__init__.py +58 -11
  188. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  189. alita_sdk/tools/figma/figma_client.py +73 -0
  190. alita_sdk/tools/figma/toon_tools.py +2748 -0
  191. alita_sdk/tools/github/__init__.py +13 -14
  192. alita_sdk/tools/github/github_client.py +224 -100
  193. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  194. alita_sdk/tools/github/schemas.py +14 -5
  195. alita_sdk/tools/github/tool.py +5 -1
  196. alita_sdk/tools/github/tool_prompts.py +9 -22
  197. alita_sdk/tools/gitlab/__init__.py +15 -11
  198. alita_sdk/tools/gitlab/api_wrapper.py +207 -41
  199. alita_sdk/tools/gitlab_org/__init__.py +10 -8
  200. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  201. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  202. alita_sdk/tools/google/bigquery/tool.py +5 -1
  203. alita_sdk/tools/google_places/__init__.py +10 -8
  204. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  205. alita_sdk/tools/jira/__init__.py +17 -11
  206. alita_sdk/tools/jira/api_wrapper.py +91 -40
  207. alita_sdk/tools/keycloak/__init__.py +11 -8
  208. alita_sdk/tools/localgit/__init__.py +9 -3
  209. alita_sdk/tools/localgit/local_git.py +62 -54
  210. alita_sdk/tools/localgit/tool.py +5 -1
  211. alita_sdk/tools/memory/__init__.py +11 -3
  212. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  213. alita_sdk/tools/ocr/__init__.py +11 -8
  214. alita_sdk/tools/openapi/__init__.py +490 -114
  215. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  216. alita_sdk/tools/openapi/tool.py +20 -0
  217. alita_sdk/tools/pandas/__init__.py +20 -12
  218. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  219. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  220. alita_sdk/tools/postman/__init__.py +11 -11
  221. alita_sdk/tools/pptx/__init__.py +10 -9
  222. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  223. alita_sdk/tools/qtest/__init__.py +30 -10
  224. alita_sdk/tools/qtest/api_wrapper.py +430 -13
  225. alita_sdk/tools/rally/__init__.py +10 -8
  226. alita_sdk/tools/rally/api_wrapper.py +1 -1
  227. alita_sdk/tools/report_portal/__init__.py +12 -9
  228. alita_sdk/tools/salesforce/__init__.py +10 -9
  229. alita_sdk/tools/servicenow/__init__.py +17 -14
  230. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  231. alita_sdk/tools/sharepoint/__init__.py +10 -8
  232. alita_sdk/tools/sharepoint/api_wrapper.py +4 -4
  233. alita_sdk/tools/slack/__init__.py +10 -8
  234. alita_sdk/tools/slack/api_wrapper.py +2 -2
  235. alita_sdk/tools/sql/__init__.py +11 -9
  236. alita_sdk/tools/testio/__init__.py +10 -8
  237. alita_sdk/tools/testrail/__init__.py +11 -8
  238. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  239. alita_sdk/tools/utils/__init__.py +9 -4
  240. alita_sdk/tools/utils/content_parser.py +77 -3
  241. alita_sdk/tools/utils/text_operations.py +410 -0
  242. alita_sdk/tools/utils/tool_prompts.py +79 -0
  243. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  244. alita_sdk/tools/xray/__init__.py +12 -9
  245. alita_sdk/tools/yagmail/__init__.py +9 -3
  246. alita_sdk/tools/zephyr/__init__.py +9 -7
  247. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -8
  248. alita_sdk/tools/zephyr_essential/__init__.py +10 -8
  249. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  250. alita_sdk/tools/zephyr_essential/client.py +2 -2
  251. alita_sdk/tools/zephyr_scale/__init__.py +11 -9
  252. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  253. alita_sdk/tools/zephyr_squad/__init__.py +10 -8
  254. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +147 -7
  255. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  256. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  257. alita_sdk-0.3.462.dist-info/RECORD +0 -384
  258. alita_sdk-0.3.462.dist-info/entry_points.txt +0 -2
  259. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  260. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  261. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ import base64
1
2
  import hashlib
2
3
  import io
3
4
  import json
@@ -13,13 +14,262 @@ from pydantic import create_model, Field, model_validator
13
14
 
14
15
  from ...tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
15
16
  from ...tools.utils.available_tools_decorator import extend_with_parent_available_tools
16
- from ...runtime.utils.utils import IndexerKeywords
17
+ from ...tools.elitea_base import extend_with_file_operations, BaseCodeToolApiWrapper
18
+ from ...runtime.utils.utils import IndexerKeywords, resolve_image_from_cache
17
19
 
18
20
 
19
21
  class ArtifactWrapper(NonCodeIndexerToolkit):
20
22
  bucket: str
21
23
  artifact: Optional[Any] = None
22
-
24
+
25
+ # Override file operation methods to support bucket_name parameter
26
+ # (instead of importing from BaseCodeToolApiWrapper which uses 'branch')
27
+
28
def read_file_chunk(
    self,
    file_path: str,
    start_line: int,
    end_line: Optional[int] = None,
    bucket_name: str = None
) -> str:
    """
    Return an inclusive, 1-indexed range of lines from a file in an artifact bucket.

    Args:
        file_path: Path of the file to read.
        start_line: First line to return (1-indexed, inclusive).
        end_line: Last line to return (1-indexed, inclusive). None means
            "through the end of the file".
        bucket_name: Bucket to read from; forwarded unchanged to `_read_file`.

    Returns:
        Whatever `apply_line_slice` produces for the requested range.
    """
    from ...tools.utils.text_operations import apply_line_slice

    # Convert the inclusive [start_line, end_line] range to offset/limit form.
    line_count = None
    if end_line is not None:
        line_count = end_line - start_line + 1

    whole_text = self._read_file(
        file_path,
        branch=None,
        bucket_name=bucket_name,
        offset=start_line,
        limit=line_count,
    )

    # Slice client-side as well, in case the read above returned the full file.
    return apply_line_slice(whole_text, offset=start_line, limit=line_count)
58
+
59
def read_multiple_files(
    self,
    file_paths: List[str],
    bucket_name: str = None,
    offset: Optional[int] = None,
    limit: Optional[int] = None
) -> dict:
    """
    Read several files from an artifact bucket in one call.

    A failure on one file does not abort the batch: that file's entry holds
    an "Error reading file: ..." message instead of its content.

    Args:
        file_paths: Paths of the files to read.
        bucket_name: Bucket to read from; forwarded unchanged to `_read_file`.
        offset: Starting line number applied to every file (1-indexed).
        limit: Number of lines to keep from `offset` for every file.

    Returns:
        Dict mapping each requested path to its (sliced) content or error text.
    """
    from ...tools.utils.text_operations import apply_line_slice

    def _load(single_path):
        # Per-file isolation: any exception becomes that file's result string.
        try:
            raw_text = self._read_file(
                single_path, branch=None, bucket_name=bucket_name, offset=offset, limit=limit
            )
            return apply_line_slice(raw_text, offset=offset, limit=limit)
        except Exception as e:
            return f"Error reading file: {str(e)}"

    return {single_path: _load(single_path) for single_path in file_paths}
88
+
89
def search_file(
    self,
    file_path: str,
    pattern: str,
    bucket_name: str = None,
    is_regex: bool = True,
    context_lines: int = 2
) -> str:
    """
    Search one artifact file for a pattern and render the hits as text.

    Args:
        file_path: Path of the file to search.
        pattern: Text to look for; interpreted as a regex unless is_regex=False.
        bucket_name: Bucket to read from; forwarded unchanged to `_read_file`.
        is_regex: Whether `pattern` is a regular expression (default True).
        context_lines: How many lines around each hit to include.

    Returns:
        A "no matches" message, or a report with one section per match
        (line number, preceding context, the matching line, following context).
    """
    from ...tools.utils.text_operations import search_in_content

    text = self._read_file(file_path, branch=None, bucket_name=bucket_name)
    hits = search_in_content(text, pattern, is_regex=is_regex, context_lines=context_lines)

    if not hits:
        return f"No matches found for pattern '{pattern}' in {file_path}"

    def _render_context(lines):
        # Context lines are prefixed so they stand apart from the "> " match line.
        return "\n".join(f" {line}" for line in lines)

    report = [f"Found {len(hits)} match(es) in {file_path}:\n"]
    for hit in hits:
        report.append(f"\n--- Line {hit['line_number']} ---")
        leading = hit['context_before']
        if leading:
            report.append(_render_context(leading))
        report.append(f"> {hit['line_content']}")
        trailing = hit['context_after']
        if trailing:
            report.append(_render_context(trailing))

    return "\n".join(report)
129
+
130
def edit_file(
    self,
    file_path: str,
    file_query: str,
    bucket_name: str = None,
    commit_message: str = None
) -> str:
    """
    Apply OLD/NEW replacement pairs to a text file stored in an artifact bucket.

    Args:
        file_path: Path of the file to edit; must be a text-editable type.
        file_query: Edit instructions containing OLD/NEW marker pairs.
        bucket_name: Bucket holding the file; forwarded to the read/write helpers.
        commit_message: Passed through to `_write_file`, which documents it as
            unused for artifacts.

    Returns:
        A success message with the number of applied edits, or a notice that
        nothing was applied.

    Raises:
        ToolException: If the file type is not text-editable or no OLD/NEW
            pairs could be parsed from `file_query`.
    """
    from ...tools.utils.text_operations import parse_old_new_markers, is_text_editable, try_apply_edit
    from langchain_core.tools import ToolException

    if not is_text_editable(file_path):
        raise ToolException(f"File '{file_path}' is not a text-editable file type")

    # `previous` always holds the text as it was before the edit being tried.
    previous = self._read_file(file_path, branch=None, bucket_name=bucket_name)

    replacements = parse_old_new_markers(file_query)
    if not replacements:
        raise ToolException("No valid OLD/NEW marker pairs found in edit instructions")

    current = previous
    applied = 0
    for old_block, new_block in replacements:
        current, fell_back = try_apply_edit(current, old_block, new_block, file_path)
        # An edit counts when it changed the text or the helper reports a fallback.
        if current == previous and not fell_back:
            continue
        applied += 1
        previous = current

    if applied == 0:
        return f"No edits were applied to {file_path}. The OLD blocks may not match the file content."

    self._write_file(
        file_path,
        current,
        branch=None,
        commit_message=commit_message,
        bucket_name=bucket_name,
    )

    return f"Successfully applied {applied} edit(s) to {file_path}"
180
+
181
def _get_file_operation_schemas(self):
    """
    Build the input schemas for the artifact file-operation tools.

    Each schema carries an optional ``bucket_name`` field where the generic
    file-operation schemas would use ``branch``. According to the original
    author's note, the ``extend_with_file_operations`` decorator calls this
    method to obtain toolkit-specific schemas.

    Returns:
        Dict mapping the tool names "read_file_chunk", "read_multiple_files",
        "search_file" and "edit_file" to their pydantic input models.
    """
    def bucket_field():
        # Built fresh for every model so no field object is shared between them.
        return (Optional[str], Field(
            description="Bucket name. If not provided, uses toolkit-configured default bucket.",
            default=None
        ))

    chunk_fields = {
        "file_path": (str, Field(description="Path to the file to read")),
        "bucket_name": bucket_field(),
        "start_line": (int, Field(description="Starting line number (1-indexed, inclusive)", ge=1)),
        "end_line": (Optional[int], Field(
            description="Ending line number (1-indexed, inclusive). If None, reads to end.",
            default=None,
            ge=1
        )),
    }

    batch_fields = {
        "file_paths": (List[str], Field(description="List of file paths to read", min_length=1)),
        "bucket_name": bucket_field(),
        "offset": (Optional[int], Field(
            description="Starting line number for all files (1-indexed)",
            default=None,
            ge=1
        )),
        "limit": (Optional[int], Field(
            description="Number of lines to read from offset for all files",
            default=None,
            ge=1
        )),
    }

    search_fields = {
        "file_path": (str, Field(description="Path to the file to search")),
        "pattern": (str, Field(description="Search pattern. Treated as regex by default unless is_regex=False.")),
        "bucket_name": bucket_field(),
        "is_regex": (bool, Field(
            description="Whether pattern is a regex. Default is True for flexible matching.",
            default=True
        )),
        "context_lines": (int, Field(
            description="Number of lines before/after match to include for context",
            default=2,
            ge=0
        )),
    }

    # Marker syntax shown to the caller, one marker per line.
    edit_instructions = (
        "Edit instructions with OLD/NEW markers. Format:\n"
        "OLD <<<<\n"
        "old content to replace\n"
        ">>>> OLD\n"
        "NEW <<<<\n"
        "new content\n"
        ">>>> NEW\n"
        "\n"
        "Multiple OLD/NEW pairs can be provided for multiple edits."
    )

    edit_fields = {
        "file_path": (str, Field(
            description="Path to the file to edit. Must be a text file (markdown, txt, csv, json, xml, html, yaml, etc.)"
        )),
        "file_query": (str, Field(description=edit_instructions)),
        "bucket_name": bucket_field(),
        "commit_message": (Optional[str], Field(
            description="Not used for artifacts (kept for API consistency)",
            default=None
        )),
    }

    return {
        "read_file_chunk": create_model("ArtifactReadFileChunkInput", **chunk_fields),
        "read_multiple_files": create_model("ArtifactReadMultipleFilesInput", **batch_fields),
        "search_file": create_model("ArtifactSearchFileInput", **search_fields),
        "edit_file": create_model("ArtifactEditFileInput", **edit_fields),
    }
272
+
23
273
  @model_validator(mode='before')
24
274
  @classmethod
25
275
  def validate_toolkit(cls, values):
@@ -31,20 +281,46 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
31
281
  return super().validate_toolkit(values)
32
282
 
33
283
  def list_files(self, bucket_name = None, return_as_string = True):
34
- return self.artifact.list(bucket_name, return_as_string)
284
+ """List all files in the artifact bucket with API download links."""
285
+ result = self.artifact.list(bucket_name, return_as_string=False)
286
+
287
+ # Add API download link to each file
288
+ if isinstance(result, dict) and 'rows' in result:
289
+ bucket = bucket_name or self.bucket
290
+
291
+ # Get base_url and project_id from alita client
292
+ base_url = getattr(self.alita, 'base_url', '').rstrip('/')
293
+ project_id = getattr(self.alita, 'project_id', '')
294
+
295
+ for file_info in result['rows']:
296
+ if 'name' in file_info:
297
+ # Generate API download link
298
+ file_name = file_info['name']
299
+ file_info['link'] = f"{base_url}/api/v2/artifacts/artifact/default/{project_id}/{bucket}/{file_name}"
300
+
301
+ return str(result) if return_as_string else result
35
302
 
36
303
  def create_file(self, filename: str, filedata: str, bucket_name = None):
37
- if filename.endswith(".xlsx"):
304
+ # Sanitize filename to prevent regex errors during indexing
305
+ sanitized_filename, was_modified = self._sanitize_filename(filename)
306
+ if was_modified:
307
+ logging.warning(f"Filename sanitized: '{filename}' -> '{sanitized_filename}'")
308
+
309
+ # Auto-detect and extract base64 from image_url structures (from image_generation tool)
310
+ # Returns tuple: (processed_data, is_from_image_generation)
311
+ filedata, is_from_image_generation = self._extract_base64_if_needed(filedata)
312
+
313
+ if sanitized_filename.endswith(".xlsx"):
38
314
  data = json.loads(filedata)
39
315
  filedata = self.create_xlsx_filedata(data)
40
316
 
41
- result = self.artifact.create(filename, filedata, bucket_name)
317
+ result = self.artifact.create(sanitized_filename, filedata, bucket_name)
42
318
 
43
- # Dispatch custom event for file creation
44
- self._log_tool_event(
45
- tool_name="file_modified",
46
- message="""
47
- {
319
+ # Skip file_modified event for images from image_generation tool
320
+ # These are already tracked in the tool output and don't need duplicate events
321
+ if not is_from_image_generation:
322
+ # Dispatch custom event for file creation
323
+ dispatch_custom_event("file_modified", {
48
324
  "message": f"File '{filename}' created successfully",
49
325
  "filename": filename,
50
326
  "tool_name": "createFile",
@@ -53,9 +329,73 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
53
329
  "meta": {
54
330
  "bucket": bucket_name or self.bucket
55
331
  }
56
- }""")
332
+ })
57
333
 
58
334
  return result
335
+
336
+ @staticmethod
337
+ def _sanitize_filename(filename: str) -> tuple:
338
+ """Sanitize filename for safe storage and regex pattern matching."""
339
+ from pathlib import Path
340
+
341
+ if not filename or not filename.strip():
342
+ return "unnamed_file", True
343
+
344
+ original = filename
345
+ path_obj = Path(filename)
346
+ name = path_obj.stem
347
+ extension = path_obj.suffix
348
+
349
+ # Whitelist: alphanumeric, underscore, hyphen, space, Unicode letters/digits
350
+ sanitized_name = re.sub(r'[^\w\s-]', '', name, flags=re.UNICODE)
351
+ sanitized_name = re.sub(r'[-\s]+', '-', sanitized_name)
352
+ sanitized_name = sanitized_name.strip('-').strip()
353
+
354
+ if not sanitized_name:
355
+ sanitized_name = "file"
356
+
357
+ if extension:
358
+ extension = re.sub(r'[^\w.-]', '', extension, flags=re.UNICODE)
359
+
360
+ sanitized = sanitized_name + extension
361
+ return sanitized, (sanitized != original)
362
+
363
+ def _extract_base64_if_needed(self, filedata: str) -> tuple[str | bytes, bool]:
364
+ """
365
+ Resolve cached_image_id references from cache and decode to binary data.
366
+
367
+ Requires JSON format with cached_image_id field: {"cached_image_id": "img_xxx"}
368
+ LLM must extract specific cached_image_id from generate_image response.
369
+
370
+ Returns:
371
+ tuple: (processed_data, is_from_image_generation)
372
+ - processed_data: Original filedata or resolved binary image data
373
+ - is_from_image_generation: True if data came from image_generation cache
374
+ """
375
+ if not filedata or not isinstance(filedata, str):
376
+ return filedata, False
377
+
378
+ # Require JSON format - fail fast if not JSON
379
+ if '{' not in filedata:
380
+ return filedata, False
381
+
382
+ try:
383
+ data = json.loads(filedata)
384
+ except json.JSONDecodeError:
385
+ # Not valid JSON, return as-is (regular file content)
386
+ return filedata, False
387
+
388
+ if not isinstance(data, dict):
389
+ return filedata, False
390
+
391
+ # Only accept direct cached_image_id format: {"cached_image_id": "img_xxx"}
392
+ # LLM must parse generate_image response and extract specific cached_image_id
393
+ if 'cached_image_id' in data:
394
+ binary_data = resolve_image_from_cache(self.alita, data['cached_image_id'])
395
+ return binary_data, True # Mark as from image_generation
396
+
397
+ # If JSON doesn't have cached_image_id, treat as regular file content
398
+ return filedata, False
59
399
 
60
400
  def create_xlsx_filedata(self, data: dict[str, list[list]]) -> bytes:
61
401
  try:
@@ -99,9 +439,112 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
99
439
  sheet_name=sheet_name,
100
440
  excel_by_sheets=excel_by_sheets,
101
441
  llm=self.llm)
442
+
443
+ def _read_file(
444
+ self,
445
+ file_path: str,
446
+ branch: str = None,
447
+ bucket_name: str = None,
448
+ **kwargs
449
+ ) -> str:
450
+ """
451
+ Read a file from artifact bucket with optional partial read support.
452
+
453
+ Parameters:
454
+ file_path: Name of the file in the bucket
455
+ branch: Not used for artifacts (kept for API consistency)
456
+ bucket_name: Name of the bucket (uses default if None)
457
+ **kwargs: Additional parameters (offset, limit, head, tail) - currently ignored,
458
+ partial read handled client-side by base class methods
459
+
460
+ Returns:
461
+ File content as string
462
+ """
463
+ return self.read_file(filename=file_path, bucket_name=bucket_name)
464
+
465
def _write_file(
    self,
    file_path: str,
    content: str,
    branch: str = None,
    commit_message: str = None,
    bucket_name: str = None
) -> str:
    """
    Write content to a file in an artifact bucket, overwriting or creating it.

    The file name is sanitized first. The method then tries the
    "get + overwrite" path; if any step of that path raises, it falls back to
    creating the file. A ``file_modified`` custom event is dispatched after
    the successful write.

    Parameters:
        file_path: Name of the file in the bucket.
        content: New file content.
        branch: Not used for artifacts (kept for API consistency).
        commit_message: Not used for artifacts (kept for API consistency).
        bucket_name: Name of the bucket; `self.bucket` is reported in the
            event when this is falsy.

    Returns:
        "Updated file <name>" or "Created file <name>".

    Raises:
        ToolException: If sanitizing, writing or event dispatch fails.
    """
    def _announce(name, outcome, operation_type):
        # Single place for the event payload shared by both write paths.
        dispatch_custom_event("file_modified", {
            "message": f"File '{name}' {outcome} successfully",
            "filename": name,
            "tool_name": "edit_file",
            "toolkit": "artifact",
            "operation_type": operation_type,
            "meta": {
                "bucket": bucket_name or self.bucket
            }
        })

    try:
        sanitized_filename, was_modified = self._sanitize_filename(file_path)
        if was_modified:
            logging.warning(f"Filename sanitized: '{file_path}' -> '{sanitized_filename}'")

        try:
            # Existence probe; the returned content itself is not needed.
            self.artifact.get(artifact_name=sanitized_filename, bucket_name=bucket_name, llm=self.llm)
            self.artifact.overwrite(sanitized_filename, content, bucket_name)
            _announce(sanitized_filename, "updated", "modify")
            return f"Updated file {sanitized_filename}"
        except Exception as probe_error:
            # Deliberate best-effort fallback, but limited to Exception so that
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            logging.debug(
                "Overwrite path failed for '%s' (%s); creating the file instead",
                sanitized_filename,
                probe_error,
            )
            self.artifact.create(sanitized_filename, content, bucket_name)
            _announce(sanitized_filename, "created", "create")
            return f"Created file {sanitized_filename}"
    except Exception as e:
        raise ToolException(f"Unable to write file {file_path}: {str(e)}") from e
102
530
 
103
531
  def delete_file(self, filename: str, bucket_name = None):
104
- return self.artifact.delete(filename, bucket_name)
532
+ # Check if file exists before attempting deletion
533
+ # S3/MinIO delete is idempotent and won't fail for non-existing files
534
+ try:
535
+ files = self.list_files(bucket_name, return_as_string=False)
536
+ file_names = [f['name'] for f in files.get('rows', [])]
537
+ if filename not in file_names:
538
+ raise ToolException(f'Error (deleteFile): ENOENT: no such file or directory: \'{filename}\'')
539
+ except ToolException:
540
+ raise
541
+ except Exception as e:
542
+ raise ToolException(f'Error (deleteFile): Unable to verify file existence for \'{filename}\': {str(e)}')
543
+
544
+ result = self.artifact.delete(filename, bucket_name)
545
+ if result and isinstance(result, dict) and result.get('error'):
546
+ raise ToolException(f'Error (deleteFile): {result.get("error")} for file \'{filename}\'')
547
+ return f'File "{filename}" deleted successfully.'
105
548
 
106
549
  def append_data(self, filename: str, filedata: str, bucket_name = None):
107
550
  result = self.artifact.append(filename, filedata, bucket_name)
@@ -138,7 +581,11 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
138
581
  return result
139
582
 
140
583
  def create_new_bucket(self, bucket_name: str, expiration_measure = "weeks", expiration_value = 1):
141
- return self.artifact.client.create_bucket(bucket_name, expiration_measure, expiration_value)
584
+ # Sanitize bucket name: replace underscores with hyphens and ensure lowercase
585
+ sanitized_name = bucket_name.replace('_', '-').lower()
586
+ if sanitized_name != bucket_name:
587
+ logging.warning(f"Bucket name '{bucket_name}' was sanitized to '{sanitized_name}' (underscores replaced with hyphens, converted to lowercase)")
588
+ return self.artifact.client.create_bucket(sanitized_name, expiration_measure, expiration_value)
142
589
 
143
590
  def _index_tool_params(self):
144
591
  return {
@@ -160,6 +607,21 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
160
607
 
161
608
  include_extensions = kwargs.get('include_extensions', [])
162
609
  skip_extensions = kwargs.get('skip_extensions', [])
610
+ chunking_config = kwargs.get('chunking_config', {})
611
+
612
+ # Auto-include extensions from chunking_config if include_extensions is specified
613
+ # This allows chunking config to work without manually adding extensions to include_extensions
614
+ if chunking_config and include_extensions:
615
+ for ext_pattern in chunking_config.keys():
616
+ # Normalize extension pattern (both ".cbl" and "*.cbl" should work)
617
+ normalized = ext_pattern if ext_pattern.startswith('*') else f'*{ext_pattern}'
618
+ if normalized not in include_extensions:
619
+ include_extensions.append(normalized)
620
+ self._log_tool_event(
621
+ message=f"Auto-included extension '{normalized}' from chunking_config",
622
+ tool_name="loader"
623
+ )
624
+
163
625
  self._log_tool_event(message=f"Files filtering started. Include extensions: {include_extensions}. "
164
626
  f"Skip extensions: {skip_extensions}", tool_name="loader")
165
627
  # show the progress of filtering
@@ -173,13 +635,13 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
173
635
  file_name = file['name']
174
636
 
175
637
  # Check if file should be skipped based on skip_extensions
176
- if any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
638
+ if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
177
639
  for pattern in skip_extensions):
178
640
  continue
179
641
 
180
642
  # Check if file should be included based on include_extensions
181
643
  # If include_extensions is empty, process all files (that weren't skipped)
182
- if include_extensions and not (any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
644
+ if include_extensions and not (any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
183
645
  for pattern in include_extensions)):
184
646
  continue
185
647
 
@@ -207,14 +669,17 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
207
669
  logging.error(f"Failed while parsing the file '{document.metadata['name']}': {e}")
208
670
  yield document
209
671
 
210
- @extend_with_parent_available_tools
672
+ @extend_with_file_operations
211
673
  def get_available_tools(self):
674
+ """Get available tools. Returns all tools for schema; filtering happens at toolkit level."""
212
675
  bucket_name = (Optional[str], Field(description="Name of the bucket to work with."
213
676
  "If bucket is not specified by user directly, the name should be taken from chat history."
214
677
  "If bucket never mentioned in chat, the name will be taken from tool configuration."
215
678
  " ***IMPORTANT*** Underscore `_` is prohibited in bucket name and should be replaced by `-`",
216
679
  default=None))
217
- return [
680
+
681
+ # Basic artifact tools (always available)
682
+ basic_tools = [
218
683
  {
219
684
  "ref": self.list_files,
220
685
  "name": "listFiles",
@@ -229,15 +694,19 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
229
694
  "createFile",
230
695
  filename=(str, Field(description="Filename")),
231
696
  filedata=(str, Field(description="""Stringified content of the file.
232
- Example for .xlsx filedata format:
233
- {
234
- "Sheet1":[
235
- ["Name", "Age", "City"],
236
- ["Alice", 25, "New York"],
237
- ["Bob", 30, "San Francisco"],
238
- ["Charlie", 35, "Los Angeles"]
239
- ]
240
- }
697
+
698
+ Supports three input formats:
699
+
700
+ 1. CACHED IMAGE REFERENCE (for generated/cached images):
701
+ Pass JSON with cached_image_id field: {"cached_image_id": "img_xxx"}
702
+ The tool will automatically resolve and decode the image from cache.
703
+ This is typically used when another tool returns an image reference.
704
+
705
+ 2. EXCEL FILES (.xlsx extension):
706
+ Pass JSON with sheet structure: {"Sheet1": [["Name", "Age"], ["Alice", 25], ["Bob", 30]]}
707
+
708
+ 3. TEXT/OTHER FILES:
709
+ Pass the plain text string directly.
241
710
  """)),
242
711
  bucket_name=bucket_name
243
712
  )
@@ -299,11 +768,25 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
299
768
  "description": "Creates new bucket specified by user.",
300
769
  "args_schema": create_model(
301
770
  "createNewBucket",
302
- bucket_name=(str, Field(description="Bucket name to create. ***IMPORTANT*** Underscore `_` is prohibited in bucket name and should be replaced by `-`.")),
771
+ bucket_name=(str, Field(
772
+ description="Bucket name to create. Must start with lowercase letter and contain only lowercase letters, numbers, and hyphens. Underscores will be automatically converted to hyphens.",
773
+ pattern=r'^[a-z][a-z0-9_-]*$' # Allow underscores in input, will be sanitized
774
+ )),
303
775
  expiration_measure=(Optional[str], Field(description="Measure of expiration time for bucket configuration."
304
776
  "Possible values: `days`, `weeks`, `months`, `years`.",
305
777
  default="weeks")),
306
778
  expiration_value=(Optional[int], Field(description="Expiration time values.", default=1))
307
779
  )
308
780
  }
309
- ]
781
+ ]
782
+
783
+ # Always include indexing tools in available tools list
784
+ # Filtering based on vector store config happens at toolkit level via decorator
785
+ try:
786
+ # Get indexing tools from parent class
787
+ indexing_tools = super(ArtifactWrapper, self).get_available_tools()
788
+ return indexing_tools + basic_tools
789
+ except Exception as e:
790
+ # If getting parent tools fails, log warning and return basic tools only
791
+ logging.warning(f"Failed to load indexing tools: {e}. Only basic artifact tools will be available.")
792
+ return basic_tools