alita-sdk 0.3.257__py3-none-any.whl → 0.3.562__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3601 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1073 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +72 -12
  30. alita_sdk/community/inventory/__init__.py +236 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  58. alita_sdk/community/inventory/visualize.py +1370 -0
  59. alita_sdk/configurations/__init__.py +11 -0
  60. alita_sdk/configurations/ado.py +148 -2
  61. alita_sdk/configurations/azure_search.py +1 -1
  62. alita_sdk/configurations/bigquery.py +1 -1
  63. alita_sdk/configurations/bitbucket.py +94 -2
  64. alita_sdk/configurations/browser.py +18 -0
  65. alita_sdk/configurations/carrier.py +19 -0
  66. alita_sdk/configurations/confluence.py +130 -1
  67. alita_sdk/configurations/delta_lake.py +1 -1
  68. alita_sdk/configurations/figma.py +76 -5
  69. alita_sdk/configurations/github.py +65 -1
  70. alita_sdk/configurations/gitlab.py +81 -0
  71. alita_sdk/configurations/google_places.py +17 -0
  72. alita_sdk/configurations/jira.py +103 -0
  73. alita_sdk/configurations/openapi.py +111 -0
  74. alita_sdk/configurations/postman.py +1 -1
  75. alita_sdk/configurations/qtest.py +72 -3
  76. alita_sdk/configurations/report_portal.py +115 -0
  77. alita_sdk/configurations/salesforce.py +19 -0
  78. alita_sdk/configurations/service_now.py +1 -12
  79. alita_sdk/configurations/sharepoint.py +167 -0
  80. alita_sdk/configurations/sonar.py +18 -0
  81. alita_sdk/configurations/sql.py +20 -0
  82. alita_sdk/configurations/testio.py +101 -0
  83. alita_sdk/configurations/testrail.py +88 -0
  84. alita_sdk/configurations/xray.py +94 -1
  85. alita_sdk/configurations/zephyr_enterprise.py +94 -1
  86. alita_sdk/configurations/zephyr_essential.py +95 -0
  87. alita_sdk/runtime/clients/artifact.py +21 -4
  88. alita_sdk/runtime/clients/client.py +458 -67
  89. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  90. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  91. alita_sdk/runtime/clients/sandbox_client.py +352 -0
  92. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  93. alita_sdk/runtime/langchain/assistant.py +183 -43
  94. alita_sdk/runtime/langchain/constants.py +647 -1
  95. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  96. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
  97. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
  98. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  99. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
  100. alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
  101. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
  102. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
  103. alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
  104. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
  105. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
  106. alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
  107. alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
  108. alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
  109. alita_sdk/runtime/langchain/langraph_agent.py +407 -92
  110. alita_sdk/runtime/langchain/utils.py +102 -8
  111. alita_sdk/runtime/llms/preloaded.py +2 -6
  112. alita_sdk/runtime/models/mcp_models.py +61 -0
  113. alita_sdk/runtime/skills/__init__.py +91 -0
  114. alita_sdk/runtime/skills/callbacks.py +498 -0
  115. alita_sdk/runtime/skills/discovery.py +540 -0
  116. alita_sdk/runtime/skills/executor.py +610 -0
  117. alita_sdk/runtime/skills/input_builder.py +371 -0
  118. alita_sdk/runtime/skills/models.py +330 -0
  119. alita_sdk/runtime/skills/registry.py +355 -0
  120. alita_sdk/runtime/skills/skill_runner.py +330 -0
  121. alita_sdk/runtime/toolkits/__init__.py +28 -0
  122. alita_sdk/runtime/toolkits/application.py +14 -4
  123. alita_sdk/runtime/toolkits/artifact.py +24 -9
  124. alita_sdk/runtime/toolkits/datasource.py +13 -6
  125. alita_sdk/runtime/toolkits/mcp.py +780 -0
  126. alita_sdk/runtime/toolkits/planning.py +178 -0
  127. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  128. alita_sdk/runtime/toolkits/subgraph.py +11 -6
  129. alita_sdk/runtime/toolkits/tools.py +314 -70
  130. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  131. alita_sdk/runtime/tools/__init__.py +24 -0
  132. alita_sdk/runtime/tools/application.py +16 -4
  133. alita_sdk/runtime/tools/artifact.py +367 -33
  134. alita_sdk/runtime/tools/data_analysis.py +183 -0
  135. alita_sdk/runtime/tools/function.py +100 -4
  136. alita_sdk/runtime/tools/graph.py +81 -0
  137. alita_sdk/runtime/tools/image_generation.py +218 -0
  138. alita_sdk/runtime/tools/llm.py +1013 -177
  139. alita_sdk/runtime/tools/loop.py +3 -1
  140. alita_sdk/runtime/tools/loop_output.py +3 -1
  141. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  142. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  143. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  144. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  145. alita_sdk/runtime/tools/planning/models.py +246 -0
  146. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  147. alita_sdk/runtime/tools/router.py +2 -1
  148. alita_sdk/runtime/tools/sandbox.py +375 -0
  149. alita_sdk/runtime/tools/skill_router.py +776 -0
  150. alita_sdk/runtime/tools/tool.py +3 -1
  151. alita_sdk/runtime/tools/vectorstore.py +69 -65
  152. alita_sdk/runtime/tools/vectorstore_base.py +163 -90
  153. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  154. alita_sdk/runtime/utils/mcp_client.py +492 -0
  155. alita_sdk/runtime/utils/mcp_oauth.py +361 -0
  156. alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
  157. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  158. alita_sdk/runtime/utils/streamlit.py +41 -14
  159. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  160. alita_sdk/runtime/utils/utils.py +48 -0
  161. alita_sdk/tools/__init__.py +135 -37
  162. alita_sdk/tools/ado/__init__.py +2 -2
  163. alita_sdk/tools/ado/repos/__init__.py +15 -19
  164. alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
  165. alita_sdk/tools/ado/test_plan/__init__.py +26 -8
  166. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
  167. alita_sdk/tools/ado/wiki/__init__.py +27 -12
  168. alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
  169. alita_sdk/tools/ado/work_item/__init__.py +27 -12
  170. alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
  171. alita_sdk/tools/advanced_jira_mining/__init__.py +12 -8
  172. alita_sdk/tools/aws/delta_lake/__init__.py +14 -11
  173. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  174. alita_sdk/tools/azure_ai/search/__init__.py +13 -8
  175. alita_sdk/tools/base/tool.py +5 -1
  176. alita_sdk/tools/base_indexer_toolkit.py +454 -110
  177. alita_sdk/tools/bitbucket/__init__.py +27 -19
  178. alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
  179. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
  180. alita_sdk/tools/browser/__init__.py +41 -16
  181. alita_sdk/tools/browser/crawler.py +3 -1
  182. alita_sdk/tools/browser/utils.py +15 -6
  183. alita_sdk/tools/carrier/__init__.py +18 -17
  184. alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
  185. alita_sdk/tools/carrier/excel_reporter.py +8 -4
  186. alita_sdk/tools/chunkers/__init__.py +3 -1
  187. alita_sdk/tools/chunkers/code/codeparser.py +1 -1
  188. alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
  189. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  190. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  191. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  192. alita_sdk/tools/cloud/aws/__init__.py +11 -7
  193. alita_sdk/tools/cloud/azure/__init__.py +11 -7
  194. alita_sdk/tools/cloud/gcp/__init__.py +11 -7
  195. alita_sdk/tools/cloud/k8s/__init__.py +11 -7
  196. alita_sdk/tools/code/linter/__init__.py +9 -8
  197. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  198. alita_sdk/tools/code/sonar/__init__.py +20 -13
  199. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  200. alita_sdk/tools/confluence/__init__.py +21 -14
  201. alita_sdk/tools/confluence/api_wrapper.py +197 -58
  202. alita_sdk/tools/confluence/loader.py +14 -2
  203. alita_sdk/tools/custom_open_api/__init__.py +11 -5
  204. alita_sdk/tools/elastic/__init__.py +10 -8
  205. alita_sdk/tools/elitea_base.py +546 -64
  206. alita_sdk/tools/figma/__init__.py +11 -8
  207. alita_sdk/tools/figma/api_wrapper.py +352 -153
  208. alita_sdk/tools/github/__init__.py +17 -17
  209. alita_sdk/tools/github/api_wrapper.py +9 -26
  210. alita_sdk/tools/github/github_client.py +81 -12
  211. alita_sdk/tools/github/schemas.py +2 -1
  212. alita_sdk/tools/github/tool.py +5 -1
  213. alita_sdk/tools/gitlab/__init__.py +18 -13
  214. alita_sdk/tools/gitlab/api_wrapper.py +224 -80
  215. alita_sdk/tools/gitlab_org/__init__.py +13 -10
  216. alita_sdk/tools/google/bigquery/__init__.py +13 -13
  217. alita_sdk/tools/google/bigquery/tool.py +5 -1
  218. alita_sdk/tools/google_places/__init__.py +20 -11
  219. alita_sdk/tools/jira/__init__.py +21 -11
  220. alita_sdk/tools/jira/api_wrapper.py +315 -168
  221. alita_sdk/tools/keycloak/__init__.py +10 -8
  222. alita_sdk/tools/localgit/__init__.py +8 -3
  223. alita_sdk/tools/localgit/local_git.py +62 -54
  224. alita_sdk/tools/localgit/tool.py +5 -1
  225. alita_sdk/tools/memory/__init__.py +38 -14
  226. alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
  227. alita_sdk/tools/ocr/__init__.py +10 -8
  228. alita_sdk/tools/openapi/__init__.py +281 -108
  229. alita_sdk/tools/openapi/api_wrapper.py +883 -0
  230. alita_sdk/tools/openapi/tool.py +20 -0
  231. alita_sdk/tools/pandas/__init__.py +18 -11
  232. alita_sdk/tools/pandas/api_wrapper.py +40 -45
  233. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  234. alita_sdk/tools/postman/__init__.py +10 -11
  235. alita_sdk/tools/postman/api_wrapper.py +19 -8
  236. alita_sdk/tools/postman/postman_analysis.py +8 -1
  237. alita_sdk/tools/pptx/__init__.py +10 -10
  238. alita_sdk/tools/qtest/__init__.py +21 -14
  239. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  240. alita_sdk/tools/rally/__init__.py +12 -10
  241. alita_sdk/tools/report_portal/__init__.py +22 -16
  242. alita_sdk/tools/salesforce/__init__.py +21 -16
  243. alita_sdk/tools/servicenow/__init__.py +20 -16
  244. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  245. alita_sdk/tools/sharepoint/__init__.py +16 -14
  246. alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
  247. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  248. alita_sdk/tools/sharepoint/utils.py +8 -2
  249. alita_sdk/tools/slack/__init__.py +11 -7
  250. alita_sdk/tools/sql/__init__.py +21 -19
  251. alita_sdk/tools/sql/api_wrapper.py +71 -23
  252. alita_sdk/tools/testio/__init__.py +20 -13
  253. alita_sdk/tools/testrail/__init__.py +12 -11
  254. alita_sdk/tools/testrail/api_wrapper.py +214 -46
  255. alita_sdk/tools/utils/__init__.py +28 -4
  256. alita_sdk/tools/utils/content_parser.py +182 -62
  257. alita_sdk/tools/utils/text_operations.py +254 -0
  258. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
  259. alita_sdk/tools/xray/__init__.py +17 -14
  260. alita_sdk/tools/xray/api_wrapper.py +58 -113
  261. alita_sdk/tools/yagmail/__init__.py +8 -3
  262. alita_sdk/tools/zephyr/__init__.py +11 -7
  263. alita_sdk/tools/zephyr_enterprise/__init__.py +15 -9
  264. alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
  265. alita_sdk/tools/zephyr_essential/__init__.py +15 -10
  266. alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
  267. alita_sdk/tools/zephyr_essential/client.py +6 -4
  268. alita_sdk/tools/zephyr_scale/__init__.py +12 -8
  269. alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
  270. alita_sdk/tools/zephyr_squad/__init__.py +11 -7
  271. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/METADATA +184 -37
  272. alita_sdk-0.3.562.dist-info/RECORD +450 -0
  273. alita_sdk-0.3.562.dist-info/entry_points.txt +2 -0
  274. alita_sdk/tools/bitbucket/tools.py +0 -304
  275. alita_sdk-0.3.257.dist-info/RECORD +0 -343
  276. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/WHEEL +0 -0
  277. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/licenses/LICENSE +0 -0
  278. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/top_level.txt +0 -0
@@ -1,23 +1,33 @@
1
+ import base64
1
2
  import hashlib
3
+ import io
2
4
  import json
3
5
  import logging
6
+ import re
4
7
  from typing import Any, Optional, Generator, List
5
8
 
9
+ from langchain_core.callbacks import dispatch_custom_event
6
10
  from langchain_core.documents import Document
7
11
  from langchain_core.tools import ToolException
12
+ from openpyxl.workbook.workbook import Workbook
8
13
  from pydantic import create_model, Field, model_validator
9
14
 
10
- from alita_sdk.tools.elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
15
+ from ...tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
16
+ from ...tools.utils.available_tools_decorator import extend_with_parent_available_tools
17
+ from ...tools.elitea_base import extend_with_file_operations, BaseCodeToolApiWrapper
18
+ from ...runtime.utils.utils import IndexerKeywords, resolve_image_from_cache
11
19
 
12
- try:
13
- from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
14
- except ImportError:
15
- from alita_sdk.langchain.interfaces.llm_processor import get_embeddings
16
20
 
17
- class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
21
+ class ArtifactWrapper(NonCodeIndexerToolkit):
18
22
  bucket: str
19
23
  artifact: Optional[Any] = None
20
24
 
25
+ # Import file operation methods from BaseCodeToolApiWrapper
26
+ read_file_chunk = BaseCodeToolApiWrapper.read_file_chunk
27
+ read_multiple_files = BaseCodeToolApiWrapper.read_multiple_files
28
+ search_file = BaseCodeToolApiWrapper.search_file
29
+ edit_file = BaseCodeToolApiWrapper.edit_file
30
+
21
31
  @model_validator(mode='before')
22
32
  @classmethod
23
33
  def validate_toolkit(cls, values):
@@ -26,13 +36,152 @@ class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
26
36
  if not values.get('bucket'):
27
37
  raise ValueError("Bucket is required.")
28
38
  values["artifact"] = values['alita'].artifact(values['bucket'])
29
- return values
39
+ return super().validate_toolkit(values)
30
40
 
31
41
  def list_files(self, bucket_name = None, return_as_string = True):
32
- return self.artifact.list(bucket_name, return_as_string)
42
+ """List all files in the artifact bucket with API download links."""
43
+ result = self.artifact.list(bucket_name, return_as_string=False)
44
+
45
+ # Add API download link to each file
46
+ if isinstance(result, dict) and 'rows' in result:
47
+ bucket = bucket_name or self.bucket
48
+
49
+ # Get base_url and project_id from alita client
50
+ base_url = getattr(self.alita, 'base_url', '').rstrip('/')
51
+ project_id = getattr(self.alita, 'project_id', '')
52
+
53
+ for file_info in result['rows']:
54
+ if 'name' in file_info:
55
+ # Generate API download link
56
+ file_name = file_info['name']
57
+ file_info['link'] = f"{base_url}/api/v2/artifacts/artifact/default/{project_id}/{bucket}/{file_name}"
58
+
59
+ return str(result) if return_as_string else result
33
60
 
34
61
  def create_file(self, filename: str, filedata: str, bucket_name = None):
35
- return self.artifact.create(filename, filedata, bucket_name)
62
+ # Sanitize filename to prevent regex errors during indexing
63
+ sanitized_filename, was_modified = self._sanitize_filename(filename)
64
+ if was_modified:
65
+ logging.warning(f"Filename sanitized: '{filename}' -> '{sanitized_filename}'")
66
+
67
+ # Auto-detect and extract base64 from image_url structures (from image_generation tool)
68
+ # Returns tuple: (processed_data, is_from_image_generation)
69
+ filedata, is_from_image_generation = self._extract_base64_if_needed(filedata)
70
+
71
+ if sanitized_filename.endswith(".xlsx"):
72
+ data = json.loads(filedata)
73
+ filedata = self.create_xlsx_filedata(data)
74
+
75
+ result = self.artifact.create(sanitized_filename, filedata, bucket_name)
76
+
77
+ # Skip file_modified event for images from image_generation tool
78
+ # These are already tracked in the tool output and don't need duplicate events
79
+ if not is_from_image_generation:
80
+ # Dispatch custom event for file creation
81
+ dispatch_custom_event("file_modified", {
82
+ "message": f"File '{filename}' created successfully",
83
+ "filename": filename,
84
+ "tool_name": "createFile",
85
+ "toolkit": "artifact",
86
+ "operation_type": "create",
87
+ "meta": {
88
+ "bucket": bucket_name or self.bucket
89
+ }
90
+ })
91
+
92
+ return result
93
+
94
+ @staticmethod
95
+ def _sanitize_filename(filename: str) -> tuple:
96
+ """Sanitize filename for safe storage and regex pattern matching."""
97
+ from pathlib import Path
98
+
99
+ if not filename or not filename.strip():
100
+ return "unnamed_file", True
101
+
102
+ original = filename
103
+ path_obj = Path(filename)
104
+ name = path_obj.stem
105
+ extension = path_obj.suffix
106
+
107
+ # Whitelist: alphanumeric, underscore, hyphen, space, Unicode letters/digits
108
+ sanitized_name = re.sub(r'[^\w\s-]', '', name, flags=re.UNICODE)
109
+ sanitized_name = re.sub(r'[-\s]+', '-', sanitized_name)
110
+ sanitized_name = sanitized_name.strip('-').strip()
111
+
112
+ if not sanitized_name:
113
+ sanitized_name = "file"
114
+
115
+ if extension:
116
+ extension = re.sub(r'[^\w.-]', '', extension, flags=re.UNICODE)
117
+
118
+ sanitized = sanitized_name + extension
119
+ return sanitized, (sanitized != original)
120
+
121
+ def _extract_base64_if_needed(self, filedata: str) -> tuple[str | bytes, bool]:
122
+ """
123
+ Resolve cached_image_id references from cache and decode to binary data.
124
+
125
+ Requires JSON format with cached_image_id field: {"cached_image_id": "img_xxx"}
126
+ LLM must extract specific cached_image_id from generate_image response.
127
+
128
+ Returns:
129
+ tuple: (processed_data, is_from_image_generation)
130
+ - processed_data: Original filedata or resolved binary image data
131
+ - is_from_image_generation: True if data came from image_generation cache
132
+ """
133
+ if not filedata or not isinstance(filedata, str):
134
+ return filedata, False
135
+
136
+ # Require JSON format - fail fast if not JSON
137
+ if '{' not in filedata:
138
+ return filedata, False
139
+
140
+ try:
141
+ data = json.loads(filedata)
142
+ except json.JSONDecodeError:
143
+ # Not valid JSON, return as-is (regular file content)
144
+ return filedata, False
145
+
146
+ if not isinstance(data, dict):
147
+ return filedata, False
148
+
149
+ # Only accept direct cached_image_id format: {"cached_image_id": "img_xxx"}
150
+ # LLM must parse generate_image response and extract specific cached_image_id
151
+ if 'cached_image_id' in data:
152
+ binary_data = resolve_image_from_cache(self.alita, data['cached_image_id'])
153
+ return binary_data, True # Mark as from image_generation
154
+
155
+ # If JSON doesn't have cached_image_id, treat as regular file content
156
+ return filedata, False
157
+
158
+ def create_xlsx_filedata(self, data: dict[str, list[list]]) -> bytes:
159
+ try:
160
+ workbook = Workbook()
161
+
162
+ first_sheet = True
163
+ for sheet_name, sheet_data in data.items():
164
+ if first_sheet:
165
+ sheet = workbook.active
166
+ sheet.title = sheet_name
167
+ first_sheet = False
168
+ else:
169
+ sheet = workbook.create_sheet(title=sheet_name)
170
+
171
+ for row in sheet_data:
172
+ sheet.append(row)
173
+
174
+ file_buffer = io.BytesIO()
175
+ workbook.save(file_buffer)
176
+ file_buffer.seek(0)
177
+
178
+ return file_buffer.read()
179
+
180
+ except json.JSONDecodeError:
181
+ raise ValueError("Invalid JSON format for .xlsx file data.")
182
+ except Exception as e:
183
+ raise ValueError(f"Error processing .xlsx file data: {e}")
184
+
36
185
 
37
186
  def read_file(self,
38
187
  filename: str,
@@ -48,31 +197,187 @@ class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
48
197
  sheet_name=sheet_name,
49
198
  excel_by_sheets=excel_by_sheets,
50
199
  llm=self.llm)
200
+
201
+ def _read_file(
202
+ self,
203
+ file_path: str,
204
+ branch: str = None,
205
+ bucket_name: str = None,
206
+ **kwargs
207
+ ) -> str:
208
+ """
209
+ Read a file from artifact bucket with optional partial read support.
210
+
211
+ Parameters:
212
+ file_path: Name of the file in the bucket
213
+ branch: Not used for artifacts (kept for API consistency)
214
+ bucket_name: Name of the bucket (uses default if None)
215
+ **kwargs: Additional parameters (offset, limit, head, tail) - currently ignored,
216
+ partial read handled client-side by base class methods
217
+
218
+ Returns:
219
+ File content as string
220
+ """
221
+ return self.read_file(filename=file_path, bucket_name=bucket_name)
222
+
223
+ def _write_file(
224
+ self,
225
+ file_path: str,
226
+ content: str,
227
+ branch: str = None,
228
+ commit_message: str = None,
229
+ bucket_name: str = None
230
+ ) -> str:
231
+ """
232
+ Write content to a file (create or overwrite).
233
+
234
+ Parameters:
235
+ file_path: Name of the file in the bucket
236
+ content: New file content
237
+ branch: Not used for artifacts (kept for API consistency)
238
+ commit_message: Not used for artifacts (kept for API consistency)
239
+ bucket_name: Name of the bucket (uses default if None)
240
+
241
+ Returns:
242
+ Success message
243
+ """
244
+ try:
245
+ # Sanitize filename
246
+ sanitized_filename, was_modified = self._sanitize_filename(file_path)
247
+ if was_modified:
248
+ logging.warning(f"Filename sanitized: '{file_path}' -> '{sanitized_filename}'")
249
+
250
+ # Check if file exists
251
+ try:
252
+ self.artifact.get(artifact_name=sanitized_filename, bucket_name=bucket_name, llm=self.llm)
253
+ # File exists, overwrite it
254
+ result = self.artifact.overwrite(sanitized_filename, content, bucket_name)
255
+
256
+ # Dispatch custom event
257
+ dispatch_custom_event("file_modified", {
258
+ "message": f"File '{sanitized_filename}' updated successfully",
259
+ "filename": sanitized_filename,
260
+ "tool_name": "edit_file",
261
+ "toolkit": "artifact",
262
+ "operation_type": "modify",
263
+ "meta": {
264
+ "bucket": bucket_name or self.bucket
265
+ }
266
+ })
267
+
268
+ return f"Updated file {sanitized_filename}"
269
+ except:
270
+ # File doesn't exist, create it
271
+ result = self.artifact.create(sanitized_filename, content, bucket_name)
272
+
273
+ # Dispatch custom event
274
+ dispatch_custom_event("file_modified", {
275
+ "message": f"File '{sanitized_filename}' created successfully",
276
+ "filename": sanitized_filename,
277
+ "tool_name": "edit_file",
278
+ "toolkit": "artifact",
279
+ "operation_type": "create",
280
+ "meta": {
281
+ "bucket": bucket_name or self.bucket
282
+ }
283
+ })
284
+
285
+ return f"Created file {sanitized_filename}"
286
+ except Exception as e:
287
+ raise ToolException(f"Unable to write file {file_path}: {str(e)}")
51
288
 
52
289
  def delete_file(self, filename: str, bucket_name = None):
53
290
  return self.artifact.delete(filename, bucket_name)
54
291
 
55
292
  def append_data(self, filename: str, filedata: str, bucket_name = None):
56
- return self.artifact.append(filename, filedata, bucket_name)
293
+ result = self.artifact.append(filename, filedata, bucket_name)
294
+
295
+ # Dispatch custom event for file append
296
+ dispatch_custom_event("file_modified", {
297
+ "message": f"Data appended to file '{filename}' successfully",
298
+ "filename": filename,
299
+ "tool_name": "appendData",
300
+ "toolkit": "artifact",
301
+ "operation_type": "modify",
302
+ "meta": {
303
+ "bucket": bucket_name or self.bucket
304
+ }
305
+ })
306
+
307
+ return result
57
308
 
58
309
  def overwrite_data(self, filename: str, filedata: str, bucket_name = None):
59
- return self.artifact.overwrite(filename, filedata, bucket_name)
310
+ result = self.artifact.overwrite(filename, filedata, bucket_name)
311
+
312
+ # Dispatch custom event for file overwrite
313
+ dispatch_custom_event("file_modified", {
314
+ "message": f"File '{filename}' overwritten successfully",
315
+ "filename": filename,
316
+ "tool_name": "overwriteData",
317
+ "toolkit": "artifact",
318
+ "operation_type": "modify",
319
+ "meta": {
320
+ "bucket": bucket_name or self.bucket
321
+ }
322
+ })
323
+
324
+ return result
60
325
 
61
326
  def create_new_bucket(self, bucket_name: str, expiration_measure = "weeks", expiration_value = 1):
62
- return self.artifact.client.create_bucket(bucket_name, expiration_measure, expiration_value)
327
+ # Sanitize bucket name: replace underscores with hyphens and ensure lowercase
328
+ sanitized_name = bucket_name.replace('_', '-').lower()
329
+ if sanitized_name != bucket_name:
330
+ logging.warning(f"Bucket name '{bucket_name}' was sanitized to '{sanitized_name}' (underscores replaced with hyphens, converted to lowercase)")
331
+ return self.artifact.client.create_bucket(sanitized_name, expiration_measure, expiration_value)
332
+
333
+ def _index_tool_params(self):
334
+ return {
335
+ 'include_extensions': (Optional[List[str]], Field(
336
+ description="List of file extensions to include when processing: i.e. ['*.png', '*.jpg']. "
337
+ "If empty, all files will be processed (except skip_extensions).",
338
+ default=[])),
339
+ 'skip_extensions': (Optional[List[str]], Field(
340
+ description="List of file extensions to skip when processing: i.e. ['*.png', '*.jpg']",
341
+ default=[])),
342
+ }
63
343
 
64
344
  def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
345
+ self._log_tool_event(message=f"Loading the files from artifact's bucket. {kwargs=}", tool_name="loader")
65
346
  try:
66
- all_files = self.list_files(self.bucket, False)
347
+ all_files = self.list_files(self.bucket, False)['rows']
67
348
  except Exception as e:
68
349
  raise ToolException(f"Unable to extract files: {e}")
69
350
 
70
- for file in all_files['rows']:
351
+ include_extensions = kwargs.get('include_extensions', [])
352
+ skip_extensions = kwargs.get('skip_extensions', [])
353
+ self._log_tool_event(message=f"Files filtering started. Include extensions: {include_extensions}. "
354
+ f"Skip extensions: {skip_extensions}", tool_name="loader")
355
+ # show the progress of filtering
356
+ total_files = len(all_files) if isinstance(all_files, list) else 0
357
+ filtered_files_count = 0
358
+ for file in all_files:
359
+ filtered_files_count += 1
360
+ if filtered_files_count % 10 == 0 or filtered_files_count == total_files:
361
+ self._log_tool_event(message=f"Files filtering progress: {filtered_files_count}/{total_files}",
362
+ tool_name="loader")
363
+ file_name = file['name']
364
+
365
+ # Check if file should be skipped based on skip_extensions
366
+ if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
367
+ for pattern in skip_extensions):
368
+ continue
369
+
370
+ # Check if file should be included based on include_extensions
371
+ # If include_extensions is empty, process all files (that weren't skipped)
372
+ if include_extensions and not (any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
373
+ for pattern in include_extensions)):
374
+ continue
375
+
71
376
  metadata = {
72
377
  ("updated_on" if k == "modified" else k): str(v)
73
378
  for k, v in file.items()
74
379
  }
75
- metadata['id'] = self.get_hash_from_bucket_and_file_name(self.bucket, file['name'])
380
+ metadata['id'] = self.get_hash_from_bucket_and_file_name(self.bucket, file_name)
76
381
  yield Document(page_content="", metadata=metadata)
77
382
 
78
383
  def get_hash_from_bucket_and_file_name(self, bucket, file_name):
@@ -81,27 +386,28 @@ class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
81
386
  hasher.update(file_name.encode('utf-8'))
82
387
  return hasher.hexdigest()
83
388
 
84
- def _process_document(self, document: Document) -> Generator[Document, None, None]:
85
- try:
86
- page_content = self.read_file(document.metadata['name'], is_capture_image=True, excel_by_sheets=True)
87
- except Exception as e:
88
- logging.error(f"Failed while parsing the file 'document.metadata['Path']': {e}")
89
- if isinstance(page_content, dict):
90
- for key, value in page_content.items():
91
- metadata = document.metadata
92
- metadata['page'] = key
93
- yield Document(page_content=str(value), metadata=metadata)
94
- else:
95
- document.page_content = json.dumps(str(page_content))
96
-
97
- @extend_with_vector_tools
389
def _extend_data(self, documents: Generator[Document, None, None]):
    """Attach raw artifact bytes to each incoming document and re-yield it.

    For every document the artifact named by ``metadata['name']`` is fetched
    through ``self.artifact.get_content_bytes`` and stored, together with the
    file name, under the ``IndexerKeywords`` metadata keys.

    A document whose content cannot be fetched is still yielded (without the
    extra metadata) after the failure is logged, so one bad file does not
    stop the stream.

    Args:
        documents: Documents whose metadata carries a ``name`` entry.

    Yields:
        The same document objects, mutated in place where fetching succeeded.
    """
    for doc in documents:
        try:
            meta = doc.metadata
            raw_bytes = self.artifact.get_content_bytes(artifact_name=meta['name'])
            meta[IndexerKeywords.CONTENT_IN_BYTES.value] = raw_bytes
            meta[IndexerKeywords.CONTENT_FILE_NAME.value] = meta['name']
            yield doc
        except Exception as exc:
            # Best-effort: report the failure and pass the document through.
            logging.error(f"Failed while parsing the file '{doc.metadata['name']}': {exc}")
            yield doc
399
+
400
+ @extend_with_file_operations
98
401
  def get_available_tools(self):
402
+ """Get available tools. Returns all tools for schema; filtering happens at toolkit level."""
99
403
  bucket_name = (Optional[str], Field(description="Name of the bucket to work with."
100
404
  "If bucket is not specified by user directly, the name should be taken from chat history."
101
405
  "If bucket never mentioned in chat, the name will be taken from tool configuration."
102
406
  " ***IMPORTANT*** Underscore `_` is prohibited in bucket name and should be replaced by `-`",
103
407
  default=None))
104
- return [
408
+
409
+ # Basic artifact tools (always available)
410
+ basic_tools = [
105
411
  {
106
412
  "ref": self.list_files,
107
413
  "name": "listFiles",
@@ -115,7 +421,21 @@ class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
115
421
  "args_schema": create_model(
116
422
  "createFile",
117
423
  filename=(str, Field(description="Filename")),
118
- filedata=(str, Field(description="Stringified content of the file")),
424
+ filedata=(str, Field(description="""Stringified content of the file.
425
+
426
+ Supports three input formats:
427
+
428
+ 1. CACHED IMAGE REFERENCE (for generated/cached images):
429
+ Pass JSON with cached_image_id field: {"cached_image_id": "img_xxx"}
430
+ The tool will automatically resolve and decode the image from cache.
431
+ This is typically used when another tool returns an image reference.
432
+
433
+ 2. EXCEL FILES (.xlsx extension):
434
+ Pass JSON with sheet structure: {"Sheet1": [["Name", "Age"], ["Alice", 25], ["Bob", 30]]}
435
+
436
+ 3. TEXT/OTHER FILES:
437
+ Pass the plain text string directly.
438
+ """)),
119
439
  bucket_name=bucket_name
120
440
  )
121
441
  },
@@ -176,11 +496,25 @@ class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
176
496
  "description": "Creates new bucket specified by user.",
177
497
  "args_schema": create_model(
178
498
  "createNewBucket",
179
- bucket_name=(str, Field(description="Bucket name to create. ***IMPORTANT*** Underscore `_` is prohibited in bucket name and should be replaced by `-`.")),
499
+ bucket_name=(str, Field(
500
+ description="Bucket name to create. Must start with lowercase letter and contain only lowercase letters, numbers, and hyphens. Underscores will be automatically converted to hyphens.",
501
+ pattern=r'^[a-z][a-z0-9_-]*$' # Allow underscores in input, will be sanitized
502
+ )),
180
503
  expiration_measure=(Optional[str], Field(description="Measure of expiration time for bucket configuration."
181
504
  "Possible values: `days`, `weeks`, `months`, `years`.",
182
505
  default="weeks")),
183
506
  expiration_value=(Optional[int], Field(description="Expiration time values.", default=1))
184
507
  )
185
508
  }
186
- ]
509
+ ]
510
+
511
+ # Always include indexing tools in available tools list
512
+ # Filtering based on vector store config happens at toolkit level via decorator
513
+ try:
514
+ # Get indexing tools from parent class
515
+ indexing_tools = super(ArtifactWrapper, self).get_available_tools()
516
+ return indexing_tools + basic_tools
517
+ except Exception as e:
518
+ # If getting parent tools fails, log warning and return basic tools only
519
+ logging.warning(f"Failed to load indexing tools: {e}. Only basic artifact tools will be available.")
520
+ return basic_tools
@@ -0,0 +1,183 @@
1
+ """
2
+ Data Analysis internal tool for Alita SDK.
3
+
4
+ This tool provides Pandas-based data analysis capabilities as an internal tool,
5
+ accessible through the "Enable internal tools" dropdown menu.
6
+
7
+ It uses the conversation attachment bucket for file storage, providing seamless
8
+ integration with drag-and-drop file uploads in chat.
9
+ """
10
+ import logging
11
+ from typing import Any, List, Literal, Optional, Type
12
+
13
+ from langchain_core.tools import BaseTool, BaseToolkit
14
+ from pydantic import BaseModel, ConfigDict, create_model, Field
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ name = "data_analysis"
19
+
20
+
21
def get_tools(tools_list: list, alita_client=None, llm=None, memory_store=None):
    """
    Collect data analysis tools for every matching entry in ``tools_list``.

    An entry matches when its ``type`` or its ``toolkit_name`` equals
    ``'data_analysis'``. A matching entry is skipped (with an error log) when
    no ``alita_client`` was supplied or when its settings carry no
    ``bucket_name``.

    Args:
        tools_list: List of tool configuration dicts.
        alita_client: Alita client instance; without it no tools are built.
        llm: LLM client instance, forwarded to the toolkit unchanged.
        memory_store: Accepted for signature compatibility; not used here.

    Returns:
        Flat list of the tools produced by each matching configuration.

    Raises:
        Exception: Any error raised while building a toolkit is logged
            together with the offending configuration and then re-raised.
    """
    all_tools = []

    for tool in tools_list:
        if (tool.get('type') != 'data_analysis' and
                tool.get('toolkit_name') != 'data_analysis'):
            continue

        try:
            if not alita_client:
                logger.error("Alita client is required for data analysis tools")
                continue

            # ``settings`` can be present but explicitly null; treat that the
            # same as a missing key instead of failing on ``None.get``.
            settings = tool.get('settings') or {}
            bucket_name = settings.get('bucket_name')

            if not bucket_name:
                logger.error("bucket_name is required for data analysis tools")
                continue

            toolkit_instance = DataAnalysisToolkit.get_toolkit(
                alita_client=alita_client,
                llm=llm,
                bucket_name=bucket_name,
                toolkit_name=tool.get('toolkit_name', ''),
                # Honour the selection exposed by the config schema; an empty
                # or missing selection keeps every tool.
                selected_tools=settings.get('selected_tools'),
            )
            all_tools.extend(toolkit_instance.get_tools())
        except Exception as e:
            logger.error("Error in data analysis toolkit get_tools: %s", e)
            logger.error("Tool config: %s", tool)
            raise

    return all_tools
64
+
65
+
66
class DataAnalysisToolkit(BaseToolkit):
    """
    Data Analysis toolkit providing Pandas-based data analysis capabilities.

    This is an internal tool that uses the conversation attachment bucket
    for file storage, enabling seamless integration with chat file uploads.
    """
    # Tools built by get_toolkit(); returned as-is by get_tools().
    tools: List[BaseTool] = []

    @staticmethod
    def toolkit_config_schema() -> Type[BaseModel]:
        """Build the pydantic model that describes this toolkit's settings.

        The model has an optional ``bucket_name`` and a ``selected_tools``
        list restricted to the tool names reported by ``PandasWrapper``; the
        JSON schema of each tool's arguments is attached as extra schema data.

        Returns:
            A dynamically created model class named ``data_analysis``.
        """
        # Local import, as in get_toolkit().
        from alita_sdk.tools.pandas.api_wrapper import PandasWrapper

        # Map tool name -> JSON schema of that tool's arguments.
        tool_schemas = {}
        for spec in PandasWrapper.model_construct().get_available_tools():
            tool_schemas[spec['name']] = spec['args_schema'].model_json_schema()

        bucket_field = Field(
            default=None,
            title="Bucket name",
            description="Bucket where files are stored (auto-injected from conversation)"
        )
        selection_type = List[Literal[tuple(tool_schemas)]]
        selection_field = Field(
            default=[],
            json_schema_extra={'args_schemas': tool_schemas}
        )
        ui_metadata = {
            "label": "Data Analysis",
            "icon_url": "data-analysis.svg",
            "hidden": True,  # Hidden from regular toolkit menu
            "categories": ["internal_tool"],
            "extra_categories": ["data analysis", "pandas", "dataframes", "data science"],
        }

        return create_model(
            'data_analysis',
            bucket_name=(Optional[str], bucket_field),
            selected_tools=(selection_type, selection_field),
            __config__=ConfigDict(json_schema_extra={'metadata': ui_metadata})
        )

    @classmethod
    def get_toolkit(
        cls,
        alita_client=None,
        llm=None,
        bucket_name: str = None,
        toolkit_name: Optional[str] = None,
        selected_tools: Optional[List[str]] = None,
        **kwargs
    ):
        """
        Create a toolkit whose tools wrap a ``PandasWrapper`` instance.

        Args:
            alita_client: Alita client instance (required).
            llm: LLM instance, passed to ``PandasWrapper`` unchanged.
            bucket_name: Bucket the wrapper works with (required).
            toolkit_name: When set, it is prepended to every tool description
                and stored in each tool's metadata.
            selected_tools: Tool names to keep; empty or ``None`` keeps all.
            **kwargs: Accepted and ignored.

        Returns:
            DataAnalysisToolkit instance holding the configured tools.

        Raises:
            ValueError: If alita_client or bucket_name is not provided.
        """
        if not alita_client:
            raise ValueError("Alita client is required for data analysis")

        if not bucket_name:
            raise ValueError("bucket_name is required for data analysis (should be conversation attachment bucket)")

        # Imported at call time rather than at module import.
        from alita_sdk.tools.pandas.api_wrapper import PandasWrapper
        from alita_sdk.tools.base.tool import BaseAction

        pandas_wrapper = PandasWrapper(
            alita=alita_client,
            llm=llm,
            bucket_name=bucket_name
        )

        actions = []
        for spec in pandas_wrapper.get_available_tools():
            # Skip tools outside an explicit selection.
            if selected_tools and spec["name"] not in selected_tools:
                continue

            text = spec["description"]
            labelled = f"Toolkit: {toolkit_name}\n{text}" if toolkit_name else text
            # Descriptions are capped at 1000 characters.
            clipped = labelled[:1000]

            actions.append(BaseAction(
                api_wrapper=pandas_wrapper,
                name=spec["name"],
                description=clipped,
                args_schema=spec["args_schema"],
                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
            ))

        return cls(tools=actions)

    def get_tools(self):
        """Return the tools this toolkit was created with."""
        return self.tools