alita-sdk 0.3.263__py3-none-any.whl → 0.3.499__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3601 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1256 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +64 -8
  30. alita_sdk/community/inventory/__init__.py +224 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/visualize.py +1370 -0
  58. alita_sdk/configurations/__init__.py +10 -0
  59. alita_sdk/configurations/ado.py +4 -2
  60. alita_sdk/configurations/azure_search.py +1 -1
  61. alita_sdk/configurations/bigquery.py +1 -1
  62. alita_sdk/configurations/bitbucket.py +94 -2
  63. alita_sdk/configurations/browser.py +18 -0
  64. alita_sdk/configurations/carrier.py +19 -0
  65. alita_sdk/configurations/confluence.py +96 -1
  66. alita_sdk/configurations/delta_lake.py +1 -1
  67. alita_sdk/configurations/figma.py +0 -5
  68. alita_sdk/configurations/github.py +65 -1
  69. alita_sdk/configurations/gitlab.py +79 -0
  70. alita_sdk/configurations/google_places.py +17 -0
  71. alita_sdk/configurations/jira.py +103 -0
  72. alita_sdk/configurations/postman.py +1 -1
  73. alita_sdk/configurations/qtest.py +1 -3
  74. alita_sdk/configurations/report_portal.py +19 -0
  75. alita_sdk/configurations/salesforce.py +19 -0
  76. alita_sdk/configurations/service_now.py +1 -12
  77. alita_sdk/configurations/sharepoint.py +19 -0
  78. alita_sdk/configurations/sonar.py +18 -0
  79. alita_sdk/configurations/sql.py +20 -0
  80. alita_sdk/configurations/testio.py +18 -0
  81. alita_sdk/configurations/testrail.py +88 -0
  82. alita_sdk/configurations/xray.py +94 -1
  83. alita_sdk/configurations/zephyr_enterprise.py +94 -1
  84. alita_sdk/configurations/zephyr_essential.py +95 -0
  85. alita_sdk/runtime/clients/artifact.py +12 -2
  86. alita_sdk/runtime/clients/client.py +235 -66
  87. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  88. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  89. alita_sdk/runtime/clients/sandbox_client.py +373 -0
  90. alita_sdk/runtime/langchain/assistant.py +123 -17
  91. alita_sdk/runtime/langchain/constants.py +8 -1
  92. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  93. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
  94. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
  95. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +8 -2
  96. alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
  97. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
  98. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
  99. alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
  100. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
  101. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
  102. alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
  103. alita_sdk/runtime/langchain/document_loaders/constants.py +187 -40
  104. alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
  105. alita_sdk/runtime/langchain/langraph_agent.py +406 -91
  106. alita_sdk/runtime/langchain/utils.py +51 -8
  107. alita_sdk/runtime/llms/preloaded.py +2 -6
  108. alita_sdk/runtime/models/mcp_models.py +61 -0
  109. alita_sdk/runtime/toolkits/__init__.py +26 -0
  110. alita_sdk/runtime/toolkits/application.py +9 -2
  111. alita_sdk/runtime/toolkits/artifact.py +19 -7
  112. alita_sdk/runtime/toolkits/datasource.py +13 -6
  113. alita_sdk/runtime/toolkits/mcp.py +780 -0
  114. alita_sdk/runtime/toolkits/planning.py +178 -0
  115. alita_sdk/runtime/toolkits/subgraph.py +11 -6
  116. alita_sdk/runtime/toolkits/tools.py +214 -60
  117. alita_sdk/runtime/toolkits/vectorstore.py +9 -4
  118. alita_sdk/runtime/tools/__init__.py +22 -0
  119. alita_sdk/runtime/tools/application.py +16 -4
  120. alita_sdk/runtime/tools/artifact.py +312 -19
  121. alita_sdk/runtime/tools/function.py +100 -4
  122. alita_sdk/runtime/tools/graph.py +81 -0
  123. alita_sdk/runtime/tools/image_generation.py +212 -0
  124. alita_sdk/runtime/tools/llm.py +539 -180
  125. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  126. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  127. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  128. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  129. alita_sdk/runtime/tools/planning/models.py +246 -0
  130. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  131. alita_sdk/runtime/tools/router.py +2 -1
  132. alita_sdk/runtime/tools/sandbox.py +375 -0
  133. alita_sdk/runtime/tools/vectorstore.py +62 -63
  134. alita_sdk/runtime/tools/vectorstore_base.py +156 -85
  135. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  136. alita_sdk/runtime/utils/mcp_client.py +465 -0
  137. alita_sdk/runtime/utils/mcp_oauth.py +244 -0
  138. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  139. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  140. alita_sdk/runtime/utils/streamlit.py +41 -14
  141. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  142. alita_sdk/runtime/utils/utils.py +14 -0
  143. alita_sdk/tools/__init__.py +78 -35
  144. alita_sdk/tools/ado/__init__.py +0 -1
  145. alita_sdk/tools/ado/repos/__init__.py +10 -6
  146. alita_sdk/tools/ado/repos/repos_wrapper.py +12 -11
  147. alita_sdk/tools/ado/test_plan/__init__.py +10 -7
  148. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -23
  149. alita_sdk/tools/ado/wiki/__init__.py +10 -11
  150. alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -28
  151. alita_sdk/tools/ado/work_item/__init__.py +10 -11
  152. alita_sdk/tools/ado/work_item/ado_wrapper.py +63 -10
  153. alita_sdk/tools/advanced_jira_mining/__init__.py +10 -7
  154. alita_sdk/tools/aws/delta_lake/__init__.py +13 -11
  155. alita_sdk/tools/azure_ai/search/__init__.py +11 -7
  156. alita_sdk/tools/base_indexer_toolkit.py +392 -86
  157. alita_sdk/tools/bitbucket/__init__.py +18 -11
  158. alita_sdk/tools/bitbucket/api_wrapper.py +52 -9
  159. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
  160. alita_sdk/tools/browser/__init__.py +40 -16
  161. alita_sdk/tools/browser/crawler.py +3 -1
  162. alita_sdk/tools/browser/utils.py +15 -6
  163. alita_sdk/tools/carrier/__init__.py +17 -17
  164. alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
  165. alita_sdk/tools/carrier/excel_reporter.py +8 -4
  166. alita_sdk/tools/chunkers/__init__.py +3 -1
  167. alita_sdk/tools/chunkers/code/codeparser.py +1 -1
  168. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  169. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  170. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  171. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  172. alita_sdk/tools/cloud/aws/__init__.py +9 -6
  173. alita_sdk/tools/cloud/azure/__init__.py +9 -6
  174. alita_sdk/tools/cloud/gcp/__init__.py +9 -6
  175. alita_sdk/tools/cloud/k8s/__init__.py +9 -6
  176. alita_sdk/tools/code/linter/__init__.py +7 -7
  177. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  178. alita_sdk/tools/code/sonar/__init__.py +18 -12
  179. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  180. alita_sdk/tools/confluence/__init__.py +14 -11
  181. alita_sdk/tools/confluence/api_wrapper.py +198 -58
  182. alita_sdk/tools/confluence/loader.py +10 -0
  183. alita_sdk/tools/custom_open_api/__init__.py +9 -4
  184. alita_sdk/tools/elastic/__init__.py +8 -7
  185. alita_sdk/tools/elitea_base.py +543 -64
  186. alita_sdk/tools/figma/__init__.py +10 -8
  187. alita_sdk/tools/figma/api_wrapper.py +352 -153
  188. alita_sdk/tools/github/__init__.py +13 -11
  189. alita_sdk/tools/github/api_wrapper.py +9 -26
  190. alita_sdk/tools/github/github_client.py +75 -12
  191. alita_sdk/tools/github/schemas.py +2 -1
  192. alita_sdk/tools/gitlab/__init__.py +11 -10
  193. alita_sdk/tools/gitlab/api_wrapper.py +135 -45
  194. alita_sdk/tools/gitlab_org/__init__.py +11 -9
  195. alita_sdk/tools/google/bigquery/__init__.py +12 -13
  196. alita_sdk/tools/google_places/__init__.py +18 -10
  197. alita_sdk/tools/jira/__init__.py +14 -8
  198. alita_sdk/tools/jira/api_wrapper.py +315 -168
  199. alita_sdk/tools/keycloak/__init__.py +8 -7
  200. alita_sdk/tools/localgit/local_git.py +56 -54
  201. alita_sdk/tools/memory/__init__.py +27 -11
  202. alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
  203. alita_sdk/tools/ocr/__init__.py +8 -7
  204. alita_sdk/tools/openapi/__init__.py +10 -1
  205. alita_sdk/tools/pandas/__init__.py +8 -7
  206. alita_sdk/tools/pandas/api_wrapper.py +7 -25
  207. alita_sdk/tools/postman/__init__.py +8 -10
  208. alita_sdk/tools/postman/api_wrapper.py +19 -8
  209. alita_sdk/tools/postman/postman_analysis.py +8 -1
  210. alita_sdk/tools/pptx/__init__.py +8 -9
  211. alita_sdk/tools/qtest/__init__.py +19 -13
  212. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  213. alita_sdk/tools/rally/__init__.py +10 -9
  214. alita_sdk/tools/report_portal/__init__.py +20 -15
  215. alita_sdk/tools/salesforce/__init__.py +19 -15
  216. alita_sdk/tools/servicenow/__init__.py +14 -11
  217. alita_sdk/tools/sharepoint/__init__.py +14 -13
  218. alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
  219. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  220. alita_sdk/tools/sharepoint/utils.py +8 -2
  221. alita_sdk/tools/slack/__init__.py +10 -7
  222. alita_sdk/tools/sql/__init__.py +19 -18
  223. alita_sdk/tools/sql/api_wrapper.py +71 -23
  224. alita_sdk/tools/testio/__init__.py +18 -12
  225. alita_sdk/tools/testrail/__init__.py +10 -10
  226. alita_sdk/tools/testrail/api_wrapper.py +213 -45
  227. alita_sdk/tools/utils/__init__.py +28 -4
  228. alita_sdk/tools/utils/content_parser.py +181 -61
  229. alita_sdk/tools/utils/text_operations.py +254 -0
  230. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
  231. alita_sdk/tools/xray/__init__.py +12 -7
  232. alita_sdk/tools/xray/api_wrapper.py +58 -113
  233. alita_sdk/tools/zephyr/__init__.py +9 -6
  234. alita_sdk/tools/zephyr_enterprise/__init__.py +13 -8
  235. alita_sdk/tools/zephyr_enterprise/api_wrapper.py +17 -7
  236. alita_sdk/tools/zephyr_essential/__init__.py +13 -9
  237. alita_sdk/tools/zephyr_essential/api_wrapper.py +289 -47
  238. alita_sdk/tools/zephyr_essential/client.py +6 -4
  239. alita_sdk/tools/zephyr_scale/__init__.py +10 -7
  240. alita_sdk/tools/zephyr_scale/api_wrapper.py +6 -2
  241. alita_sdk/tools/zephyr_squad/__init__.py +9 -6
  242. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +180 -33
  243. alita_sdk-0.3.499.dist-info/RECORD +433 -0
  244. alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
  245. alita_sdk-0.3.263.dist-info/RECORD +0 -342
  246. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
  247. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
  248. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,9 @@
1
1
  import json
2
2
 
3
3
  from ..utils.utils import clean_string
4
- from langchain_core.tools import BaseTool
5
- from langchain_core.messages import BaseMessage, AIMessage, ToolCall
6
- from typing import Any, Type, Optional, Union
4
+ from langchain_core.tools import BaseTool, ToolException
5
+ from langchain_core.messages import BaseMessage, AIMessage, HumanMessage
6
+ from typing import Any, Type, Optional
7
7
  from pydantic import create_model, field_validator, BaseModel
8
8
  from pydantic.fields import FieldInfo
9
9
  from ..langchain.mixedAgentRenderes import convert_message_to_json
@@ -31,7 +31,12 @@ def formulate_query(kwargs):
31
31
  chat_history = []
32
32
  for each in kwargs.get('chat_history')[:]:
33
33
  chat_history.append(AIMessage(each))
34
- result = {"input": kwargs.get('task'), "chat_history": chat_history}
34
+ user_task = kwargs.get('task')
35
+ if not user_task:
36
+ raise ToolException("Task is required to invoke the application. "
37
+ "Check the provided input (some errors may happen on previous steps).")
38
+ input_message = HumanMessage(content=user_task)
39
+ result = {"input": [input_message], "chat_history": chat_history}
35
40
  for key, value in kwargs.items():
36
41
  if key not in ("task", "chat_history"):
37
42
  result[key] = value
@@ -45,6 +50,8 @@ class Application(BaseTool):
45
50
  application: Any
46
51
  args_schema: Type[BaseModel] = applicationToolSchema
47
52
  return_type: str = "str"
53
+ client: Any
54
+ args_runnable: dict = {}
48
55
 
49
56
  @field_validator('name', mode='before')
50
57
  @classmethod
@@ -61,6 +68,11 @@ class Application(BaseTool):
61
68
  return self._run(*config, **all_kwargs)
62
69
 
63
70
  def _run(self, *args, **kwargs):
71
+ if self.client and self.args_runnable:
72
+ # Recreate new LanggraphAgentRunnable in order to reflect the current input_mapping (it can be dynamic for pipelines).
73
+ # Actually, for pipelines agent toolkits LanggraphAgentRunnable is created (for LLMNode) before pipeline's schema parsing.
74
+ application_variables = {k: {"name": k, "value": v} for k, v in kwargs.items()}
75
+ self.application = self.client.application(**self.args_runnable, application_variables=application_variables)
64
76
  response = self.application.invoke(formulate_query(kwargs))
65
77
  if self.return_type == "str":
66
78
  return response["output"]
@@ -1,14 +1,20 @@
1
1
  import hashlib
2
+ import io
2
3
  import json
3
4
  import logging
4
- from typing import Any, Optional, Generator
5
+ import re
6
+ from typing import Any, Optional, Generator, List
5
7
 
8
+ from langchain_core.callbacks import dispatch_custom_event
6
9
  from langchain_core.documents import Document
7
10
  from langchain_core.tools import ToolException
11
+ from openpyxl.workbook.workbook import Workbook
8
12
  from pydantic import create_model, Field, model_validator
9
13
 
10
- from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
11
- from alita_sdk.tools.utils.available_tools_decorator import extend_with_parent_available_tools
14
+ from ...tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
15
+ from ...tools.utils.available_tools_decorator import extend_with_parent_available_tools
16
+ from ...tools.elitea_base import extend_with_file_operations
17
+ from ...runtime.utils.utils import IndexerKeywords
12
18
 
13
19
 
14
20
  class ArtifactWrapper(NonCodeIndexerToolkit):
@@ -26,10 +32,105 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
26
32
  return super().validate_toolkit(values)
27
33
 
28
34
  def list_files(self, bucket_name = None, return_as_string = True):
29
- return self.artifact.list(bucket_name, return_as_string)
35
+ """List all files in the artifact bucket with API download links."""
36
+ result = self.artifact.list(bucket_name, return_as_string=False)
37
+
38
+ # Add API download link to each file
39
+ if isinstance(result, dict) and 'rows' in result:
40
+ bucket = bucket_name or self.bucket
41
+
42
+ # Get base_url and project_id from alita client
43
+ base_url = getattr(self.alita, 'base_url', '').rstrip('/')
44
+ project_id = getattr(self.alita, 'project_id', '')
45
+
46
+ for file_info in result['rows']:
47
+ if 'name' in file_info:
48
+ # Generate API download link
49
+ file_name = file_info['name']
50
+ file_info['link'] = f"{base_url}/api/v2/artifacts/artifact/default/{project_id}/{bucket}/{file_name}"
51
+
52
+ return str(result) if return_as_string else result
30
53
 
31
54
  def create_file(self, filename: str, filedata: str, bucket_name = None):
32
- return self.artifact.create(filename, filedata, bucket_name)
55
+ # Sanitize filename to prevent regex errors during indexing
56
+ sanitized_filename, was_modified = self._sanitize_filename(filename)
57
+ if was_modified:
58
+ logging.warning(f"Filename sanitized: '{filename}' -> '{sanitized_filename}'")
59
+
60
+ if sanitized_filename.endswith(".xlsx"):
61
+ data = json.loads(filedata)
62
+ filedata = self.create_xlsx_filedata(data)
63
+
64
+ result = self.artifact.create(sanitized_filename, filedata, bucket_name)
65
+
66
+ # Dispatch custom event for file creation
67
+ dispatch_custom_event("file_modified", {
68
+ "message": f"File '{filename}' created successfully",
69
+ "filename": filename,
70
+ "tool_name": "createFile",
71
+ "toolkit": "artifact",
72
+ "operation_type": "create",
73
+ "meta": {
74
+ "bucket": bucket_name or self.bucket
75
+ }
76
+ })
77
+
78
+ return result
79
+
80
+ @staticmethod
81
+ def _sanitize_filename(filename: str) -> tuple:
82
+ """Sanitize filename for safe storage and regex pattern matching."""
83
+ from pathlib import Path
84
+
85
+ if not filename or not filename.strip():
86
+ return "unnamed_file", True
87
+
88
+ original = filename
89
+ path_obj = Path(filename)
90
+ name = path_obj.stem
91
+ extension = path_obj.suffix
92
+
93
+ # Whitelist: alphanumeric, underscore, hyphen, space, Unicode letters/digits
94
+ sanitized_name = re.sub(r'[^\w\s-]', '', name, flags=re.UNICODE)
95
+ sanitized_name = re.sub(r'[-\s]+', '-', sanitized_name)
96
+ sanitized_name = sanitized_name.strip('-').strip()
97
+
98
+ if not sanitized_name:
99
+ sanitized_name = "file"
100
+
101
+ if extension:
102
+ extension = re.sub(r'[^\w.-]', '', extension, flags=re.UNICODE)
103
+
104
+ sanitized = sanitized_name + extension
105
+ return sanitized, (sanitized != original)
106
+
107
+ def create_xlsx_filedata(self, data: dict[str, list[list]]) -> bytes:
108
+ try:
109
+ workbook = Workbook()
110
+
111
+ first_sheet = True
112
+ for sheet_name, sheet_data in data.items():
113
+ if first_sheet:
114
+ sheet = workbook.active
115
+ sheet.title = sheet_name
116
+ first_sheet = False
117
+ else:
118
+ sheet = workbook.create_sheet(title=sheet_name)
119
+
120
+ for row in sheet_data:
121
+ sheet.append(row)
122
+
123
+ file_buffer = io.BytesIO()
124
+ workbook.save(file_buffer)
125
+ file_buffer.seek(0)
126
+
127
+ return file_buffer.read()
128
+
129
+ except json.JSONDecodeError:
130
+ raise ValueError("Invalid JSON format for .xlsx file data.")
131
+ except Exception as e:
132
+ raise ValueError(f"Error processing .xlsx file data: {e}")
133
+
33
134
 
34
135
  def read_file(self,
35
136
  filename: str,
@@ -45,31 +146,187 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
45
146
  sheet_name=sheet_name,
46
147
  excel_by_sheets=excel_by_sheets,
47
148
  llm=self.llm)
149
+
150
+ def _read_file(
151
+ self,
152
+ file_path: str,
153
+ branch: str = None,
154
+ bucket_name: str = None,
155
+ **kwargs
156
+ ) -> str:
157
+ """
158
+ Read a file from artifact bucket with optional partial read support.
159
+
160
+ Parameters:
161
+ file_path: Name of the file in the bucket
162
+ branch: Not used for artifacts (kept for API consistency)
163
+ bucket_name: Name of the bucket (uses default if None)
164
+ **kwargs: Additional parameters (offset, limit, head, tail) - currently ignored,
165
+ partial read handled client-side by base class methods
166
+
167
+ Returns:
168
+ File content as string
169
+ """
170
+ return self.read_file(filename=file_path, bucket_name=bucket_name)
171
+
172
+ def _write_file(
173
+ self,
174
+ file_path: str,
175
+ content: str,
176
+ branch: str = None,
177
+ commit_message: str = None,
178
+ bucket_name: str = None
179
+ ) -> str:
180
+ """
181
+ Write content to a file (create or overwrite).
182
+
183
+ Parameters:
184
+ file_path: Name of the file in the bucket
185
+ content: New file content
186
+ branch: Not used for artifacts (kept for API consistency)
187
+ commit_message: Not used for artifacts (kept for API consistency)
188
+ bucket_name: Name of the bucket (uses default if None)
189
+
190
+ Returns:
191
+ Success message
192
+ """
193
+ try:
194
+ # Sanitize filename
195
+ sanitized_filename, was_modified = self._sanitize_filename(file_path)
196
+ if was_modified:
197
+ logging.warning(f"Filename sanitized: '{file_path}' -> '{sanitized_filename}'")
198
+
199
+ # Check if file exists
200
+ try:
201
+ self.artifact.get(artifact_name=sanitized_filename, bucket_name=bucket_name, llm=self.llm)
202
+ # File exists, overwrite it
203
+ result = self.artifact.overwrite(sanitized_filename, content, bucket_name)
204
+
205
+ # Dispatch custom event
206
+ dispatch_custom_event("file_modified", {
207
+ "message": f"File '{sanitized_filename}' updated successfully",
208
+ "filename": sanitized_filename,
209
+ "tool_name": "edit_file",
210
+ "toolkit": "artifact",
211
+ "operation_type": "modify",
212
+ "meta": {
213
+ "bucket": bucket_name or self.bucket
214
+ }
215
+ })
216
+
217
+ return f"Updated file {sanitized_filename}"
218
+ except:
219
+ # File doesn't exist, create it
220
+ result = self.artifact.create(sanitized_filename, content, bucket_name)
221
+
222
+ # Dispatch custom event
223
+ dispatch_custom_event("file_modified", {
224
+ "message": f"File '{sanitized_filename}' created successfully",
225
+ "filename": sanitized_filename,
226
+ "tool_name": "edit_file",
227
+ "toolkit": "artifact",
228
+ "operation_type": "create",
229
+ "meta": {
230
+ "bucket": bucket_name or self.bucket
231
+ }
232
+ })
233
+
234
+ return f"Created file {sanitized_filename}"
235
+ except Exception as e:
236
+ raise ToolException(f"Unable to write file {file_path}: {str(e)}")
48
237
 
49
238
  def delete_file(self, filename: str, bucket_name = None):
50
239
  return self.artifact.delete(filename, bucket_name)
51
240
 
52
241
  def append_data(self, filename: str, filedata: str, bucket_name = None):
53
- return self.artifact.append(filename, filedata, bucket_name)
242
+ result = self.artifact.append(filename, filedata, bucket_name)
243
+
244
+ # Dispatch custom event for file append
245
+ dispatch_custom_event("file_modified", {
246
+ "message": f"Data appended to file '{filename}' successfully",
247
+ "filename": filename,
248
+ "tool_name": "appendData",
249
+ "toolkit": "artifact",
250
+ "operation_type": "modify",
251
+ "meta": {
252
+ "bucket": bucket_name or self.bucket
253
+ }
254
+ })
255
+
256
+ return result
54
257
 
55
258
  def overwrite_data(self, filename: str, filedata: str, bucket_name = None):
56
- return self.artifact.overwrite(filename, filedata, bucket_name)
259
+ result = self.artifact.overwrite(filename, filedata, bucket_name)
260
+
261
+ # Dispatch custom event for file overwrite
262
+ dispatch_custom_event("file_modified", {
263
+ "message": f"File '{filename}' overwritten successfully",
264
+ "filename": filename,
265
+ "tool_name": "overwriteData",
266
+ "toolkit": "artifact",
267
+ "operation_type": "modify",
268
+ "meta": {
269
+ "bucket": bucket_name or self.bucket
270
+ }
271
+ })
272
+
273
+ return result
57
274
 
58
275
  def create_new_bucket(self, bucket_name: str, expiration_measure = "weeks", expiration_value = 1):
59
- return self.artifact.client.create_bucket(bucket_name, expiration_measure, expiration_value)
276
+ # Sanitize bucket name: replace underscores with hyphens and ensure lowercase
277
+ sanitized_name = bucket_name.replace('_', '-').lower()
278
+ if sanitized_name != bucket_name:
279
+ logging.warning(f"Bucket name '{bucket_name}' was sanitized to '{sanitized_name}' (underscores replaced with hyphens, converted to lowercase)")
280
+ return self.artifact.client.create_bucket(sanitized_name, expiration_measure, expiration_value)
281
+
282
+ def _index_tool_params(self):
283
+ return {
284
+ 'include_extensions': (Optional[List[str]], Field(
285
+ description="List of file extensions to include when processing: i.e. ['*.png', '*.jpg']. "
286
+ "If empty, all files will be processed (except skip_extensions).",
287
+ default=[])),
288
+ 'skip_extensions': (Optional[List[str]], Field(
289
+ description="List of file extensions to skip when processing: i.e. ['*.png', '*.jpg']",
290
+ default=[])),
291
+ }
60
292
 
61
293
  def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
294
+ self._log_tool_event(message=f"Loading the files from artifact's bucket. {kwargs=}", tool_name="loader")
62
295
  try:
63
- all_files = self.list_files(self.bucket, False)
296
+ all_files = self.list_files(self.bucket, False)['rows']
64
297
  except Exception as e:
65
298
  raise ToolException(f"Unable to extract files: {e}")
66
299
 
67
- for file in all_files['rows']:
300
+ include_extensions = kwargs.get('include_extensions', [])
301
+ skip_extensions = kwargs.get('skip_extensions', [])
302
+ self._log_tool_event(message=f"Files filtering started. Include extensions: {include_extensions}. "
303
+ f"Skip extensions: {skip_extensions}", tool_name="loader")
304
+ # show the progress of filtering
305
+ total_files = len(all_files) if isinstance(all_files, list) else 0
306
+ filtered_files_count = 0
307
+ for file in all_files:
308
+ filtered_files_count += 1
309
+ if filtered_files_count % 10 == 0 or filtered_files_count == total_files:
310
+ self._log_tool_event(message=f"Files filtering progress: {filtered_files_count}/{total_files}",
311
+ tool_name="loader")
312
+ file_name = file['name']
313
+
314
+ # Check if file should be skipped based on skip_extensions
315
+ if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
316
+ for pattern in skip_extensions):
317
+ continue
318
+
319
+ # Check if file should be included based on include_extensions
320
+ # If include_extensions is empty, process all files (that weren't skipped)
321
+ if include_extensions and not (any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
322
+ for pattern in include_extensions)):
323
+ continue
324
+
68
325
  metadata = {
69
326
  ("updated_on" if k == "modified" else k): str(v)
70
327
  for k, v in file.items()
71
328
  }
72
- metadata['id'] = self.get_hash_from_bucket_and_file_name(self.bucket, file['name'])
329
+ metadata['id'] = self.get_hash_from_bucket_and_file_name(self.bucket, file_name)
73
330
  yield Document(page_content="", metadata=metadata)
74
331
 
75
332
  def get_hash_from_bucket_and_file_name(self, bucket, file_name):
@@ -82,21 +339,24 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
82
339
  for document in documents:
83
340
  try:
84
341
  page_content = self.artifact.get_content_bytes(artifact_name=document.metadata['name'])
85
- document.metadata['loader_content'] = page_content
86
- document.metadata['loader_content_type'] = document.metadata['name']
342
+ document.metadata[IndexerKeywords.CONTENT_IN_BYTES.value] = page_content
343
+ document.metadata[IndexerKeywords.CONTENT_FILE_NAME.value] = document.metadata['name']
87
344
  yield document
88
345
  except Exception as e:
89
- logging.error(f"Failed while parsing the file '{document.metadata['name']}': {e}")
346
+ logger.error(f"Failed while parsing the file '{document.metadata['name']}': {e}")
90
347
  yield document
91
348
 
92
- @extend_with_parent_available_tools
349
+ @extend_with_file_operations
93
350
  def get_available_tools(self):
351
+ """Get available tools, including indexing tools only if vector store is configured."""
94
352
  bucket_name = (Optional[str], Field(description="Name of the bucket to work with."
95
353
  "If bucket is not specified by user directly, the name should be taken from chat history."
96
354
  "If bucket never mentioned in chat, the name will be taken from tool configuration."
97
355
  " ***IMPORTANT*** Underscore `_` is prohibited in bucket name and should be replaced by `-`",
98
356
  default=None))
99
- return [
357
+
358
+ # Basic artifact tools (always available)
359
+ basic_tools = [
100
360
  {
101
361
  "ref": self.list_files,
102
362
  "name": "listFiles",
@@ -110,7 +370,17 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
110
370
  "args_schema": create_model(
111
371
  "createFile",
112
372
  filename=(str, Field(description="Filename")),
113
- filedata=(str, Field(description="Stringified content of the file")),
373
+ filedata=(str, Field(description="""Stringified content of the file.
374
+ Example for .xlsx filedata format:
375
+ {
376
+ "Sheet1":[
377
+ ["Name", "Age", "City"],
378
+ ["Alice", 25, "New York"],
379
+ ["Bob", 30, "San Francisco"],
380
+ ["Charlie", 35, "Los Angeles"]
381
+ ]
382
+ }
383
+ """)),
114
384
  bucket_name=bucket_name
115
385
  )
116
386
  },
@@ -171,11 +441,34 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
171
441
  "description": "Creates new bucket specified by user.",
172
442
  "args_schema": create_model(
173
443
  "createNewBucket",
174
- bucket_name=(str, Field(description="Bucket name to create. ***IMPORTANT*** Underscore `_` is prohibited in bucket name and should be replaced by `-`.")),
444
+ bucket_name=(str, Field(
445
+ description="Bucket name to create. Must start with lowercase letter and contain only lowercase letters, numbers, and hyphens. Underscores will be automatically converted to hyphens.",
446
+ pattern=r'^[a-z][a-z0-9_-]*$' # Allow underscores in input, will be sanitized
447
+ )),
175
448
  expiration_measure=(Optional[str], Field(description="Measure of expiration time for bucket configuration."
176
449
  "Possible values: `days`, `weeks`, `months`, `years`.",
177
450
  default="weeks")),
178
451
  expiration_value=(Optional[int], Field(description="Expiration time values.", default=1))
179
452
  )
180
453
  }
181
- ]
454
+ ]
455
+
456
+ # Add indexing tools only if vector store is configured
457
+ has_vector_config = (
458
+ hasattr(self, 'embedding_model') and self.embedding_model and
459
+ hasattr(self, 'pgvector_configuration') and self.pgvector_configuration
460
+ )
461
+
462
+ if has_vector_config:
463
+ try:
464
+ # Get indexing tools from parent class
465
+ indexing_tools = super(ArtifactWrapper, self).get_available_tools()
466
+ return indexing_tools + basic_tools
467
+ except Exception as e:
468
+ # If getting parent tools fails, log warning and return basic tools only
469
+ logging.warning(f"Failed to load indexing tools: {e}. Only basic artifact tools will be available.")
470
+ return basic_tools
471
+ else:
472
+ # No vector store config, return basic tools only
473
+ logging.info("Vector store not configured. Indexing tools (index_data, search_index, etc.) are not available.")
474
+ return basic_tools
@@ -1,18 +1,33 @@
1
+ import json
1
2
  import logging
3
+ from copy import deepcopy
2
4
  from json import dumps
3
5
 
4
6
  from langchain_core.callbacks import dispatch_custom_event
5
7
  from langchain_core.messages import ToolCall
6
8
  from langchain_core.runnables import RunnableConfig
7
- from langchain_core.tools import BaseTool
8
- from typing import Any, Optional, Union, Annotated
9
+ from langchain_core.tools import BaseTool, ToolException
10
+ from typing import Any, Optional, Union
9
11
  from langchain_core.utils.function_calling import convert_to_openai_tool
10
12
  from pydantic import ValidationError
13
+
11
14
  from ..langchain.utils import propagate_the_input_mapping
12
15
 
13
16
  logger = logging.getLogger(__name__)
14
17
 
15
18
 
19
def replace_escaped_newlines(data):
    r"""Recursively replace escaped newlines with real newline characters.

    Every string value reachable through nested dictionaries and lists has
    the two-character sequence ``\n`` (a backslash followed by ``n``)
    replaced with an actual line-feed character. Required for sanitization
    of state variables in code node.

    Args:
        data: Value to sanitize. Dicts and lists are traversed, strings are
            rewritten, and any other type is returned unchanged.

    Returns:
        A new dict/list/str with the replacement applied, or ``data`` itself
        for other types. Dictionary keys are left untouched.
    """
    if isinstance(data, dict):
        return {key: replace_escaped_newlines(value) for key, value in data.items()}
    if isinstance(data, list):
        # Strings nested inside lists must be sanitized too, otherwise the
        # replacement is not applied to all string values.
        return [replace_escaped_newlines(item) for item in data]
    if isinstance(data, str):
        return data.replace('\\n', '\n')
    return data
30
+
16
31
  class FunctionTool(BaseTool):
17
32
  name: str = 'FunctionalTool'
18
33
  description: str = 'This is direct call node for tools'
@@ -21,6 +36,61 @@ class FunctionTool(BaseTool):
21
36
  input_variables: Optional[list[str]] = None
22
37
  input_mapping: Optional[dict[str, dict]] = None
23
38
  output_variables: Optional[list[str]] = None
39
+ structured_output: Optional[bool] = False
40
+ alita_client: Optional[Any] = None
41
+
42
def _prepare_pyodide_input(self, state: Union[str, dict, ToolCall]) -> str:
    """Prepare input for PyodideSandboxTool by injecting state into the code block.

    Args:
        state: Current state passed to the node. When it is a dict, its
            ``messages`` entry (if any) is dropped before serialization.

    Returns:
        A Python source snippet that imports ``json`` and defines an
        ``alita_state`` variable holding the JSON-decoded state. The caller
        is expected to prepend it to the user code.
    """
    # add state into the code block here since it might be changed during the execution of the code;
    # a deep copy keeps the caller's state untouched by the sanitization below
    state_copy = replace_escaped_newlines(deepcopy(state))

    if isinstance(state_copy, dict):
        # remove messages to avoid issues with pickling without langchain-core.
        # pop() with a default is used so a state without 'messages' does not raise KeyError.
        state_copy.pop('messages', None)

    # inject state into the code block as alita_state variable
    state_json = json.dumps(state_copy, ensure_ascii=False)
    # The second json.dumps wraps the JSON text in a quoted, escaped string
    # literal so it can be embedded in the generated code.
    return f'#state dict\nimport json\nalita_state = json.loads({json.dumps(state_json)})\n'
53
+
54
def _handle_pyodide_output(self, tool_result: Any) -> dict:
    """Handle output processing for PyodideSandboxTool results.

    Args:
        tool_result: Value returned by the tool invocation. A dict is
            inspected for the requested output variables and for the
            ``result`` / ``error`` keys; any other value is passed through
            as-is for each requested variable.

    Returns:
        A dict of state updates. It contains one entry per name in
        ``self.output_variables`` (``messages`` is rendered as a single
        assistant message with the JSON-dumped result), or only a
        ``messages`` entry when no output variables are configured. When
        ``self.structured_output`` is set, the keys of the ``result``
        payload are merged in as well.
    """
    tool_result_converted = {}
    result_is_dict = isinstance(tool_result, dict)

    if self.output_variables:
        for var in self.output_variables:
            if var == "messages":
                tool_result_converted["messages"] = [
                    {"role": "assistant", "content": dumps(tool_result)}]
                continue
            if result_is_dict and var in tool_result:
                tool_result_converted[var] = tool_result[var]
            elif result_is_dict:
                # handler in case user points to a var that is not in the output of the tool
                tool_result_converted[var] = tool_result.get(
                    'result',
                    tool_result.get('error') or 'Execution result is missing')
            else:
                # Non-dict results have no keys to look up; hand the raw
                # value over instead of failing on a missing .get().
                tool_result_converted[var] = tool_result
    else:
        tool_result_converted["messages"] = [
            {"role": "assistant", "content": dumps(tool_result)}]

    if self.structured_output:
        # execute code tool and update state variables
        result_value = tool_result.get('result', {}) if result_is_dict else tool_result
        try:
            if isinstance(result_value, dict):
                tool_result_converted.update(result_value)
            elif isinstance(result_value, list):
                # Handle list case - could wrap in a key or handle differently based on requirements
                tool_result_converted.update({"result": result_value})
            else:
                # Handle JSON string case
                tool_result_converted.update(json.loads(result_value))
        except json.JSONDecodeError:
            logger.error(f"JSONDecodeError: {tool_result}")
        except (TypeError, ValueError) as e:
            # json.loads got a non-string value, or the parsed value is not
            # a mapping that dict.update can merge.
            logger.error(f"Unable to merge structured output into state: {e}")

    return tool_result_converted
90
+
91
def _is_pyodide_tool(self) -> bool:
    """Tell whether the wrapped tool is the Pyodide sandbox.

    Returns:
        True when the wrapped tool's name, compared case-insensitively,
        equals ``pyodide_sandbox``; False otherwise.
    """
    normalized_name = self.tool.name.lower()
    return normalized_name == 'pyodide_sandbox'
24
94
 
25
95
  def invoke(
26
96
  self,
@@ -31,8 +101,15 @@ class FunctionTool(BaseTool):
31
101
  params = convert_to_openai_tool(self.tool).get(
32
102
  'function', {'parameters': {}}).get(
33
103
  'parameters', {'properties': {}}).get('properties', {})
104
+
34
105
  func_args = propagate_the_input_mapping(input_mapping=self.input_mapping, input_variables=self.input_variables,
35
106
  state=state)
107
+
108
+ # special handler for PyodideSandboxTool
109
+ if self._is_pyodide_tool():
110
+ # replace new lines in strings in code block
111
+ code = func_args['code'].replace('\\n', '\\\\n')
112
+ func_args['code'] = f"{self._prepare_pyodide_input(state)}\n{code}"
36
113
  try:
37
114
  tool_result = self.tool.invoke(func_args, config, **kwargs)
38
115
  dispatch_custom_event(
@@ -44,11 +121,30 @@ class FunctionTool(BaseTool):
44
121
  }, config=config
45
122
  )
46
123
  logger.info(f"ToolNode response: {tool_result}")
124
+
125
+ # handler for PyodideSandboxTool
126
+ if self._is_pyodide_tool():
127
+ return self._handle_pyodide_output(tool_result)
128
+
47
129
  if not self.output_variables:
48
130
  return {"messages": [{"role": "assistant", "content": dumps(tool_result)}]}
49
131
  else:
50
- if self.output_variables[0] == "messages":
51
- return {"messages": [{"role": "assistant", "content": dumps(tool_result)}]}
132
+ if "messages" in self.output_variables:
133
+ messages_dict = {
134
+ "messages": [{
135
+ "role": "assistant",
136
+ "content": dumps(tool_result)
137
+ if not isinstance(tool_result, ToolException) and not isinstance(tool_result, str)
138
+ else str(tool_result)
139
+ }]
140
+ }
141
+ for var in self.output_variables:
142
+ if var != "messages":
143
+ if isinstance(tool_result, dict) and var in tool_result:
144
+ messages_dict[var] = tool_result[var]
145
+ else:
146
+ messages_dict[var] = tool_result
147
+ return messages_dict
52
148
  else:
53
149
  return { self.output_variables[0]: tool_result }
54
150
  except ValidationError: