alita-sdk 0.3.379__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +156 -0
  6. alita_sdk/cli/agent_loader.py +245 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3113 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1073 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/testcases/__init__.py +94 -0
  23. alita_sdk/cli/testcases/data_generation.py +119 -0
  24. alita_sdk/cli/testcases/discovery.py +96 -0
  25. alita_sdk/cli/testcases/executor.py +84 -0
  26. alita_sdk/cli/testcases/logger.py +85 -0
  27. alita_sdk/cli/testcases/parser.py +172 -0
  28. alita_sdk/cli/testcases/prompts.py +91 -0
  29. alita_sdk/cli/testcases/reporting.py +125 -0
  30. alita_sdk/cli/testcases/setup.py +108 -0
  31. alita_sdk/cli/testcases/test_runner.py +282 -0
  32. alita_sdk/cli/testcases/utils.py +39 -0
  33. alita_sdk/cli/testcases/validation.py +90 -0
  34. alita_sdk/cli/testcases/workflow.py +196 -0
  35. alita_sdk/cli/toolkit.py +327 -0
  36. alita_sdk/cli/toolkit_loader.py +85 -0
  37. alita_sdk/cli/tools/__init__.py +43 -0
  38. alita_sdk/cli/tools/approval.py +224 -0
  39. alita_sdk/cli/tools/filesystem.py +1751 -0
  40. alita_sdk/cli/tools/planning.py +389 -0
  41. alita_sdk/cli/tools/terminal.py +414 -0
  42. alita_sdk/community/__init__.py +72 -12
  43. alita_sdk/community/inventory/__init__.py +236 -0
  44. alita_sdk/community/inventory/config.py +257 -0
  45. alita_sdk/community/inventory/enrichment.py +2137 -0
  46. alita_sdk/community/inventory/extractors.py +1469 -0
  47. alita_sdk/community/inventory/ingestion.py +3172 -0
  48. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  49. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  50. alita_sdk/community/inventory/parsers/base.py +295 -0
  51. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  52. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  53. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  54. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  55. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  56. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  57. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  58. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  59. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  60. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  61. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  62. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  63. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  64. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  65. alita_sdk/community/inventory/patterns/loader.py +348 -0
  66. alita_sdk/community/inventory/patterns/registry.py +198 -0
  67. alita_sdk/community/inventory/presets.py +535 -0
  68. alita_sdk/community/inventory/retrieval.py +1403 -0
  69. alita_sdk/community/inventory/toolkit.py +173 -0
  70. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  71. alita_sdk/community/inventory/visualize.py +1370 -0
  72. alita_sdk/configurations/__init__.py +1 -1
  73. alita_sdk/configurations/ado.py +141 -20
  74. alita_sdk/configurations/bitbucket.py +94 -2
  75. alita_sdk/configurations/confluence.py +130 -1
  76. alita_sdk/configurations/figma.py +76 -0
  77. alita_sdk/configurations/gitlab.py +91 -0
  78. alita_sdk/configurations/jira.py +103 -0
  79. alita_sdk/configurations/openapi.py +329 -0
  80. alita_sdk/configurations/qtest.py +72 -1
  81. alita_sdk/configurations/report_portal.py +96 -0
  82. alita_sdk/configurations/sharepoint.py +148 -0
  83. alita_sdk/configurations/testio.py +83 -0
  84. alita_sdk/configurations/testrail.py +88 -0
  85. alita_sdk/configurations/xray.py +93 -0
  86. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  87. alita_sdk/configurations/zephyr_essential.py +75 -0
  88. alita_sdk/runtime/clients/artifact.py +3 -3
  89. alita_sdk/runtime/clients/client.py +388 -46
  90. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  91. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  92. alita_sdk/runtime/clients/sandbox_client.py +8 -21
  93. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  94. alita_sdk/runtime/langchain/assistant.py +157 -39
  95. alita_sdk/runtime/langchain/constants.py +647 -1
  96. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  97. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  98. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  99. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -4
  100. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  101. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  102. alita_sdk/runtime/langchain/document_loaders/constants.py +40 -19
  103. alita_sdk/runtime/langchain/langraph_agent.py +405 -84
  104. alita_sdk/runtime/langchain/utils.py +106 -7
  105. alita_sdk/runtime/llms/preloaded.py +2 -6
  106. alita_sdk/runtime/models/mcp_models.py +61 -0
  107. alita_sdk/runtime/skills/__init__.py +91 -0
  108. alita_sdk/runtime/skills/callbacks.py +498 -0
  109. alita_sdk/runtime/skills/discovery.py +540 -0
  110. alita_sdk/runtime/skills/executor.py +610 -0
  111. alita_sdk/runtime/skills/input_builder.py +371 -0
  112. alita_sdk/runtime/skills/models.py +330 -0
  113. alita_sdk/runtime/skills/registry.py +355 -0
  114. alita_sdk/runtime/skills/skill_runner.py +330 -0
  115. alita_sdk/runtime/toolkits/__init__.py +31 -0
  116. alita_sdk/runtime/toolkits/application.py +29 -10
  117. alita_sdk/runtime/toolkits/artifact.py +20 -11
  118. alita_sdk/runtime/toolkits/datasource.py +13 -6
  119. alita_sdk/runtime/toolkits/mcp.py +783 -0
  120. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  121. alita_sdk/runtime/toolkits/planning.py +178 -0
  122. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  123. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  124. alita_sdk/runtime/toolkits/tools.py +356 -69
  125. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  126. alita_sdk/runtime/tools/__init__.py +10 -3
  127. alita_sdk/runtime/tools/application.py +27 -6
  128. alita_sdk/runtime/tools/artifact.py +511 -28
  129. alita_sdk/runtime/tools/data_analysis.py +183 -0
  130. alita_sdk/runtime/tools/function.py +67 -35
  131. alita_sdk/runtime/tools/graph.py +10 -4
  132. alita_sdk/runtime/tools/image_generation.py +148 -46
  133. alita_sdk/runtime/tools/llm.py +1003 -128
  134. alita_sdk/runtime/tools/loop.py +3 -1
  135. alita_sdk/runtime/tools/loop_output.py +3 -1
  136. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  137. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  138. alita_sdk/runtime/tools/mcp_server_tool.py +8 -5
  139. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  140. alita_sdk/runtime/tools/planning/models.py +246 -0
  141. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  142. alita_sdk/runtime/tools/router.py +2 -4
  143. alita_sdk/runtime/tools/sandbox.py +65 -48
  144. alita_sdk/runtime/tools/skill_router.py +776 -0
  145. alita_sdk/runtime/tools/tool.py +3 -1
  146. alita_sdk/runtime/tools/vectorstore.py +9 -3
  147. alita_sdk/runtime/tools/vectorstore_base.py +70 -14
  148. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  149. alita_sdk/runtime/utils/constants.py +5 -1
  150. alita_sdk/runtime/utils/mcp_client.py +492 -0
  151. alita_sdk/runtime/utils/mcp_oauth.py +361 -0
  152. alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
  153. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  154. alita_sdk/runtime/utils/serialization.py +155 -0
  155. alita_sdk/runtime/utils/streamlit.py +40 -13
  156. alita_sdk/runtime/utils/toolkit_utils.py +30 -9
  157. alita_sdk/runtime/utils/utils.py +36 -0
  158. alita_sdk/tools/__init__.py +134 -35
  159. alita_sdk/tools/ado/repos/__init__.py +51 -32
  160. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  161. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  162. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  163. alita_sdk/tools/ado/utils.py +1 -18
  164. alita_sdk/tools/ado/wiki/__init__.py +25 -12
  165. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  166. alita_sdk/tools/ado/work_item/__init__.py +26 -13
  167. alita_sdk/tools/ado/work_item/ado_wrapper.py +73 -11
  168. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  169. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  170. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  171. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  172. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  173. alita_sdk/tools/base/tool.py +5 -1
  174. alita_sdk/tools/base_indexer_toolkit.py +271 -84
  175. alita_sdk/tools/bitbucket/__init__.py +17 -11
  176. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  177. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  178. alita_sdk/tools/browser/__init__.py +5 -4
  179. alita_sdk/tools/carrier/__init__.py +5 -6
  180. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  181. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  182. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  183. alita_sdk/tools/chunkers/__init__.py +3 -1
  184. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  185. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  186. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  187. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  188. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  189. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  190. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  191. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  192. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  193. alita_sdk/tools/code/linter/__init__.py +10 -8
  194. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  195. alita_sdk/tools/code/sonar/__init__.py +11 -8
  196. alita_sdk/tools/code_indexer_toolkit.py +82 -22
  197. alita_sdk/tools/confluence/__init__.py +22 -16
  198. alita_sdk/tools/confluence/api_wrapper.py +107 -30
  199. alita_sdk/tools/confluence/loader.py +14 -2
  200. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  201. alita_sdk/tools/elastic/__init__.py +11 -8
  202. alita_sdk/tools/elitea_base.py +493 -30
  203. alita_sdk/tools/figma/__init__.py +58 -11
  204. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  205. alita_sdk/tools/figma/figma_client.py +73 -0
  206. alita_sdk/tools/figma/toon_tools.py +2748 -0
  207. alita_sdk/tools/github/__init__.py +14 -15
  208. alita_sdk/tools/github/github_client.py +224 -100
  209. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  210. alita_sdk/tools/github/schemas.py +14 -5
  211. alita_sdk/tools/github/tool.py +5 -1
  212. alita_sdk/tools/github/tool_prompts.py +9 -22
  213. alita_sdk/tools/gitlab/__init__.py +16 -11
  214. alita_sdk/tools/gitlab/api_wrapper.py +218 -48
  215. alita_sdk/tools/gitlab_org/__init__.py +10 -9
  216. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  217. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  218. alita_sdk/tools/google/bigquery/tool.py +5 -1
  219. alita_sdk/tools/google_places/__init__.py +11 -8
  220. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  221. alita_sdk/tools/jira/__init__.py +17 -10
  222. alita_sdk/tools/jira/api_wrapper.py +92 -41
  223. alita_sdk/tools/keycloak/__init__.py +11 -8
  224. alita_sdk/tools/localgit/__init__.py +9 -3
  225. alita_sdk/tools/localgit/local_git.py +62 -54
  226. alita_sdk/tools/localgit/tool.py +5 -1
  227. alita_sdk/tools/memory/__init__.py +12 -4
  228. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  229. alita_sdk/tools/ocr/__init__.py +11 -8
  230. alita_sdk/tools/openapi/__init__.py +491 -106
  231. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  232. alita_sdk/tools/openapi/tool.py +20 -0
  233. alita_sdk/tools/pandas/__init__.py +20 -12
  234. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  235. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  236. alita_sdk/tools/postman/__init__.py +10 -9
  237. alita_sdk/tools/pptx/__init__.py +11 -10
  238. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  239. alita_sdk/tools/qtest/__init__.py +31 -11
  240. alita_sdk/tools/qtest/api_wrapper.py +2135 -86
  241. alita_sdk/tools/rally/__init__.py +10 -9
  242. alita_sdk/tools/rally/api_wrapper.py +1 -1
  243. alita_sdk/tools/report_portal/__init__.py +12 -8
  244. alita_sdk/tools/salesforce/__init__.py +10 -8
  245. alita_sdk/tools/servicenow/__init__.py +17 -15
  246. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  247. alita_sdk/tools/sharepoint/__init__.py +10 -7
  248. alita_sdk/tools/sharepoint/api_wrapper.py +129 -38
  249. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  250. alita_sdk/tools/sharepoint/utils.py +8 -2
  251. alita_sdk/tools/slack/__init__.py +10 -7
  252. alita_sdk/tools/slack/api_wrapper.py +2 -2
  253. alita_sdk/tools/sql/__init__.py +12 -9
  254. alita_sdk/tools/testio/__init__.py +10 -7
  255. alita_sdk/tools/testrail/__init__.py +11 -10
  256. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  257. alita_sdk/tools/utils/__init__.py +9 -4
  258. alita_sdk/tools/utils/content_parser.py +103 -18
  259. alita_sdk/tools/utils/text_operations.py +410 -0
  260. alita_sdk/tools/utils/tool_prompts.py +79 -0
  261. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +30 -13
  262. alita_sdk/tools/xray/__init__.py +13 -9
  263. alita_sdk/tools/yagmail/__init__.py +9 -3
  264. alita_sdk/tools/zephyr/__init__.py +10 -7
  265. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -7
  266. alita_sdk/tools/zephyr_essential/__init__.py +10 -7
  267. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  268. alita_sdk/tools/zephyr_essential/client.py +2 -2
  269. alita_sdk/tools/zephyr_scale/__init__.py +11 -8
  270. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  271. alita_sdk/tools/zephyr_squad/__init__.py +10 -7
  272. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +154 -8
  273. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  274. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  275. alita_sdk-0.3.379.dist-info/RECORD +0 -360
  276. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  277. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  278. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,10 @@
1
1
  from datetime import datetime, timezone
2
+ from urllib.parse import unquote, urlparse, quote
2
3
 
3
4
  import jwt
4
5
  import requests
6
+ from botocore.response import get_response
7
+
5
8
 
6
9
  class SharepointAuthorizationHelper:
7
10
 
@@ -54,4 +57,191 @@ class SharepointAuthorizationHelper:
54
57
  except jwt.ExpiredSignatureError:
55
58
  return False
56
59
  except jwt.InvalidTokenError:
57
- return False
60
+ return False
61
+
62
def _validate_response(self, response, required_field, error_prefix=None):
    """Return ``required_field`` from a successful JSON response.

    Args:
        response: Object exposing ``status_code``, ``text`` and ``json()``.
        required_field: Key that must be present in the decoded JSON body.
        error_prefix: Optional label used at the start of the failure
            message; ``'Request'`` is used when it is empty or ``None``.

    Returns:
        The value stored under ``required_field`` in the JSON body.

    Raises:
        RuntimeError: If the status code is anything other than 200.
        KeyError: If ``required_field`` is absent from the JSON body.
    """
    status = response.status_code
    if status != 200:
        label = error_prefix or 'Request'
        raise RuntimeError(f"{label} failed: {status} {response.text}")

    payload = response.json()
    if required_field in payload:
        return payload[required_field]
    raise KeyError(f"'{required_field}' missing in response")
69
+
70
def generate_token_and_site_id(self, site_url: str) -> tuple[str, str]:
    """Obtain a Microsoft Graph access token and the Graph id of ``site_url``.

    The tenant is derived from the first label of the site hostname
    (``<label>.onmicrosoft.com``). The token endpoint is read from that
    tenant's OpenID configuration, a client-credentials token is requested
    using ``self.client_id`` / ``self.client_secret``, and the site is then
    looked up by hostname and path.

    Args:
        site_url: Full site URL; must contain both a hostname and a
            non-empty path.

    Returns:
        A ``(access_token, site_id)`` tuple.

    Raises:
        RuntimeError: On any failure (invalid URL, HTTP error, missing
            field, network error). The original exception is attached as
            ``__cause__``.
    """
    # Without a timeout, requests may block forever on an unresponsive host.
    request_timeout = 30  # seconds
    try:
        parsed = urlparse(site_url)
        domain = parsed.hostname
        site_path = parsed.path.strip('/')
        if not domain or not site_path:
            raise ValueError(f"site_url missing domain or site path: {site_url}")

        app_name = domain.split('.')[0]
        openid_config_url = f"https://login.microsoftonline.com/{app_name}.onmicrosoft.com/v2.0/.well-known/openid-configuration"
        response = requests.get(openid_config_url, timeout=request_timeout)
        token_url = self._validate_response(response, required_field="token_endpoint", error_prefix="OpenID config")

        token_data = {
            "grant_type": "client_credentials",
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "scope": "https://graph.microsoft.com/.default",
        }
        token_response = requests.post(token_url, data=token_data, timeout=request_timeout)
        access_token = self._validate_response(token_response, required_field="access_token", error_prefix="Token request")

        graph_site_url = f"https://graph.microsoft.com/v1.0/sites/{domain}:/{site_path}"
        headers = {"Authorization": f"Bearer {access_token}"}
        site_response = requests.get(graph_site_url, headers=headers, timeout=request_timeout)
        site_id = self._validate_response(site_response, required_field="id", error_prefix="Site info")
        return access_token, site_id
    except Exception as e:
        # Chain explicitly so callers can inspect the underlying failure.
        raise RuntimeError(f"Error while obtaining access_token and site_id: {e}") from e
96
+
97
def get_files_list(self, site_url: str, folder_name: str = None, limit_files: int = 100):
    """List files from every drive of a site via the Graph API.

    Folders are walked recursively; only non-folder items are returned.

    Args:
        site_url: Site URL; must start with ``https://``.
        folder_name: Optional folder to start from. When it begins with the
            last two path segments of ``site_url`` it is treated as a full
            path: only the drive whose path prefixes it is searched, and the
            remainder is used as the folder inside that drive. Otherwise it
            is used as-is as a folder inside every drive.
        limit_files: Maximum number of files to return, or ``None`` for no
            explicit cap.

    Returns:
        A list of dicts with keys ``Name``, ``Path``, ``Created``,
        ``Modified``, ``Link`` and ``id``.

    Raises:
        ValueError: If ``site_url`` or ``limit_files`` is invalid.
        RuntimeError: On any failure while talking to the Graph API; the
            original exception is attached as ``__cause__``.
    """
    if not site_url or not site_url.startswith("https://"):
        raise ValueError(f"Invalid site_url format: {site_url}")
    if limit_files is not None and (not isinstance(limit_files, int) or limit_files <= 0):
        raise ValueError(f"limit_files must be a positive integer, got: {limit_files}")

    # Without a timeout, requests may block forever on an unresponsive host.
    request_timeout = 30  # seconds
    try:
        access_token, site_id = self.generate_token_and_site_id(site_url)
        headers = {"Authorization": f"Bearer {access_token}"}
        drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
        drives_response = requests.get(drives_url, headers=headers, timeout=request_timeout)
        drives = self._validate_response(drives_response, required_field="value", error_prefix="Drives request")
        result = []

        def _recurse_drive(drive_id, drive_path, parent_folder, limit):
            """Collect file entries under ``parent_folder`` of one drive, depth-first."""
            drive_base = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}"
            if parent_folder:
                # Escape the folder path for URL safety, keeping '/' separators.
                safe_folder_name = quote(parent_folder.strip('/'), safe="/")
                url = f"{drive_base}/root:/{safe_folder_name}:/children"
            else:
                url = f"{drive_base}/root/children"
            # Only send $top when a cap is set; a literal "$top=None" is not a valid value.
            if limit is not None:
                url = f"{url}?$top={limit}"

            response = requests.get(url, headers=headers, timeout=request_timeout)
            if response.status_code != 200:
                return []
            files_json = response.json()
            if "value" not in files_json:
                return []

            files = []
            for file in files_json["value"]:
                file_name = file.get('name', '')
                # Build the full path reflecting nested folders.
                path_parts = [drive_path.strip('/')]
                if parent_folder:
                    path_parts.append(parent_folder.strip('/'))
                path_parts.append(file_name.strip('/'))
                temp_props = {
                    'Name': file_name,
                    'Path': '/' + '/'.join(path_parts),
                    'Created': file.get('createdDateTime'),
                    'Modified': file.get('lastModifiedDateTime'),
                    'Link': file.get('webUrl'),
                    'id': file.get('id'),
                }
                if not all([temp_props['Name'], temp_props['Path'], temp_props['id']]):
                    continue  # skip entries with missing required fields
                if 'folder' in file:
                    # Recursively extract files from this folder.
                    inner_folder = parent_folder + '/' + file_name if parent_folder else file_name
                    files.extend(_recurse_drive(drive_id, drive_path, inner_folder, limit))
                else:
                    files.append(temp_props)
                # Stop as soon as the overall cap (already-collected + local) is reached.
                if limit is not None and len(result) + len(files) >= limit:
                    return files[:limit - len(result)]
            return files

        site_segments = [seg for seg in site_url.strip('/').split('/') if seg][-2:]
        full_path_prefix = '/'.join(site_segments)

        for drive in drives:
            drive_id = drive.get("id")
            if not drive_id:
                continue  # skip drives without id
            web_url = drive.get("webUrl")
            drive_path = unquote(urlparse(web_url).path) if web_url else ""

            sub_folder = folder_name
            if folder_name:
                folder_path = folder_name.strip('/')
                expected_prefix = drive_path.strip('/')
                if folder_path.startswith(full_path_prefix):
                    if not folder_path.startswith(expected_prefix):
                        # Full-path folder that does not target the current drive.
                        continue
                    sub_folder = folder_path.removeprefix(expected_prefix).strip('/')

            result.extend(_recurse_drive(drive_id, drive_path, sub_folder, limit_files))
            if limit_files is not None and len(result) >= limit_files:
                return result[:limit_files]
        return result
    except Exception as e:
        # Chain explicitly so callers can inspect the underlying failure.
        raise RuntimeError(f"Error in get_files_list: {e}") from e
178
+
179
def get_file_content(self, site_url: str, path: str):
    """Download a file's raw bytes from the drive whose path prefixes ``path``.

    Each drive of the site is considered in turn. A drive is tried only when
    its ``webUrl`` path is a leading, segment-aligned prefix of ``path``; the
    remainder of ``path`` is then requested relative to that drive's root.

    Args:
        site_url: Site URL passed to ``generate_token_and_site_id``.
        path: Path of the file, beginning with the drive's own path.

    Returns:
        The response body (``bytes``) of the first request answered with 200.

    Raises:
        RuntimeError: If no drive yields the file or any step fails; the
            original exception is attached as ``__cause__``.
    """
    # Without a timeout, requests may block forever on an unresponsive host.
    request_timeout = 30  # seconds
    try:
        access_token, site_id = self.generate_token_and_site_id(site_url)
        headers = {"Authorization": f"Bearer {access_token}"}
        drives_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
        drives_response = requests.get(drives_url, headers=headers, timeout=request_timeout)
        drives = self._validate_response(drives_response, required_field="value", error_prefix="Drives request")

        # Keep the requested path intact: it is compared against every drive
        # and reported in the not-found error.
        full_path = path.strip('/')

        for drive in drives:
            web_url = drive.get("webUrl")
            drive_id = drive.get("id")
            if not web_url or not drive_id:
                continue
            drive_path = unquote(urlparse(web_url).path).strip('/')
            # Require a whole-segment prefix so that "Docs" does not match "Docs2/...".
            if not drive_path or not full_path.startswith(f"{drive_path}/"):
                continue
            # Strip only the leading drive path, not later occurrences of it.
            relative_path = full_path.removeprefix(drive_path).strip('/')
            # Keep '/' unescaped so nested folders stay path separators.
            safe_path = quote(relative_path, safe="/")
            url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{safe_path}:/content"
            response = requests.get(url, headers=headers, timeout=request_timeout)
            if response.status_code == 200:
                return response.content
        raise RuntimeError(f"File '{full_path}' not found in any private or shared documents.")
    except Exception as e:
        # Chain explicitly so callers can inspect the underlying failure.
        raise RuntimeError(f"Error in get_file_content: {e}") from e
204
+
205
def get_list_items(self, site_url: str, list_title: str, limit: int = 1000):
    """Fallback Graph API method to read SharePoint list items by list title.

    The site's lists are fetched and matched case-insensitively (after
    trimming whitespace) against ``displayName``, falling back to ``name``.
    Items of the first matching list are then requested with their fields
    expanded.

    Args:
        site_url: Site URL; must start with ``https://``.
        list_title: Title of the list to read.
        limit: Value sent as ``$top`` on the items request.

    Returns:
        A list of dictionaries representing list item fields; items with no
        ``fields`` are omitted.

    Raises:
        ValueError: If ``site_url`` is empty or not an ``https://`` URL.
        RuntimeError: If the list cannot be found or any request fails; the
            original exception is attached as ``__cause__``.
    """
    if not site_url or not site_url.startswith("https://"):
        raise ValueError(f"Invalid site_url format: {site_url}")

    # Without a timeout, requests may block forever on an unresponsive host.
    request_timeout = 30  # seconds
    try:
        access_token, site_id = self.generate_token_and_site_id(site_url)
        headers = {"Authorization": f"Bearer {access_token}"}

        lists_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists"
        response = requests.get(lists_url, headers=headers, timeout=request_timeout)
        if response.status_code != 200:
            raise RuntimeError(f"Lists request failed: {response.status_code} {response.text}")
        lists = response.json().get("value", [])

        normalized_title = list_title.strip().lower()
        # displayName is the user-visible title; name can differ (internal name).
        target_list = next(
            (
                lst for lst in lists
                if (lst.get("displayName") or lst.get("name") or '').strip().lower() == normalized_title
            ),
            None,
        )
        if not target_list:
            raise RuntimeError(f"List '{list_title}' not found via Graph API.")
        list_id = target_list.get('id')
        if not list_id:
            raise RuntimeError(f"List '{list_title}' missing id field.")

        items_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items?expand=fields&$top={limit}"
        items_response = requests.get(items_url, headers=headers, timeout=request_timeout)
        if items_response.status_code != 200:
            raise RuntimeError(f"List items request failed: {items_response.status_code} {items_response.text}")

        result = []
        for item in items_response.json().get('value', []):
            fields = item.get('fields', {})
            if fields:
                result.append(fields)
        return result
    except Exception as e:
        # Chain explicitly so callers can inspect the underlying failure.
        raise RuntimeError(f"Error in get_list_items: {e}") from e
@@ -1,5 +1,7 @@
1
- from docx import Document
1
+ import re
2
2
  from io import BytesIO
3
+ from docx import Document
4
+
3
5
 
4
6
  def read_docx_from_bytes(file_content):
5
7
  """Read and return content from a .docx file using a byte stream."""
@@ -11,4 +13,8 @@ def read_docx_from_bytes(file_content):
11
13
  return '\n'.join(text)
12
14
  except Exception as e:
13
15
  print(f"Error reading .docx from bytes: {e}")
14
- return ""
16
+ return ""
17
+
18
+
19
def decode_sharepoint_string(s):
    """Replace every ``_xHHHH_`` escape in ``s`` with the character it encodes.

    ``HHHH`` is exactly four hexadecimal digits (either case), read as a
    code point. Text that does not match the pattern is left untouched.
    """
    def _to_char(match):
        code_point = int(match.group(1), 16)
        return chr(code_point)

    return re.sub(r'_x([0-9A-Fa-f]{4})_', _to_char, s)
@@ -12,9 +12,10 @@ from pydantic import create_model, BaseModel, Field
12
12
  from ..base.tool import BaseAction
13
13
 
14
14
  from .api_wrapper import SlackApiWrapper
15
- from ..utils import TOOLKIT_SPLITTER, clean_string, get_max_toolkit_length, check_connection_response
15
+ from ..utils import clean_string, get_max_toolkit_length, check_connection_response
16
16
  from slack_sdk.errors import SlackApiError
17
17
  from slack_sdk import WebClient
18
+ from ...runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META
18
19
 
19
20
  name = "slack"
20
21
 
@@ -28,12 +29,10 @@ def get_tools(tool):
28
29
 
29
30
  class SlackToolkit(BaseToolkit):
30
31
  tools: List[BaseTool] = []
31
- toolkit_max_length: int = 0
32
32
 
33
33
  @staticmethod
34
34
  def toolkit_config_schema() -> BaseModel:
35
35
  selected_tools = {x['name']: x['args_schema'].schema() for x in SlackApiWrapper.model_construct().get_available_tools()}
36
- SlackToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
37
36
 
38
37
  @check_connection_response
39
38
  def check_connection(self):
@@ -78,17 +77,21 @@ class SlackToolkit(BaseToolkit):
78
77
  **kwargs['slack_configuration'],
79
78
  }
80
79
  slack_api_wrapper = SlackApiWrapper(**wrapper_payload)
81
- prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
82
80
  available_tools = slack_api_wrapper.get_available_tools()
83
81
  tools = []
84
82
  for tool in available_tools:
85
83
  if selected_tools and tool["name"] not in selected_tools:
86
84
  continue
87
- tools.append(BaseAction(
85
+ description = f"Slack Tool: {tool['description']}"
86
+ if toolkit_name:
87
+ description = f"{description}\nToolkit: {toolkit_name}"
88
+ description = description[:1000]
89
+ tools.append(BaseAction(
88
90
  api_wrapper=slack_api_wrapper,
89
- name=prefix + tool["name"],
90
- description=f"Slack Tool: {tool['description']}",
91
+ name=tool["name"],
92
+ description=description,
91
93
  args_schema=tool["args_schema"],
94
+ metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
92
95
  ))
93
96
  return cls(tools=tools)
94
97
 
@@ -17,8 +17,8 @@ SendMessageModel = create_model(
17
17
 
18
18
  ReadMessagesModel = create_model(
19
19
  "ReadMessagesModel",
20
- channel_id=(Optional[str], Field(default=None,description="Channel ID, user ID, or conversation ID to read messages from. (like C12345678 for public channels, D12345678 for DMs)")),
21
- limit=(int, Field(default=10, description="The number of messages to fetch (default is 10)."))
20
+ channel_id=(Optional[str], Field(default=None,description="Channel ID, user ID, or conversation ID to read messages from. (like C12345678 for public channels, D12345678 for DMs)")),
21
+ limit=(int, Field(default=10, description="The number of messages to fetch (default is 10).", gt=0))
22
22
  )
23
23
 
24
24
  CreateChannelModel = create_model(
@@ -7,8 +7,9 @@ from .api_wrapper import SQLApiWrapper
7
7
  from ..base.tool import BaseAction
8
8
  from .models import SQLDialect
9
9
  from ..elitea_base import filter_missconfigured_index_tools
10
- from ..utils import TOOLKIT_SPLITTER, clean_string, get_max_toolkit_length
10
+ from ..utils import clean_string, get_max_toolkit_length
11
11
  from ...configurations.sql import SqlConfiguration
12
+ from ...runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META
12
13
 
13
14
  name = "sql"
14
15
 
@@ -24,17 +25,15 @@ def get_tools(tool):
24
25
 
25
26
  class SQLToolkit(BaseToolkit):
26
27
  tools: list[BaseTool] = []
27
- toolkit_max_length: int = 0
28
28
 
29
29
  @staticmethod
30
30
  def toolkit_config_schema() -> BaseModel:
31
31
  selected_tools = {x['name']: x['args_schema'].schema() for x in SQLApiWrapper.model_construct().get_available_tools()}
32
- SQLToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
33
32
  supported_dialects = (d.value for d in SQLDialect)
34
33
  return create_model(
35
34
  name,
36
- dialect=(Literal[tuple(supported_dialects)], Field(description="Database dialect (mysql or postgres)")),
37
- database_name=(str, Field(description="Database name", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': SQLToolkit.toolkit_max_length})),
35
+ dialect=(Literal[tuple(supported_dialects)], Field(default=SQLDialect.POSTGRES.value, description="Database dialect (mysql or postgres)")),
36
+ database_name=(str, Field(description="Database name")),
38
37
  sql_configuration=(SqlConfiguration, Field(description="SQL Configuration", json_schema_extra={'configuration_types': ['sql']})),
39
38
  selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
40
39
  __config__=ConfigDict(json_schema_extra=
@@ -56,17 +55,21 @@ class SQLToolkit(BaseToolkit):
56
55
  **kwargs.get('sql_configuration', {}),
57
56
  }
58
57
  sql_api_wrapper = SQLApiWrapper(**wrapper_payload)
59
- prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
60
58
  available_tools = sql_api_wrapper.get_available_tools()
61
59
  tools = []
62
60
  for tool in available_tools:
63
61
  if selected_tools and tool["name"] not in selected_tools:
64
62
  continue
63
+ description = f"{tool['description']}\nDatabase: {sql_api_wrapper.database_name}. Host: {sql_api_wrapper.host}"
64
+ if toolkit_name:
65
+ description = f"{description}\nToolkit: {toolkit_name}"
66
+ description = description[:1000]
65
67
  tools.append(BaseAction(
66
68
  api_wrapper=sql_api_wrapper,
67
- name=prefix + tool["name"],
68
- description=f"{tool['description']}\nDatabase: {sql_api_wrapper.database_name}. Host: {sql_api_wrapper.host}",
69
- args_schema=tool["args_schema"]
69
+ name=tool["name"],
70
+ description=description,
71
+ args_schema=tool["args_schema"],
72
+ metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
70
73
  ))
71
74
  return cls(tools=tools)
72
75
 
@@ -6,8 +6,9 @@ from pydantic import create_model, BaseModel, ConfigDict, Field
6
6
  from .api_wrapper import TestIOApiWrapper
7
7
  from ..base.tool import BaseAction
8
8
  from ..elitea_base import filter_missconfigured_index_tools
9
- from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
9
+ from ..utils import clean_string, get_max_toolkit_length
10
10
  from ...configurations.testio import TestIOConfiguration
11
+ from ...runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META
11
12
 
12
13
  name = "testio"
13
14
 
@@ -19,8 +20,6 @@ def get_tools(tool):
19
20
  ).get_tools()
20
21
 
21
22
 
22
- TOOLKIT_MAX_LENGTH = 25
23
-
24
23
  class TestIOToolkit(BaseToolkit):
25
24
  tools: list[BaseTool] = []
26
25
 
@@ -47,17 +46,21 @@ class TestIOToolkit(BaseToolkit):
47
46
  **kwargs.get('testio_configuration', {}),
48
47
  }
49
48
  testio_api_wrapper = TestIOApiWrapper(**wrapper_payload)
50
- prefix = clean_string(toolkit_name, TOOLKIT_MAX_LENGTH) + TOOLKIT_SPLITTER if toolkit_name else ''
51
49
  available_tools = testio_api_wrapper.get_available_tools()
52
50
  tools = []
53
51
  for tool in available_tools:
54
52
  if selected_tools and tool["name"] not in selected_tools:
55
53
  continue
54
+ description = tool["description"]
55
+ if toolkit_name:
56
+ description = f"Toolkit: {toolkit_name}\n{description}"
57
+ description = description[:1000]
56
58
  tools.append(BaseAction(
57
59
  api_wrapper=testio_api_wrapper,
58
- name=prefix + tool["name"],
59
- description=tool["description"],
60
- args_schema=tool["args_schema"]
60
+ name=tool["name"],
61
+ description=description,
62
+ args_schema=tool["args_schema"],
63
+ metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
61
64
  ))
62
65
  return cls(tools=tools)
63
66
 
@@ -7,9 +7,10 @@ import requests
7
7
  from .api_wrapper import TestrailAPIWrapper
8
8
  from ..base.tool import BaseAction
9
9
  from ..elitea_base import filter_missconfigured_index_tools
10
- from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, check_connection_response
10
+ from ..utils import clean_string, get_max_toolkit_length, check_connection_response
11
11
  from ...configurations.testrail import TestRailConfiguration
12
12
  from ...configurations.pgvector import PgVectorConfiguration
13
+ from ...runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META
13
14
 
14
15
  name = "testrail"
15
16
 
@@ -31,17 +32,12 @@ def get_tools(tool):
31
32
 
32
33
  class TestrailToolkit(BaseToolkit):
33
34
  tools: List[BaseTool] = []
34
- toolkit_max_length: int = 0
35
35
 
36
36
  @staticmethod
37
37
  def toolkit_config_schema() -> BaseModel:
38
38
  selected_tools = {x['name']: x['args_schema'].schema() for x in TestrailAPIWrapper.model_construct().get_available_tools()}
39
- TestrailToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
40
39
  m = create_model(
41
40
  name,
42
- name=(str, Field(description="Toolkit name", json_schema_extra={
43
- 'toolkit_name': True,
44
- "max_length": TestrailToolkit.toolkit_max_length})),
45
41
  testrail_configuration=(Optional[TestRailConfiguration], Field(description="TestRail Configuration", json_schema_extra={'configuration_types': ['testrail']})),
46
42
  pgvector_configuration=(Optional[PgVectorConfiguration], Field(default = None,
47
43
  description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
@@ -79,18 +75,23 @@ class TestrailToolkit(BaseToolkit):
79
75
  **(kwargs.get('pgvector_configuration') or {}),
80
76
  }
81
77
  testrail_api_wrapper = TestrailAPIWrapper(**wrapper_payload)
82
- prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
83
78
  available_tools = testrail_api_wrapper.get_available_tools()
84
79
  tools = []
85
80
  for tool in available_tools:
86
81
  if selected_tools:
87
82
  if tool["name"] not in selected_tools:
88
83
  continue
84
+ description = tool["description"]
85
+ if toolkit_name:
86
+ description = f"Toolkit: {toolkit_name}\n{description}"
87
+ description = description + "\nTestrail instance: " + testrail_api_wrapper.url
88
+ description = description[:1000]
89
89
  tools.append(BaseAction(
90
90
  api_wrapper=testrail_api_wrapper,
91
- name=prefix + tool["name"],
92
- description=tool["description"] + "\nTestrail instance: " + testrail_api_wrapper.url,
93
- args_schema=tool["args_schema"]
91
+ name=tool["name"],
92
+ description=description,
93
+ args_schema=tool["args_schema"],
94
+ metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
94
95
  ))
95
96
  return cls(tools=tools)
96
97
 
@@ -697,7 +697,7 @@ class TestrailAPIWrapper(NonCodeIndexerToolkit):
697
697
  'id': str(case.get('id', '')),
698
698
  IndexerKeywords.UPDATED_ON.value: case.get('updated_on') or -1,
699
699
  'labels': [lbl['title'] for lbl in case.get('labels', [])],
700
- 'type': case.get('type_id') or -1,
700
+ 'type': "testrail_test_case",
701
701
  'priority': case.get('priority_id') or -1,
702
702
  'milestone': case.get('milestone_id') or -1,
703
703
  'estimate': case.get('estimate') or '',
@@ -7,6 +7,8 @@ import requests
7
7
  from pydantic import create_model, Field
8
8
 
9
9
 
10
+ # DEPRECATED: Tool names no longer use prefixes
11
+ # Kept for backward compatibility only
10
12
  TOOLKIT_SPLITTER = "___"
11
13
  TOOL_NAME_LIMIT = 64
12
14
 
@@ -22,10 +24,13 @@ def clean_string(s: str, max_length: int = 0):
22
24
 
23
25
 
24
26
  def get_max_toolkit_length(selected_tools: Any):
25
- """Calculates the maximum length of the toolkit name based on the selected tools per toolkit."""
26
-
27
- longest_tool_name_length = max(len(tool_name) for tool_name in selected_tools.keys())
28
- return TOOL_NAME_LIMIT - longest_tool_name_length - len(TOOLKIT_SPLITTER)
27
+ """DEPRECATED: Calculates the maximum length of the toolkit name.
28
+
29
+ This function is deprecated as tool names no longer use prefixes.
30
+ Returns a fixed value for backward compatibility.
31
+ """
32
+ # Return a reasonable default since we no longer use prefixes
33
+ return 50
29
34
 
30
35
 
31
36
  def parse_list(list_str: str = None) -> List[str]:
@@ -92,21 +92,32 @@ def parse_file_content(file_name=None, file_content=None, is_capture_image: bool
92
92
  return ToolException(
93
93
  "Not supported type of files entered. Supported types are TXT, DOCX, PDF, PPTX, XLSX and XLS only.")
94
94
 
95
- if hasattr(loader, 'get_content'):
96
- return loader.get_content()
97
- else:
98
- extension = Path(file_path if file_path else file_name).suffix
99
- loader_kwargs = get_loader_kwargs(loaders_map.get(extension), file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets)
100
- if file_content:
101
- return load_content_from_bytes(file_content=file_content,
102
- extension=extension,
103
- loader_extra_config=loader_kwargs,
104
- llm=llm)
95
+ try:
96
+ if hasattr(loader, 'get_content'):
97
+ return loader.get_content()
105
98
  else:
106
- return load_content(file_path=file_path,
107
- extension=extension,
108
- loader_extra_config=loader_kwargs,
109
- llm=llm)
99
+ extension = Path(file_path if file_path else file_name).suffix
100
+ loader_kwargs = get_loader_kwargs(loaders_map.get(extension), file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets)
101
+ if file_content:
102
+ return load_content_from_bytes(file_content=file_content,
103
+ extension=extension,
104
+ loader_extra_config=loader_kwargs,
105
+ llm=llm)
106
+ else:
107
+ return load_content(file_path=file_path,
108
+ extension=extension,
109
+ loader_extra_config=loader_kwargs,
110
+ llm=llm)
111
+ except Exception as e:
112
+ # Surface full underlying error message (including nested causes) so that
113
+ # JSONDecodeError or other specific issues are not hidden behind
114
+ # generic RuntimeError messages from loaders.
115
+ root_msg = str(e)
116
+ if getattr(e, "__cause__", None):
117
+ root_msg = f"{root_msg} | Cause: {e.__cause__}"
118
+ return ToolException(
119
+ f"Error reading file ({file_name or file_path}) content. Make sure these types are supported: {root_msg}"
120
+ )
110
121
 
111
122
  def load_file_docs(file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
112
123
  sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False) -> List[Document] | ToolException:
@@ -127,7 +138,38 @@ def load_file_docs(file_name=None, file_content=None, is_capture_image: bool = F
127
138
 
128
139
  def get_loader_kwargs(loader_object, file_name=None, file_content=None, is_capture_image: bool = False, page_number: int = None,
129
140
  sheet_name: str = None, llm=None, file_path: str = None, excel_by_sheets: bool = False, prompt=None):
130
- loader_kwargs = deepcopy(loader_object['kwargs'])
141
+ """Build loader kwargs safely without deepcopying non-picklable objects like LLMs.
142
+
143
+ We avoid copying keys that are going to be overridden by this function anyway
144
+ (file_path, file_content, file_name, extract_images, llm, page_number,
145
+ sheet_name, excel_by_sheets, prompt, row_content, json_documents) to
146
+ prevent errors such as `cannot pickle '_thread.RLock' object` when an LLM
147
+ or client with internal locks is stored in the original kwargs.
148
+ """
149
+ if not loader_object:
150
+ raise ToolException("Loader configuration is missing.")
151
+
152
+ original_kwargs = loader_object.get("kwargs", {}) or {}
153
+
154
+ # Keys that will be overwritten below – skip them when copying
155
+ overridden_keys = {
156
+ "file_path",
157
+ "file_content",
158
+ "file_name",
159
+ "extract_images",
160
+ "llm",
161
+ "page_number",
162
+ "sheet_name",
163
+ "excel_by_sheets",
164
+ "prompt",
165
+ "row_content",
166
+ "json_documents",
167
+ }
168
+
169
+ # Build a safe shallow copy without overridden keys to avoid deepcopy
170
+ # of potentially non-picklable objects (e.g., llm with internal RLock).
171
+ loader_kwargs = {k: v for k, v in original_kwargs.items() if k not in overridden_keys}
172
+
131
173
  loader_kwargs.update({
132
174
  "file_path": file_path,
133
175
  "file_content": file_content,
@@ -153,7 +195,7 @@ def prepare_loader(file_name=None, file_content=None, is_capture_image: bool = F
153
195
 
154
196
  loader_object = loaders_map.get(extension)
155
197
  if not loader_object:
156
- return None
198
+ loader_object = loaders_map.get('.txt') # Default to text loader if no specific loader found
157
199
  loader_kwargs = get_loader_kwargs(loader_object, file_name, file_content, is_capture_image, page_number, sheet_name, llm, file_path, excel_by_sheets, prompt)
158
200
  loader = loader_object['class'](**loader_kwargs)
159
201
  return loader
@@ -209,6 +251,41 @@ def load_content_from_bytes(file_content: bytes, extension: str = None, loader_e
209
251
  if temp_file_path and os.path.exists(temp_file_path):
210
252
  os.remove(temp_file_path)
211
253
 
254
+
255
+ def _load_content_from_bytes_with_prompt(file_content: bytes, extension: str = None, loader_extra_config: dict = None, llm = None, prompt: str = image_processing_prompt) -> str:
256
+ """Internal helper that behaves like load_content_from_bytes but also propagates prompt.
257
+
258
+ This keeps the public load_content_from_bytes API unchanged while allowing newer
259
+ code paths to pass an explicit prompt through to the loader.
260
+ """
261
+ temp_file_path = None
262
+ try:
263
+ with tempfile.NamedTemporaryFile(mode='w+b', delete=False, suffix=extension or '') as temp_file:
264
+ temp_file.write(file_content)
265
+ temp_file.flush()
266
+ temp_file_path = temp_file.name
267
+
268
+ # Use prepare_loader so that prompt and other kwargs are handled consistently
269
+ loader = prepare_loader(
270
+ file_name=None,
271
+ file_content=None,
272
+ is_capture_image=loader_extra_config.get('extract_images') if loader_extra_config else False,
273
+ page_number=loader_extra_config.get('page_number') if loader_extra_config else None,
274
+ sheet_name=loader_extra_config.get('sheet_name') if loader_extra_config else None,
275
+ llm=llm or (loader_extra_config.get('llm') if loader_extra_config else None),
276
+ file_path=temp_file_path,
277
+ excel_by_sheets=loader_extra_config.get('excel_by_sheets') if loader_extra_config else False,
278
+ prompt=prompt or (loader_extra_config.get('prompt') if loader_extra_config else image_processing_prompt),
279
+ )
280
+
281
+ documents = loader.load()
282
+ page_contents = [doc.page_content for doc in documents]
283
+ return "\n".join(page_contents)
284
+ finally:
285
+ if temp_file_path and os.path.exists(temp_file_path):
286
+ os.remove(temp_file_path)
287
+
288
+
212
289
  def process_document_by_type(content, extension_source: str, document: Document = None, llm = None, chunking_config=None) \
213
290
  -> Generator[Document, None, None]:
214
291
  """Process the content of a file based on its type using a configured loader cosidering the origin document."""
@@ -222,10 +299,18 @@ def process_document_by_type(content, extension_source: str, document: Document
222
299
  metadata={**document.metadata, 'chunk_id': 1}
223
300
  )
224
301
  return
302
+ #
303
+ chunks_counter = 0
225
304
  for chunk in chunks:
305
+ chunks_counter += 1
306
+ metadata = {**document.metadata, **chunk.metadata}
307
+ #
308
+ # ensure each chunk has a unique chunk_id
309
+ metadata['chunk_id'] = chunks_counter
310
+ #
226
311
  yield Document(
227
312
  page_content=sanitize_for_postgres(chunk.page_content),
228
- metadata={**document.metadata, **chunk.metadata}
313
+ metadata=metadata
229
314
  )
230
315
 
231
316
 
@@ -327,4 +412,4 @@ def file_extension_by_chunker(chunker_name: str) -> str | None:
327
412
  return ".xml"
328
413
  if name == "csv":
329
414
  return ".csv"
330
- return None
415
+ return None