alita-sdk 0.3.257__py3-none-any.whl → 0.3.562__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278):
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3601 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1073 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +72 -12
  30. alita_sdk/community/inventory/__init__.py +236 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  58. alita_sdk/community/inventory/visualize.py +1370 -0
  59. alita_sdk/configurations/__init__.py +11 -0
  60. alita_sdk/configurations/ado.py +148 -2
  61. alita_sdk/configurations/azure_search.py +1 -1
  62. alita_sdk/configurations/bigquery.py +1 -1
  63. alita_sdk/configurations/bitbucket.py +94 -2
  64. alita_sdk/configurations/browser.py +18 -0
  65. alita_sdk/configurations/carrier.py +19 -0
  66. alita_sdk/configurations/confluence.py +130 -1
  67. alita_sdk/configurations/delta_lake.py +1 -1
  68. alita_sdk/configurations/figma.py +76 -5
  69. alita_sdk/configurations/github.py +65 -1
  70. alita_sdk/configurations/gitlab.py +81 -0
  71. alita_sdk/configurations/google_places.py +17 -0
  72. alita_sdk/configurations/jira.py +103 -0
  73. alita_sdk/configurations/openapi.py +111 -0
  74. alita_sdk/configurations/postman.py +1 -1
  75. alita_sdk/configurations/qtest.py +72 -3
  76. alita_sdk/configurations/report_portal.py +115 -0
  77. alita_sdk/configurations/salesforce.py +19 -0
  78. alita_sdk/configurations/service_now.py +1 -12
  79. alita_sdk/configurations/sharepoint.py +167 -0
  80. alita_sdk/configurations/sonar.py +18 -0
  81. alita_sdk/configurations/sql.py +20 -0
  82. alita_sdk/configurations/testio.py +101 -0
  83. alita_sdk/configurations/testrail.py +88 -0
  84. alita_sdk/configurations/xray.py +94 -1
  85. alita_sdk/configurations/zephyr_enterprise.py +94 -1
  86. alita_sdk/configurations/zephyr_essential.py +95 -0
  87. alita_sdk/runtime/clients/artifact.py +21 -4
  88. alita_sdk/runtime/clients/client.py +458 -67
  89. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  90. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  91. alita_sdk/runtime/clients/sandbox_client.py +352 -0
  92. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  93. alita_sdk/runtime/langchain/assistant.py +183 -43
  94. alita_sdk/runtime/langchain/constants.py +647 -1
  95. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  96. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
  97. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
  98. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  99. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
  100. alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
  101. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
  102. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
  103. alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
  104. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
  105. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
  106. alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
  107. alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
  108. alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
  109. alita_sdk/runtime/langchain/langraph_agent.py +407 -92
  110. alita_sdk/runtime/langchain/utils.py +102 -8
  111. alita_sdk/runtime/llms/preloaded.py +2 -6
  112. alita_sdk/runtime/models/mcp_models.py +61 -0
  113. alita_sdk/runtime/skills/__init__.py +91 -0
  114. alita_sdk/runtime/skills/callbacks.py +498 -0
  115. alita_sdk/runtime/skills/discovery.py +540 -0
  116. alita_sdk/runtime/skills/executor.py +610 -0
  117. alita_sdk/runtime/skills/input_builder.py +371 -0
  118. alita_sdk/runtime/skills/models.py +330 -0
  119. alita_sdk/runtime/skills/registry.py +355 -0
  120. alita_sdk/runtime/skills/skill_runner.py +330 -0
  121. alita_sdk/runtime/toolkits/__init__.py +28 -0
  122. alita_sdk/runtime/toolkits/application.py +14 -4
  123. alita_sdk/runtime/toolkits/artifact.py +24 -9
  124. alita_sdk/runtime/toolkits/datasource.py +13 -6
  125. alita_sdk/runtime/toolkits/mcp.py +780 -0
  126. alita_sdk/runtime/toolkits/planning.py +178 -0
  127. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  128. alita_sdk/runtime/toolkits/subgraph.py +11 -6
  129. alita_sdk/runtime/toolkits/tools.py +314 -70
  130. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  131. alita_sdk/runtime/tools/__init__.py +24 -0
  132. alita_sdk/runtime/tools/application.py +16 -4
  133. alita_sdk/runtime/tools/artifact.py +367 -33
  134. alita_sdk/runtime/tools/data_analysis.py +183 -0
  135. alita_sdk/runtime/tools/function.py +100 -4
  136. alita_sdk/runtime/tools/graph.py +81 -0
  137. alita_sdk/runtime/tools/image_generation.py +218 -0
  138. alita_sdk/runtime/tools/llm.py +1013 -177
  139. alita_sdk/runtime/tools/loop.py +3 -1
  140. alita_sdk/runtime/tools/loop_output.py +3 -1
  141. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  142. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  143. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  144. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  145. alita_sdk/runtime/tools/planning/models.py +246 -0
  146. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  147. alita_sdk/runtime/tools/router.py +2 -1
  148. alita_sdk/runtime/tools/sandbox.py +375 -0
  149. alita_sdk/runtime/tools/skill_router.py +776 -0
  150. alita_sdk/runtime/tools/tool.py +3 -1
  151. alita_sdk/runtime/tools/vectorstore.py +69 -65
  152. alita_sdk/runtime/tools/vectorstore_base.py +163 -90
  153. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  154. alita_sdk/runtime/utils/mcp_client.py +492 -0
  155. alita_sdk/runtime/utils/mcp_oauth.py +361 -0
  156. alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
  157. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  158. alita_sdk/runtime/utils/streamlit.py +41 -14
  159. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  160. alita_sdk/runtime/utils/utils.py +48 -0
  161. alita_sdk/tools/__init__.py +135 -37
  162. alita_sdk/tools/ado/__init__.py +2 -2
  163. alita_sdk/tools/ado/repos/__init__.py +15 -19
  164. alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
  165. alita_sdk/tools/ado/test_plan/__init__.py +26 -8
  166. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
  167. alita_sdk/tools/ado/wiki/__init__.py +27 -12
  168. alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
  169. alita_sdk/tools/ado/work_item/__init__.py +27 -12
  170. alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
  171. alita_sdk/tools/advanced_jira_mining/__init__.py +12 -8
  172. alita_sdk/tools/aws/delta_lake/__init__.py +14 -11
  173. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  174. alita_sdk/tools/azure_ai/search/__init__.py +13 -8
  175. alita_sdk/tools/base/tool.py +5 -1
  176. alita_sdk/tools/base_indexer_toolkit.py +454 -110
  177. alita_sdk/tools/bitbucket/__init__.py +27 -19
  178. alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
  179. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
  180. alita_sdk/tools/browser/__init__.py +41 -16
  181. alita_sdk/tools/browser/crawler.py +3 -1
  182. alita_sdk/tools/browser/utils.py +15 -6
  183. alita_sdk/tools/carrier/__init__.py +18 -17
  184. alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
  185. alita_sdk/tools/carrier/excel_reporter.py +8 -4
  186. alita_sdk/tools/chunkers/__init__.py +3 -1
  187. alita_sdk/tools/chunkers/code/codeparser.py +1 -1
  188. alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
  189. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  190. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  191. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  192. alita_sdk/tools/cloud/aws/__init__.py +11 -7
  193. alita_sdk/tools/cloud/azure/__init__.py +11 -7
  194. alita_sdk/tools/cloud/gcp/__init__.py +11 -7
  195. alita_sdk/tools/cloud/k8s/__init__.py +11 -7
  196. alita_sdk/tools/code/linter/__init__.py +9 -8
  197. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  198. alita_sdk/tools/code/sonar/__init__.py +20 -13
  199. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  200. alita_sdk/tools/confluence/__init__.py +21 -14
  201. alita_sdk/tools/confluence/api_wrapper.py +197 -58
  202. alita_sdk/tools/confluence/loader.py +14 -2
  203. alita_sdk/tools/custom_open_api/__init__.py +11 -5
  204. alita_sdk/tools/elastic/__init__.py +10 -8
  205. alita_sdk/tools/elitea_base.py +546 -64
  206. alita_sdk/tools/figma/__init__.py +11 -8
  207. alita_sdk/tools/figma/api_wrapper.py +352 -153
  208. alita_sdk/tools/github/__init__.py +17 -17
  209. alita_sdk/tools/github/api_wrapper.py +9 -26
  210. alita_sdk/tools/github/github_client.py +81 -12
  211. alita_sdk/tools/github/schemas.py +2 -1
  212. alita_sdk/tools/github/tool.py +5 -1
  213. alita_sdk/tools/gitlab/__init__.py +18 -13
  214. alita_sdk/tools/gitlab/api_wrapper.py +224 -80
  215. alita_sdk/tools/gitlab_org/__init__.py +13 -10
  216. alita_sdk/tools/google/bigquery/__init__.py +13 -13
  217. alita_sdk/tools/google/bigquery/tool.py +5 -1
  218. alita_sdk/tools/google_places/__init__.py +20 -11
  219. alita_sdk/tools/jira/__init__.py +21 -11
  220. alita_sdk/tools/jira/api_wrapper.py +315 -168
  221. alita_sdk/tools/keycloak/__init__.py +10 -8
  222. alita_sdk/tools/localgit/__init__.py +8 -3
  223. alita_sdk/tools/localgit/local_git.py +62 -54
  224. alita_sdk/tools/localgit/tool.py +5 -1
  225. alita_sdk/tools/memory/__init__.py +38 -14
  226. alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
  227. alita_sdk/tools/ocr/__init__.py +10 -8
  228. alita_sdk/tools/openapi/__init__.py +281 -108
  229. alita_sdk/tools/openapi/api_wrapper.py +883 -0
  230. alita_sdk/tools/openapi/tool.py +20 -0
  231. alita_sdk/tools/pandas/__init__.py +18 -11
  232. alita_sdk/tools/pandas/api_wrapper.py +40 -45
  233. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  234. alita_sdk/tools/postman/__init__.py +10 -11
  235. alita_sdk/tools/postman/api_wrapper.py +19 -8
  236. alita_sdk/tools/postman/postman_analysis.py +8 -1
  237. alita_sdk/tools/pptx/__init__.py +10 -10
  238. alita_sdk/tools/qtest/__init__.py +21 -14
  239. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  240. alita_sdk/tools/rally/__init__.py +12 -10
  241. alita_sdk/tools/report_portal/__init__.py +22 -16
  242. alita_sdk/tools/salesforce/__init__.py +21 -16
  243. alita_sdk/tools/servicenow/__init__.py +20 -16
  244. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  245. alita_sdk/tools/sharepoint/__init__.py +16 -14
  246. alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
  247. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  248. alita_sdk/tools/sharepoint/utils.py +8 -2
  249. alita_sdk/tools/slack/__init__.py +11 -7
  250. alita_sdk/tools/sql/__init__.py +21 -19
  251. alita_sdk/tools/sql/api_wrapper.py +71 -23
  252. alita_sdk/tools/testio/__init__.py +20 -13
  253. alita_sdk/tools/testrail/__init__.py +12 -11
  254. alita_sdk/tools/testrail/api_wrapper.py +214 -46
  255. alita_sdk/tools/utils/__init__.py +28 -4
  256. alita_sdk/tools/utils/content_parser.py +182 -62
  257. alita_sdk/tools/utils/text_operations.py +254 -0
  258. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
  259. alita_sdk/tools/xray/__init__.py +17 -14
  260. alita_sdk/tools/xray/api_wrapper.py +58 -113
  261. alita_sdk/tools/yagmail/__init__.py +8 -3
  262. alita_sdk/tools/zephyr/__init__.py +11 -7
  263. alita_sdk/tools/zephyr_enterprise/__init__.py +15 -9
  264. alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
  265. alita_sdk/tools/zephyr_essential/__init__.py +15 -10
  266. alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
  267. alita_sdk/tools/zephyr_essential/client.py +6 -4
  268. alita_sdk/tools/zephyr_scale/__init__.py +12 -8
  269. alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
  270. alita_sdk/tools/zephyr_squad/__init__.py +11 -7
  271. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/METADATA +184 -37
  272. alita_sdk-0.3.562.dist-info/RECORD +450 -0
  273. alita_sdk-0.3.562.dist-info/entry_points.txt +2 -0
  274. alita_sdk/tools/bitbucket/tools.py +0 -304
  275. alita_sdk-0.3.257.dist-info/RECORD +0 -343
  276. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/WHEEL +0 -0
  277. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/licenses/LICENSE +0 -0
  278. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/top_level.txt +0 -0
@@ -13,162 +13,310 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from langchain_community.document_loaders import (
16
- UnstructuredMarkdownLoader,
17
16
  AirbyteJSONLoader, UnstructuredHTMLLoader,
18
- PythonLoader)
17
+ UnstructuredXMLLoader)
19
18
 
20
19
  from .AlitaCSVLoader import AlitaCSVLoader
21
20
  from .AlitaDocxMammothLoader import AlitaDocxMammothLoader
22
21
  from .AlitaExcelLoader import AlitaExcelLoader
23
22
  from .AlitaImageLoader import AlitaImageLoader
24
23
  from .AlitaJSONLoader import AlitaJSONLoader
24
+ from .AlitaJSONLinesLoader import AlitaJSONLinesLoader
25
25
  from .AlitaPDFLoader import AlitaPDFLoader
26
26
  from .AlitaPowerPointLoader import AlitaPowerPointLoader
27
27
  from .AlitaTextLoader import AlitaTextLoader
28
+ from .AlitaMarkdownLoader import AlitaMarkdownLoader
29
+ from .AlitaPythonLoader import AlitaPythonLoader
30
+ from enum import Enum
31
+ from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
28
32
 
29
- loaders_map = {
33
+
34
+ class LoaderProperties(Enum):
35
+ LLM = 'use_llm'
36
+ PROMPT_DEFAULT = 'use_default_prompt'
37
+ PROMPT = 'prompt'
38
+
39
+ DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}
40
+
41
+ DEFAULT_ALLOWED_WITH_LLM = {
42
+ **DEFAULT_ALLOWED_BASE,
43
+ LoaderProperties.LLM.value: False,
44
+ LoaderProperties.PROMPT_DEFAULT.value: False,
45
+ LoaderProperties.PROMPT.value: "",
46
+ }
47
+
48
+ DEFAULT_ALLOWED_EXCEL = {**DEFAULT_ALLOWED_WITH_LLM, 'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''}
49
+
50
+ # Image file loaders mapping - directly supported by LLM with image_url
51
+ image_loaders_map = {
30
52
  '.png': {
31
53
  'class': AlitaImageLoader,
54
+ 'mime_type': 'image/png',
32
55
  'is_multimodal_processing': True,
33
- 'kwargs': {}
56
+ 'kwargs': {},
57
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM,
34
58
  },
35
59
  '.jpg': {
36
60
  'class': AlitaImageLoader,
61
+ 'mime_type': 'image/jpeg',
37
62
  'is_multimodal_processing': True,
38
- 'kwargs': {}
63
+ 'kwargs': {},
64
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
39
65
  },
40
66
  '.jpeg': {
41
67
  'class': AlitaImageLoader,
68
+ 'mime_type': 'image/jpeg',
42
69
  'is_multimodal_processing': True,
43
- 'kwargs': {}
70
+ 'kwargs': {},
71
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
44
72
  },
45
73
  '.gif': {
46
74
  'class': AlitaImageLoader,
75
+ 'mime_type': 'image/gif',
47
76
  'is_multimodal_processing': True,
48
- 'kwargs': {}
77
+ 'kwargs': {},
78
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
49
79
  },
80
+ '.webp': {
81
+ 'class': AlitaImageLoader,
82
+ 'mime_type': 'image/webp',
83
+ 'is_multimodal_processing': True,
84
+ 'kwargs': {},
85
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
86
+ }
87
+ }
88
+
89
+ # Image file loaders mapping - require conversion before sending to LLM
90
+ image_loaders_map_converted = {
50
91
  '.bmp': {
51
92
  'class': AlitaImageLoader,
93
+ 'mime_type': 'image/bmp',
52
94
  'is_multimodal_processing': True,
53
- 'kwargs': {}
95
+ 'kwargs': {},
96
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
54
97
  },
55
98
  '.svg': {
56
99
  'class': AlitaImageLoader,
100
+ 'mime_type': 'image/svg+xml',
57
101
  'is_multimodal_processing': True,
58
- 'kwargs': {}
59
- },
102
+ 'kwargs': {},
103
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
104
+ }
105
+ }
106
+
107
+ # Document file loaders mapping
108
+ document_loaders_map = {
60
109
  '.txt': {
61
110
  'class': AlitaTextLoader,
111
+ 'mime_type': 'text/plain',
62
112
  'is_multimodal_processing': False,
63
113
  'kwargs': {
64
114
  'autodetect_encoding': True
65
- }
115
+ },
116
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
66
117
  },
67
118
  '.yml': {
68
119
  'class': AlitaTextLoader,
120
+ 'mime_type': 'application/yaml',
69
121
  'is_multimodal_processing': False,
70
122
  'kwargs': {
71
123
  'autodetect_encoding': True
72
- }
124
+ },
125
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
73
126
  },
74
127
  '.yaml': {
75
128
  'class': AlitaTextLoader,
129
+ 'mime_type': 'application/yaml',
76
130
  'is_multimodal_processing': False,
77
131
  'kwargs': {
78
132
  'autodetect_encoding': True
79
- }
133
+ },
134
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
80
135
  },
81
136
  '.groovy': {
82
137
  'class': AlitaTextLoader,
138
+ 'mime_type': 'text/x-groovy',
83
139
  'is_multimodal_processing': False,
84
140
  'kwargs': {
85
141
  'autodetect_encoding': True
86
- }
142
+ },
143
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
87
144
  },
88
145
  '.md': {
89
- 'class': UnstructuredMarkdownLoader,
146
+ 'class': AlitaMarkdownLoader,
147
+ 'mime_type': 'text/markdown',
90
148
  'is_multimodal_processing': False,
91
- 'kwargs': {}
149
+ 'kwargs': {},
150
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
92
151
  },
93
152
  '.csv': {
94
153
  'class': AlitaCSVLoader,
154
+ 'mime_type': 'text/csv',
95
155
  'is_multimodal_processing': False,
96
156
  'kwargs': {
97
157
  'encoding': 'utf-8',
98
- 'raw_content': False,
158
+ 'raw_content': True,
99
159
  'cleanse': False
100
- }
160
+ },
161
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
101
162
  },
102
163
  '.xlsx': {
103
164
  'class': AlitaExcelLoader,
165
+ 'mime_type': ('application/vnd.openxmlformats-officedocument.'
166
+ 'spreadsheetml.sheet'),
104
167
  'is_multimodal_processing': False,
105
168
  'kwargs': {
106
- 'raw_content': False,
107
- 'cleanse': False
108
- }
169
+ 'add_header_to_chunks': False,
170
+ 'header_row_number': 1,
171
+ 'max_tokens': -1,
172
+ 'sheet_name': ''
173
+ },
174
+ 'allowed_to_override': DEFAULT_ALLOWED_EXCEL
109
175
  },
110
176
  '.xls': {
111
177
  'class': AlitaExcelLoader,
178
+ 'mime_type': 'application/vnd.ms-excel',
112
179
  'is_multimodal_processing': False,
113
180
  'kwargs': {
114
- 'raw_content': False,
181
+ 'excel_by_sheets': True,
182
+ 'raw_content': True,
115
183
  'cleanse': False
116
- }
184
+ },
185
+ 'allowed_to_override': DEFAULT_ALLOWED_EXCEL
117
186
  },
118
187
  '.pdf': {
119
188
  'class': AlitaPDFLoader,
189
+ 'mime_type': 'application/pdf',
120
190
  'is_multimodal_processing': False,
121
- 'kwargs': {}
191
+ 'kwargs': {},
192
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
122
193
  },
123
194
  '.docx': {
124
195
  'class': AlitaDocxMammothLoader,
196
+ 'mime_type': ('application/vnd.openxmlformats-officedocument.'
197
+ 'wordprocessingml.document'),
125
198
  'is_multimodal_processing': True,
126
199
  'kwargs': {
127
200
  'extract_images': True
128
- }
129
- },
130
- '.doc': {
131
- 'class': AlitaTextLoader,
132
- 'is_multimodal_processing': True,
133
- 'kwargs': {}
201
+ },
202
+ 'allowed_to_override': {**DEFAULT_ALLOWED_WITH_LLM, 'mode': 'paged'}
134
203
  },
135
204
  '.json': {
136
205
  'class': AlitaJSONLoader,
206
+ 'mime_type': 'application/json',
137
207
  'is_multimodal_processing': False,
138
- 'kwargs': {}
208
+ 'kwargs': {},
209
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
139
210
  },
140
211
  '.jsonl': {
141
- 'class': AirbyteJSONLoader,
212
+ 'class': AlitaJSONLinesLoader,
213
+ 'mime_type': 'application/jsonl',
142
214
  'is_multimodal_processing': False,
143
- 'kwargs': {}
215
+ 'kwargs': {},
216
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
144
217
  },
145
218
  '.htm': {
146
219
  'class': UnstructuredHTMLLoader,
220
+ 'mime_type': 'text/html',
147
221
  'is_multimodal_processing': False,
148
- 'kwargs': {}
222
+ 'kwargs': {},
223
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
149
224
  },
150
225
  '.html': {
151
226
  'class': UnstructuredHTMLLoader,
227
+ 'mime_type': 'text/html',
228
+ 'is_multimodal_processing': False,
229
+ 'kwargs': {},
230
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
231
+ },
232
+ '.xml': {
233
+ 'class': UnstructuredXMLLoader,
234
+ 'mime_type': 'text/xml',
152
235
  'is_multimodal_processing': False,
153
- 'kwargs': {}
236
+ 'kwargs': {},
237
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
154
238
  },
155
239
  '.ppt': {
156
240
  'class': AlitaPowerPointLoader,
241
+ 'mime_type': 'application/vnd.ms-powerpoint',
157
242
  'is_multimodal_processing': False,
158
243
  'kwargs': {
159
244
  'mode': 'paged'
160
- }
245
+ },
246
+ 'allowed_to_override': {**DEFAULT_ALLOWED_WITH_LLM, 'mode': 'paged'}
161
247
  },
162
248
  '.pptx': {
163
249
  'class': AlitaPowerPointLoader,
250
+ 'mime_type': ('application/vnd.openxmlformats-officedocument.'
251
+ 'presentationml.presentation'),
164
252
  'is_multimodal_processing': False,
165
253
  'kwargs': {
166
254
  'mode': 'paged'
255
+ },
256
+ 'allowed_to_override': {
257
+ **DEFAULT_ALLOWED_WITH_LLM,
258
+ 'mode': 'paged',
259
+ 'pages_per_chunk': 5,
260
+ 'extract_images': False,
167
261
  }
168
262
  },
169
- '.py': {
170
- 'class': PythonLoader,
171
- 'is_multimodal_processing': False,
172
- 'kwargs': {}
173
- }
263
+ # '.py': {
264
+ # 'class': AlitaPythonLoader,
265
+ # 'mime_type': 'text/x-python',
266
+ # 'is_multimodal_processing': False,
267
+ # 'kwargs': {},
268
+ # 'allowed_to_override': DEFAULT_ALLOWED_BASE
269
+ # }
270
+ }
271
+
272
+ code_extensions = [
273
+ '.py', # Python
274
+ '.js', # JavaScript
275
+ '.ts', # TypeScript
276
+ '.java', # Java
277
+ '.cpp', # C++
278
+ '.c', # C
279
+ '.cs', # C#
280
+ '.rb', # Ruby
281
+ '.go', # Go
282
+ '.php', # PHP
283
+ '.swift', # Swift
284
+ '.kt', # Kotlin
285
+ '.rs', # Rust
286
+ '.m', # Objective-C
287
+ '.scala', # Scala
288
+ '.pl', # Perl
289
+ '.sh', # Shell
290
+ '.bat', # Batch
291
+ '.lua', # Lua
292
+ '.r', # R
293
+ '.pas', # Pascal
294
+ '.asm', # Assembly
295
+ '.dart', # Dart
296
+ '.groovy', # Groovy
297
+ '.sql', # SQL
298
+ ]
299
+
300
+ default_loader_config = {
301
+ 'class': AlitaTextLoader,
302
+ 'mime_type': 'text/plain',
303
+ 'is_multimodal_processing': False,
304
+ 'kwargs': {},
305
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
306
+ }
307
+
308
+ code_loaders_map = {ext: default_loader_config for ext in code_extensions}
309
+
310
+ # Combined mapping for backward compatibility
311
+ loaders_map = {
312
+ **image_loaders_map,
313
+ **image_loaders_map_converted,
314
+ **document_loaders_map,
315
+ **code_loaders_map
316
+ }
317
+
318
+ loaders_allowed_to_override = {
319
+ extension: config.get('allowed_to_override')
320
+ for extension, config in loaders_map.items()
321
+ if 'allowed_to_override' in config
174
322
  }
@@ -173,13 +173,15 @@ def get_vectorstore(vectorstore_type, vectorstore_params, embedding_func=None):
173
173
  #
174
174
  raise RuntimeError(f"Unknown VectorStore type: {vectorstore_type}")
175
175
 
176
- def add_documents(vectorstore, documents):
176
+ def add_documents(vectorstore, documents, ids = None) -> list[str]:
177
177
  """ Add documents to vectorstore """
178
178
  if vectorstore is None:
179
179
  return None
180
180
  texts = []
181
181
  metadata = []
182
182
  for document in documents:
183
+ if not document.page_content:
184
+ continue
183
185
  texts.append(document.page_content)
184
186
  for key in document.metadata:
185
187
  if isinstance(document.metadata[key], list):
@@ -187,7 +189,7 @@ def add_documents(vectorstore, documents):
187
189
  if isinstance(document.metadata[key], dict):
188
190
  document.metadata[key] = dumps(document.metadata[key])
189
191
  metadata.append(document.metadata)
190
- vectorstore.add_texts(texts, metadatas=metadata)
192
+ return vectorstore.add_texts(texts, metadatas=metadata, ids=ids)
191
193
 
192
194
 
193
195
  def generateResponse(