alita-sdk 0.3.263__py3-none-any.whl → 0.3.499__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3601 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1256 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +64 -8
  30. alita_sdk/community/inventory/__init__.py +224 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/visualize.py +1370 -0
  58. alita_sdk/configurations/__init__.py +10 -0
  59. alita_sdk/configurations/ado.py +4 -2
  60. alita_sdk/configurations/azure_search.py +1 -1
  61. alita_sdk/configurations/bigquery.py +1 -1
  62. alita_sdk/configurations/bitbucket.py +94 -2
  63. alita_sdk/configurations/browser.py +18 -0
  64. alita_sdk/configurations/carrier.py +19 -0
  65. alita_sdk/configurations/confluence.py +96 -1
  66. alita_sdk/configurations/delta_lake.py +1 -1
  67. alita_sdk/configurations/figma.py +0 -5
  68. alita_sdk/configurations/github.py +65 -1
  69. alita_sdk/configurations/gitlab.py +79 -0
  70. alita_sdk/configurations/google_places.py +17 -0
  71. alita_sdk/configurations/jira.py +103 -0
  72. alita_sdk/configurations/postman.py +1 -1
  73. alita_sdk/configurations/qtest.py +1 -3
  74. alita_sdk/configurations/report_portal.py +19 -0
  75. alita_sdk/configurations/salesforce.py +19 -0
  76. alita_sdk/configurations/service_now.py +1 -12
  77. alita_sdk/configurations/sharepoint.py +19 -0
  78. alita_sdk/configurations/sonar.py +18 -0
  79. alita_sdk/configurations/sql.py +20 -0
  80. alita_sdk/configurations/testio.py +18 -0
  81. alita_sdk/configurations/testrail.py +88 -0
  82. alita_sdk/configurations/xray.py +94 -1
  83. alita_sdk/configurations/zephyr_enterprise.py +94 -1
  84. alita_sdk/configurations/zephyr_essential.py +95 -0
  85. alita_sdk/runtime/clients/artifact.py +12 -2
  86. alita_sdk/runtime/clients/client.py +235 -66
  87. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  88. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  89. alita_sdk/runtime/clients/sandbox_client.py +373 -0
  90. alita_sdk/runtime/langchain/assistant.py +123 -17
  91. alita_sdk/runtime/langchain/constants.py +8 -1
  92. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  93. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
  94. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
  95. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +8 -2
  96. alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
  97. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
  98. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
  99. alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
  100. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
  101. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
  102. alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
  103. alita_sdk/runtime/langchain/document_loaders/constants.py +187 -40
  104. alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
  105. alita_sdk/runtime/langchain/langraph_agent.py +406 -91
  106. alita_sdk/runtime/langchain/utils.py +51 -8
  107. alita_sdk/runtime/llms/preloaded.py +2 -6
  108. alita_sdk/runtime/models/mcp_models.py +61 -0
  109. alita_sdk/runtime/toolkits/__init__.py +26 -0
  110. alita_sdk/runtime/toolkits/application.py +9 -2
  111. alita_sdk/runtime/toolkits/artifact.py +19 -7
  112. alita_sdk/runtime/toolkits/datasource.py +13 -6
  113. alita_sdk/runtime/toolkits/mcp.py +780 -0
  114. alita_sdk/runtime/toolkits/planning.py +178 -0
  115. alita_sdk/runtime/toolkits/subgraph.py +11 -6
  116. alita_sdk/runtime/toolkits/tools.py +214 -60
  117. alita_sdk/runtime/toolkits/vectorstore.py +9 -4
  118. alita_sdk/runtime/tools/__init__.py +22 -0
  119. alita_sdk/runtime/tools/application.py +16 -4
  120. alita_sdk/runtime/tools/artifact.py +312 -19
  121. alita_sdk/runtime/tools/function.py +100 -4
  122. alita_sdk/runtime/tools/graph.py +81 -0
  123. alita_sdk/runtime/tools/image_generation.py +212 -0
  124. alita_sdk/runtime/tools/llm.py +539 -180
  125. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  126. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  127. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  128. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  129. alita_sdk/runtime/tools/planning/models.py +246 -0
  130. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  131. alita_sdk/runtime/tools/router.py +2 -1
  132. alita_sdk/runtime/tools/sandbox.py +375 -0
  133. alita_sdk/runtime/tools/vectorstore.py +62 -63
  134. alita_sdk/runtime/tools/vectorstore_base.py +156 -85
  135. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  136. alita_sdk/runtime/utils/mcp_client.py +465 -0
  137. alita_sdk/runtime/utils/mcp_oauth.py +244 -0
  138. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  139. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  140. alita_sdk/runtime/utils/streamlit.py +41 -14
  141. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  142. alita_sdk/runtime/utils/utils.py +14 -0
  143. alita_sdk/tools/__init__.py +78 -35
  144. alita_sdk/tools/ado/__init__.py +0 -1
  145. alita_sdk/tools/ado/repos/__init__.py +10 -6
  146. alita_sdk/tools/ado/repos/repos_wrapper.py +12 -11
  147. alita_sdk/tools/ado/test_plan/__init__.py +10 -7
  148. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -23
  149. alita_sdk/tools/ado/wiki/__init__.py +10 -11
  150. alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -28
  151. alita_sdk/tools/ado/work_item/__init__.py +10 -11
  152. alita_sdk/tools/ado/work_item/ado_wrapper.py +63 -10
  153. alita_sdk/tools/advanced_jira_mining/__init__.py +10 -7
  154. alita_sdk/tools/aws/delta_lake/__init__.py +13 -11
  155. alita_sdk/tools/azure_ai/search/__init__.py +11 -7
  156. alita_sdk/tools/base_indexer_toolkit.py +392 -86
  157. alita_sdk/tools/bitbucket/__init__.py +18 -11
  158. alita_sdk/tools/bitbucket/api_wrapper.py +52 -9
  159. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
  160. alita_sdk/tools/browser/__init__.py +40 -16
  161. alita_sdk/tools/browser/crawler.py +3 -1
  162. alita_sdk/tools/browser/utils.py +15 -6
  163. alita_sdk/tools/carrier/__init__.py +17 -17
  164. alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
  165. alita_sdk/tools/carrier/excel_reporter.py +8 -4
  166. alita_sdk/tools/chunkers/__init__.py +3 -1
  167. alita_sdk/tools/chunkers/code/codeparser.py +1 -1
  168. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  169. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  170. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  171. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  172. alita_sdk/tools/cloud/aws/__init__.py +9 -6
  173. alita_sdk/tools/cloud/azure/__init__.py +9 -6
  174. alita_sdk/tools/cloud/gcp/__init__.py +9 -6
  175. alita_sdk/tools/cloud/k8s/__init__.py +9 -6
  176. alita_sdk/tools/code/linter/__init__.py +7 -7
  177. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  178. alita_sdk/tools/code/sonar/__init__.py +18 -12
  179. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  180. alita_sdk/tools/confluence/__init__.py +14 -11
  181. alita_sdk/tools/confluence/api_wrapper.py +198 -58
  182. alita_sdk/tools/confluence/loader.py +10 -0
  183. alita_sdk/tools/custom_open_api/__init__.py +9 -4
  184. alita_sdk/tools/elastic/__init__.py +8 -7
  185. alita_sdk/tools/elitea_base.py +543 -64
  186. alita_sdk/tools/figma/__init__.py +10 -8
  187. alita_sdk/tools/figma/api_wrapper.py +352 -153
  188. alita_sdk/tools/github/__init__.py +13 -11
  189. alita_sdk/tools/github/api_wrapper.py +9 -26
  190. alita_sdk/tools/github/github_client.py +75 -12
  191. alita_sdk/tools/github/schemas.py +2 -1
  192. alita_sdk/tools/gitlab/__init__.py +11 -10
  193. alita_sdk/tools/gitlab/api_wrapper.py +135 -45
  194. alita_sdk/tools/gitlab_org/__init__.py +11 -9
  195. alita_sdk/tools/google/bigquery/__init__.py +12 -13
  196. alita_sdk/tools/google_places/__init__.py +18 -10
  197. alita_sdk/tools/jira/__init__.py +14 -8
  198. alita_sdk/tools/jira/api_wrapper.py +315 -168
  199. alita_sdk/tools/keycloak/__init__.py +8 -7
  200. alita_sdk/tools/localgit/local_git.py +56 -54
  201. alita_sdk/tools/memory/__init__.py +27 -11
  202. alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
  203. alita_sdk/tools/ocr/__init__.py +8 -7
  204. alita_sdk/tools/openapi/__init__.py +10 -1
  205. alita_sdk/tools/pandas/__init__.py +8 -7
  206. alita_sdk/tools/pandas/api_wrapper.py +7 -25
  207. alita_sdk/tools/postman/__init__.py +8 -10
  208. alita_sdk/tools/postman/api_wrapper.py +19 -8
  209. alita_sdk/tools/postman/postman_analysis.py +8 -1
  210. alita_sdk/tools/pptx/__init__.py +8 -9
  211. alita_sdk/tools/qtest/__init__.py +19 -13
  212. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  213. alita_sdk/tools/rally/__init__.py +10 -9
  214. alita_sdk/tools/report_portal/__init__.py +20 -15
  215. alita_sdk/tools/salesforce/__init__.py +19 -15
  216. alita_sdk/tools/servicenow/__init__.py +14 -11
  217. alita_sdk/tools/sharepoint/__init__.py +14 -13
  218. alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
  219. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  220. alita_sdk/tools/sharepoint/utils.py +8 -2
  221. alita_sdk/tools/slack/__init__.py +10 -7
  222. alita_sdk/tools/sql/__init__.py +19 -18
  223. alita_sdk/tools/sql/api_wrapper.py +71 -23
  224. alita_sdk/tools/testio/__init__.py +18 -12
  225. alita_sdk/tools/testrail/__init__.py +10 -10
  226. alita_sdk/tools/testrail/api_wrapper.py +213 -45
  227. alita_sdk/tools/utils/__init__.py +28 -4
  228. alita_sdk/tools/utils/content_parser.py +181 -61
  229. alita_sdk/tools/utils/text_operations.py +254 -0
  230. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
  231. alita_sdk/tools/xray/__init__.py +12 -7
  232. alita_sdk/tools/xray/api_wrapper.py +58 -113
  233. alita_sdk/tools/zephyr/__init__.py +9 -6
  234. alita_sdk/tools/zephyr_enterprise/__init__.py +13 -8
  235. alita_sdk/tools/zephyr_enterprise/api_wrapper.py +17 -7
  236. alita_sdk/tools/zephyr_essential/__init__.py +13 -9
  237. alita_sdk/tools/zephyr_essential/api_wrapper.py +289 -47
  238. alita_sdk/tools/zephyr_essential/client.py +6 -4
  239. alita_sdk/tools/zephyr_scale/__init__.py +10 -7
  240. alita_sdk/tools/zephyr_scale/api_wrapper.py +6 -2
  241. alita_sdk/tools/zephyr_squad/__init__.py +9 -6
  242. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +180 -33
  243. alita_sdk-0.3.499.dist-info/RECORD +433 -0
  244. alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
  245. alita_sdk-0.3.263.dist-info/RECORD +0 -342
  246. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
  247. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
  248. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0
@@ -13,9 +13,8 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from langchain_community.document_loaders import (
16
- UnstructuredMarkdownLoader,
17
16
  AirbyteJSONLoader, UnstructuredHTMLLoader,
18
- PythonLoader)
17
+ UnstructuredXMLLoader)
19
18
 
20
19
  from .AlitaCSVLoader import AlitaCSVLoader
21
20
  from .AlitaDocxMammothLoader import AlitaDocxMammothLoader
@@ -25,150 +24,298 @@ from .AlitaJSONLoader import AlitaJSONLoader
25
24
  from .AlitaPDFLoader import AlitaPDFLoader
26
25
  from .AlitaPowerPointLoader import AlitaPowerPointLoader
27
26
  from .AlitaTextLoader import AlitaTextLoader
27
+ from .AlitaMarkdownLoader import AlitaMarkdownLoader
28
+ from .AlitaPythonLoader import AlitaPythonLoader
29
+ from enum import Enum
30
+ from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
28
31
 
29
- loaders_map = {
32
+
33
+ class LoaderProperties(Enum):
34
+ LLM = 'use_llm'
35
+ PROMPT_DEFAULT = 'use_default_prompt'
36
+ PROMPT = 'prompt'
37
+
38
+ DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}
39
+
40
+ DEFAULT_ALLOWED_WITH_LLM = {
41
+ **DEFAULT_ALLOWED_BASE,
42
+ LoaderProperties.LLM.value: False,
43
+ LoaderProperties.PROMPT_DEFAULT.value: False,
44
+ LoaderProperties.PROMPT.value: "",
45
+ }
46
+
47
+ DEFAULT_ALLOWED_EXCEL = {**DEFAULT_ALLOWED_WITH_LLM, 'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''}
48
+
49
+ # Image file loaders mapping - directly supported by LLM with image_url
50
+ image_loaders_map = {
30
51
  '.png': {
31
52
  'class': AlitaImageLoader,
53
+ 'mime_type': 'image/png',
32
54
  'is_multimodal_processing': True,
33
- 'kwargs': {}
55
+ 'kwargs': {},
56
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM,
34
57
  },
35
58
  '.jpg': {
36
59
  'class': AlitaImageLoader,
60
+ 'mime_type': 'image/jpeg',
37
61
  'is_multimodal_processing': True,
38
- 'kwargs': {}
62
+ 'kwargs': {},
63
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
39
64
  },
40
65
  '.jpeg': {
41
66
  'class': AlitaImageLoader,
67
+ 'mime_type': 'image/jpeg',
42
68
  'is_multimodal_processing': True,
43
- 'kwargs': {}
69
+ 'kwargs': {},
70
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
44
71
  },
45
72
  '.gif': {
46
73
  'class': AlitaImageLoader,
74
+ 'mime_type': 'image/gif',
47
75
  'is_multimodal_processing': True,
48
- 'kwargs': {}
76
+ 'kwargs': {},
77
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
49
78
  },
79
+ '.webp': {
80
+ 'class': AlitaImageLoader,
81
+ 'mime_type': 'image/webp',
82
+ 'is_multimodal_processing': True,
83
+ 'kwargs': {},
84
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
85
+ }
86
+ }
87
+
88
+ # Image file loaders mapping - require conversion before sending to LLM
89
+ image_loaders_map_converted = {
50
90
  '.bmp': {
51
91
  'class': AlitaImageLoader,
92
+ 'mime_type': 'image/bmp',
52
93
  'is_multimodal_processing': True,
53
- 'kwargs': {}
94
+ 'kwargs': {},
95
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
54
96
  },
55
97
  '.svg': {
56
98
  'class': AlitaImageLoader,
99
+ 'mime_type': 'image/svg+xml',
57
100
  'is_multimodal_processing': True,
58
- 'kwargs': {}
59
- },
101
+ 'kwargs': {},
102
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
103
+ }
104
+ }
105
+
106
+ # Document file loaders mapping
107
+ document_loaders_map = {
60
108
  '.txt': {
61
109
  'class': AlitaTextLoader,
110
+ 'mime_type': 'text/plain',
62
111
  'is_multimodal_processing': False,
63
112
  'kwargs': {
64
113
  'autodetect_encoding': True
65
- }
114
+ },
115
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
66
116
  },
67
117
  '.yml': {
68
118
  'class': AlitaTextLoader,
119
+ 'mime_type': 'application/yaml',
69
120
  'is_multimodal_processing': False,
70
121
  'kwargs': {
71
122
  'autodetect_encoding': True
72
- }
123
+ },
124
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
73
125
  },
74
126
  '.yaml': {
75
127
  'class': AlitaTextLoader,
128
+ 'mime_type': 'application/yaml',
76
129
  'is_multimodal_processing': False,
77
130
  'kwargs': {
78
131
  'autodetect_encoding': True
79
- }
132
+ },
133
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
80
134
  },
81
135
  '.groovy': {
82
136
  'class': AlitaTextLoader,
137
+ 'mime_type': 'text/x-groovy',
83
138
  'is_multimodal_processing': False,
84
139
  'kwargs': {
85
140
  'autodetect_encoding': True
86
- }
141
+ },
142
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
87
143
  },
88
144
  '.md': {
89
- 'class': UnstructuredMarkdownLoader,
145
+ 'class': AlitaMarkdownLoader,
146
+ 'mime_type': 'text/markdown',
90
147
  'is_multimodal_processing': False,
91
- 'kwargs': {}
148
+ 'kwargs': {},
149
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
92
150
  },
93
151
  '.csv': {
94
152
  'class': AlitaCSVLoader,
153
+ 'mime_type': 'text/csv',
95
154
  'is_multimodal_processing': False,
96
155
  'kwargs': {
97
156
  'encoding': 'utf-8',
98
- 'raw_content': False,
157
+ 'raw_content': True,
99
158
  'cleanse': False
100
- }
159
+ },
160
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
101
161
  },
102
162
  '.xlsx': {
103
163
  'class': AlitaExcelLoader,
164
+ 'mime_type': ('application/vnd.openxmlformats-officedocument.'
165
+ 'spreadsheetml.sheet'),
104
166
  'is_multimodal_processing': False,
105
167
  'kwargs': {
106
- 'raw_content': False,
107
- 'cleanse': False
108
- }
168
+ 'add_header_to_chunks': False,
169
+ 'header_row_number': 1,
170
+ 'max_tokens': -1,
171
+ 'sheet_name': ''
172
+ },
173
+ 'allowed_to_override': DEFAULT_ALLOWED_EXCEL
109
174
  },
110
175
  '.xls': {
111
176
  'class': AlitaExcelLoader,
177
+ 'mime_type': 'application/vnd.ms-excel',
112
178
  'is_multimodal_processing': False,
113
179
  'kwargs': {
114
- 'raw_content': False,
180
+ 'excel_by_sheets': True,
181
+ 'raw_content': True,
115
182
  'cleanse': False
116
- }
183
+ },
184
+ 'allowed_to_override': DEFAULT_ALLOWED_EXCEL
117
185
  },
118
186
  '.pdf': {
119
187
  'class': AlitaPDFLoader,
188
+ 'mime_type': 'application/pdf',
120
189
  'is_multimodal_processing': False,
121
- 'kwargs': {}
190
+ 'kwargs': {},
191
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
122
192
  },
123
193
  '.docx': {
124
194
  'class': AlitaDocxMammothLoader,
195
+ 'mime_type': ('application/vnd.openxmlformats-officedocument.'
196
+ 'wordprocessingml.document'),
125
197
  'is_multimodal_processing': True,
126
198
  'kwargs': {
127
199
  'extract_images': True
128
- }
129
- },
130
- '.doc': {
131
- 'class': AlitaTextLoader,
132
- 'is_multimodal_processing': True,
133
- 'kwargs': {}
200
+ },
201
+ 'allowed_to_override': {**DEFAULT_ALLOWED_WITH_LLM, 'mode': 'paged'}
134
202
  },
135
203
  '.json': {
136
204
  'class': AlitaJSONLoader,
205
+ 'mime_type': 'application/json',
137
206
  'is_multimodal_processing': False,
138
- 'kwargs': {}
207
+ 'kwargs': {},
208
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
139
209
  },
140
210
  '.jsonl': {
141
211
  'class': AirbyteJSONLoader,
212
+ 'mime_type': 'application/jsonl',
142
213
  'is_multimodal_processing': False,
143
- 'kwargs': {}
214
+ 'kwargs': {},
215
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
144
216
  },
145
217
  '.htm': {
146
218
  'class': UnstructuredHTMLLoader,
219
+ 'mime_type': 'text/html',
147
220
  'is_multimodal_processing': False,
148
- 'kwargs': {}
221
+ 'kwargs': {},
222
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
149
223
  },
150
224
  '.html': {
151
225
  'class': UnstructuredHTMLLoader,
226
+ 'mime_type': 'text/html',
227
+ 'is_multimodal_processing': False,
228
+ 'kwargs': {},
229
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
230
+ },
231
+ '.xml': {
232
+ 'class': UnstructuredXMLLoader,
233
+ 'mime_type': 'text/xml',
152
234
  'is_multimodal_processing': False,
153
- 'kwargs': {}
235
+ 'kwargs': {},
236
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
154
237
  },
155
238
  '.ppt': {
156
239
  'class': AlitaPowerPointLoader,
240
+ 'mime_type': 'application/vnd.ms-powerpoint',
157
241
  'is_multimodal_processing': False,
158
242
  'kwargs': {
159
243
  'mode': 'paged'
160
- }
244
+ },
245
+ 'allowed_to_override': {**DEFAULT_ALLOWED_WITH_LLM, 'mode': 'paged'}
161
246
  },
162
247
  '.pptx': {
163
248
  'class': AlitaPowerPointLoader,
249
+ 'mime_type': ('application/vnd.openxmlformats-officedocument.'
250
+ 'presentationml.presentation'),
164
251
  'is_multimodal_processing': False,
165
252
  'kwargs': {
166
253
  'mode': 'paged'
254
+ },
255
+ 'allowed_to_override': {
256
+ **DEFAULT_ALLOWED_WITH_LLM,
257
+ 'mode': 'paged',
258
+ 'pages_per_chunk': 5,
259
+ 'extract_images': False,
167
260
  }
168
261
  },
169
- '.py': {
170
- 'class': PythonLoader,
171
- 'is_multimodal_processing': False,
172
- 'kwargs': {}
173
- }
262
+ # '.py': {
263
+ # 'class': AlitaPythonLoader,
264
+ # 'mime_type': 'text/x-python',
265
+ # 'is_multimodal_processing': False,
266
+ # 'kwargs': {},
267
+ # 'allowed_to_override': DEFAULT_ALLOWED_BASE
268
+ # }
269
+ }
270
+
271
+ code_extensions = [
272
+ '.py', # Python
273
+ '.js', # JavaScript
274
+ '.ts', # TypeScript
275
+ '.java', # Java
276
+ '.cpp', # C++
277
+ '.c', # C
278
+ '.cs', # C#
279
+ '.rb', # Ruby
280
+ '.go', # Go
281
+ '.php', # PHP
282
+ '.swift', # Swift
283
+ '.kt', # Kotlin
284
+ '.rs', # Rust
285
+ '.m', # Objective-C
286
+ '.scala', # Scala
287
+ '.pl', # Perl
288
+ '.sh', # Shell
289
+ '.bat', # Batch
290
+ '.lua', # Lua
291
+ '.r', # R
292
+ '.pas', # Pascal
293
+ '.asm', # Assembly
294
+ '.dart', # Dart
295
+ '.groovy', # Groovy
296
+ '.sql', # SQL
297
+ ]
298
+
299
+ default_loader_config = {
300
+ 'class': AlitaTextLoader,
301
+ 'mime_type': 'text/plain',
302
+ 'is_multimodal_processing': False,
303
+ 'kwargs': {},
304
+ 'allowed_to_override': DEFAULT_ALLOWED_BASE
305
+ }
306
+
307
+ code_loaders_map = {ext: default_loader_config for ext in code_extensions}
308
+
309
+ # Combined mapping for backward compatibility
310
+ loaders_map = {
311
+ **image_loaders_map,
312
+ **image_loaders_map_converted,
313
+ **document_loaders_map,
314
+ **code_loaders_map
315
+ }
316
+
317
+ loaders_allowed_to_override = {
318
+ extension: config.get('allowed_to_override')
319
+ for extension, config in loaders_map.items()
320
+ if 'allowed_to_override' in config
174
321
  }
@@ -173,13 +173,15 @@ def get_vectorstore(vectorstore_type, vectorstore_params, embedding_func=None):
173
173
  #
174
174
  raise RuntimeError(f"Unknown VectorStore type: {vectorstore_type}")
175
175
 
176
- def add_documents(vectorstore, documents):
176
+ def add_documents(vectorstore, documents, ids = None) -> list[str]:
177
177
  """ Add documents to vectorstore """
178
178
  if vectorstore is None:
179
179
  return None
180
180
  texts = []
181
181
  metadata = []
182
182
  for document in documents:
183
+ if not document.page_content:
184
+ continue
183
185
  texts.append(document.page_content)
184
186
  for key in document.metadata:
185
187
  if isinstance(document.metadata[key], list):
@@ -187,7 +189,7 @@ def add_documents(vectorstore, documents):
187
189
  if isinstance(document.metadata[key], dict):
188
190
  document.metadata[key] = dumps(document.metadata[key])
189
191
  metadata.append(document.metadata)
190
- vectorstore.add_texts(texts, metadatas=metadata)
192
+ return vectorstore.add_texts(texts, metadatas=metadata, ids=ids)
191
193
 
192
194
 
193
195
  def generateResponse(