alita-sdk 0.3.462__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +258 -0
  3. alita_sdk/cli/agent_executor.py +15 -3
  4. alita_sdk/cli/agent_loader.py +56 -8
  5. alita_sdk/cli/agent_ui.py +93 -31
  6. alita_sdk/cli/agents.py +2274 -230
  7. alita_sdk/cli/callbacks.py +96 -25
  8. alita_sdk/cli/cli.py +10 -1
  9. alita_sdk/cli/config.py +162 -9
  10. alita_sdk/cli/context/__init__.py +30 -0
  11. alita_sdk/cli/context/cleanup.py +198 -0
  12. alita_sdk/cli/context/manager.py +731 -0
  13. alita_sdk/cli/context/message.py +285 -0
  14. alita_sdk/cli/context/strategies.py +289 -0
  15. alita_sdk/cli/context/token_estimation.py +127 -0
  16. alita_sdk/cli/input_handler.py +419 -0
  17. alita_sdk/cli/inventory.py +1073 -0
  18. alita_sdk/cli/testcases/__init__.py +94 -0
  19. alita_sdk/cli/testcases/data_generation.py +119 -0
  20. alita_sdk/cli/testcases/discovery.py +96 -0
  21. alita_sdk/cli/testcases/executor.py +84 -0
  22. alita_sdk/cli/testcases/logger.py +85 -0
  23. alita_sdk/cli/testcases/parser.py +172 -0
  24. alita_sdk/cli/testcases/prompts.py +91 -0
  25. alita_sdk/cli/testcases/reporting.py +125 -0
  26. alita_sdk/cli/testcases/setup.py +108 -0
  27. alita_sdk/cli/testcases/test_runner.py +282 -0
  28. alita_sdk/cli/testcases/utils.py +39 -0
  29. alita_sdk/cli/testcases/validation.py +90 -0
  30. alita_sdk/cli/testcases/workflow.py +196 -0
  31. alita_sdk/cli/toolkit.py +14 -17
  32. alita_sdk/cli/toolkit_loader.py +35 -5
  33. alita_sdk/cli/tools/__init__.py +36 -2
  34. alita_sdk/cli/tools/approval.py +224 -0
  35. alita_sdk/cli/tools/filesystem.py +910 -64
  36. alita_sdk/cli/tools/planning.py +389 -0
  37. alita_sdk/cli/tools/terminal.py +414 -0
  38. alita_sdk/community/__init__.py +72 -12
  39. alita_sdk/community/inventory/__init__.py +236 -0
  40. alita_sdk/community/inventory/config.py +257 -0
  41. alita_sdk/community/inventory/enrichment.py +2137 -0
  42. alita_sdk/community/inventory/extractors.py +1469 -0
  43. alita_sdk/community/inventory/ingestion.py +3172 -0
  44. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  45. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  46. alita_sdk/community/inventory/parsers/base.py +295 -0
  47. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  48. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  49. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  50. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  51. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  52. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  53. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  54. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  55. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  56. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  57. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  58. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  59. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  60. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  61. alita_sdk/community/inventory/patterns/loader.py +348 -0
  62. alita_sdk/community/inventory/patterns/registry.py +198 -0
  63. alita_sdk/community/inventory/presets.py +535 -0
  64. alita_sdk/community/inventory/retrieval.py +1403 -0
  65. alita_sdk/community/inventory/toolkit.py +173 -0
  66. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  67. alita_sdk/community/inventory/visualize.py +1370 -0
  68. alita_sdk/configurations/__init__.py +1 -1
  69. alita_sdk/configurations/ado.py +141 -20
  70. alita_sdk/configurations/bitbucket.py +0 -3
  71. alita_sdk/configurations/confluence.py +76 -42
  72. alita_sdk/configurations/figma.py +76 -0
  73. alita_sdk/configurations/gitlab.py +17 -5
  74. alita_sdk/configurations/openapi.py +329 -0
  75. alita_sdk/configurations/qtest.py +72 -1
  76. alita_sdk/configurations/report_portal.py +96 -0
  77. alita_sdk/configurations/sharepoint.py +148 -0
  78. alita_sdk/configurations/testio.py +83 -0
  79. alita_sdk/runtime/clients/artifact.py +3 -3
  80. alita_sdk/runtime/clients/client.py +353 -48
  81. alita_sdk/runtime/clients/sandbox_client.py +0 -21
  82. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  83. alita_sdk/runtime/langchain/assistant.py +123 -26
  84. alita_sdk/runtime/langchain/constants.py +642 -1
  85. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  86. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  87. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +6 -3
  88. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  89. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  90. alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
  91. alita_sdk/runtime/langchain/langraph_agent.py +279 -73
  92. alita_sdk/runtime/langchain/utils.py +82 -15
  93. alita_sdk/runtime/llms/preloaded.py +2 -6
  94. alita_sdk/runtime/skills/__init__.py +91 -0
  95. alita_sdk/runtime/skills/callbacks.py +498 -0
  96. alita_sdk/runtime/skills/discovery.py +540 -0
  97. alita_sdk/runtime/skills/executor.py +610 -0
  98. alita_sdk/runtime/skills/input_builder.py +371 -0
  99. alita_sdk/runtime/skills/models.py +330 -0
  100. alita_sdk/runtime/skills/registry.py +355 -0
  101. alita_sdk/runtime/skills/skill_runner.py +330 -0
  102. alita_sdk/runtime/toolkits/__init__.py +7 -0
  103. alita_sdk/runtime/toolkits/application.py +21 -9
  104. alita_sdk/runtime/toolkits/artifact.py +15 -5
  105. alita_sdk/runtime/toolkits/datasource.py +13 -6
  106. alita_sdk/runtime/toolkits/mcp.py +139 -251
  107. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  108. alita_sdk/runtime/toolkits/planning.py +178 -0
  109. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  110. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  111. alita_sdk/runtime/toolkits/tools.py +238 -32
  112. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  113. alita_sdk/runtime/tools/__init__.py +3 -1
  114. alita_sdk/runtime/tools/application.py +20 -6
  115. alita_sdk/runtime/tools/artifact.py +511 -28
  116. alita_sdk/runtime/tools/data_analysis.py +183 -0
  117. alita_sdk/runtime/tools/function.py +43 -15
  118. alita_sdk/runtime/tools/image_generation.py +50 -44
  119. alita_sdk/runtime/tools/llm.py +852 -67
  120. alita_sdk/runtime/tools/loop.py +3 -1
  121. alita_sdk/runtime/tools/loop_output.py +3 -1
  122. alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
  123. alita_sdk/runtime/tools/mcp_server_tool.py +7 -6
  124. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  125. alita_sdk/runtime/tools/planning/models.py +246 -0
  126. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  127. alita_sdk/runtime/tools/router.py +2 -4
  128. alita_sdk/runtime/tools/sandbox.py +9 -6
  129. alita_sdk/runtime/tools/skill_router.py +776 -0
  130. alita_sdk/runtime/tools/tool.py +3 -1
  131. alita_sdk/runtime/tools/vectorstore.py +7 -2
  132. alita_sdk/runtime/tools/vectorstore_base.py +51 -11
  133. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  134. alita_sdk/runtime/utils/constants.py +5 -1
  135. alita_sdk/runtime/utils/mcp_client.py +492 -0
  136. alita_sdk/runtime/utils/mcp_oauth.py +202 -5
  137. alita_sdk/runtime/utils/mcp_sse_client.py +36 -7
  138. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  139. alita_sdk/runtime/utils/serialization.py +155 -0
  140. alita_sdk/runtime/utils/streamlit.py +6 -10
  141. alita_sdk/runtime/utils/toolkit_utils.py +16 -5
  142. alita_sdk/runtime/utils/utils.py +36 -0
  143. alita_sdk/tools/__init__.py +113 -29
  144. alita_sdk/tools/ado/repos/__init__.py +51 -33
  145. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  146. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  147. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  148. alita_sdk/tools/ado/utils.py +1 -18
  149. alita_sdk/tools/ado/wiki/__init__.py +25 -8
  150. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  151. alita_sdk/tools/ado/work_item/__init__.py +26 -9
  152. alita_sdk/tools/ado/work_item/ado_wrapper.py +56 -3
  153. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  154. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  155. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  156. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  157. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  158. alita_sdk/tools/base/tool.py +5 -1
  159. alita_sdk/tools/base_indexer_toolkit.py +170 -45
  160. alita_sdk/tools/bitbucket/__init__.py +17 -12
  161. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  162. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  163. alita_sdk/tools/browser/__init__.py +5 -4
  164. alita_sdk/tools/carrier/__init__.py +5 -6
  165. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  166. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  167. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  168. alita_sdk/tools/chunkers/__init__.py +3 -1
  169. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  170. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  171. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  172. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  173. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  174. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  175. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  176. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  177. alita_sdk/tools/code/linter/__init__.py +10 -8
  178. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  179. alita_sdk/tools/code/sonar/__init__.py +10 -7
  180. alita_sdk/tools/code_indexer_toolkit.py +73 -23
  181. alita_sdk/tools/confluence/__init__.py +21 -15
  182. alita_sdk/tools/confluence/api_wrapper.py +78 -23
  183. alita_sdk/tools/confluence/loader.py +4 -2
  184. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  185. alita_sdk/tools/elastic/__init__.py +11 -8
  186. alita_sdk/tools/elitea_base.py +493 -30
  187. alita_sdk/tools/figma/__init__.py +58 -11
  188. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  189. alita_sdk/tools/figma/figma_client.py +73 -0
  190. alita_sdk/tools/figma/toon_tools.py +2748 -0
  191. alita_sdk/tools/github/__init__.py +13 -14
  192. alita_sdk/tools/github/github_client.py +224 -100
  193. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  194. alita_sdk/tools/github/schemas.py +14 -5
  195. alita_sdk/tools/github/tool.py +5 -1
  196. alita_sdk/tools/github/tool_prompts.py +9 -22
  197. alita_sdk/tools/gitlab/__init__.py +15 -11
  198. alita_sdk/tools/gitlab/api_wrapper.py +207 -41
  199. alita_sdk/tools/gitlab_org/__init__.py +10 -8
  200. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  201. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  202. alita_sdk/tools/google/bigquery/tool.py +5 -1
  203. alita_sdk/tools/google_places/__init__.py +10 -8
  204. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  205. alita_sdk/tools/jira/__init__.py +17 -11
  206. alita_sdk/tools/jira/api_wrapper.py +91 -40
  207. alita_sdk/tools/keycloak/__init__.py +11 -8
  208. alita_sdk/tools/localgit/__init__.py +9 -3
  209. alita_sdk/tools/localgit/local_git.py +62 -54
  210. alita_sdk/tools/localgit/tool.py +5 -1
  211. alita_sdk/tools/memory/__init__.py +11 -3
  212. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  213. alita_sdk/tools/ocr/__init__.py +11 -8
  214. alita_sdk/tools/openapi/__init__.py +490 -114
  215. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  216. alita_sdk/tools/openapi/tool.py +20 -0
  217. alita_sdk/tools/pandas/__init__.py +20 -12
  218. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  219. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  220. alita_sdk/tools/postman/__init__.py +11 -11
  221. alita_sdk/tools/pptx/__init__.py +10 -9
  222. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  223. alita_sdk/tools/qtest/__init__.py +30 -10
  224. alita_sdk/tools/qtest/api_wrapper.py +430 -13
  225. alita_sdk/tools/rally/__init__.py +10 -8
  226. alita_sdk/tools/rally/api_wrapper.py +1 -1
  227. alita_sdk/tools/report_portal/__init__.py +12 -9
  228. alita_sdk/tools/salesforce/__init__.py +10 -9
  229. alita_sdk/tools/servicenow/__init__.py +17 -14
  230. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  231. alita_sdk/tools/sharepoint/__init__.py +10 -8
  232. alita_sdk/tools/sharepoint/api_wrapper.py +4 -4
  233. alita_sdk/tools/slack/__init__.py +10 -8
  234. alita_sdk/tools/slack/api_wrapper.py +2 -2
  235. alita_sdk/tools/sql/__init__.py +11 -9
  236. alita_sdk/tools/testio/__init__.py +10 -8
  237. alita_sdk/tools/testrail/__init__.py +11 -8
  238. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  239. alita_sdk/tools/utils/__init__.py +9 -4
  240. alita_sdk/tools/utils/content_parser.py +77 -3
  241. alita_sdk/tools/utils/text_operations.py +410 -0
  242. alita_sdk/tools/utils/tool_prompts.py +79 -0
  243. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  244. alita_sdk/tools/xray/__init__.py +12 -9
  245. alita_sdk/tools/yagmail/__init__.py +9 -3
  246. alita_sdk/tools/zephyr/__init__.py +9 -7
  247. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -8
  248. alita_sdk/tools/zephyr_essential/__init__.py +10 -8
  249. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  250. alita_sdk/tools/zephyr_essential/client.py +2 -2
  251. alita_sdk/tools/zephyr_scale/__init__.py +11 -9
  252. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  253. alita_sdk/tools/zephyr_squad/__init__.py +10 -8
  254. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +147 -7
  255. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  256. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  257. alita_sdk-0.3.462.dist-info/RECORD +0 -384
  258. alita_sdk-0.3.462.dist-info/entry_points.txt +0 -2
  259. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  260. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  261. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
@@ -21,14 +21,16 @@ from openpyxl import load_workbook
21
21
  from xlrd import open_workbook
22
22
  from langchain_core.documents import Document
23
23
  from .AlitaTableLoader import AlitaTableLoader
24
+ from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
24
25
 
25
26
  cell_delimiter = " | "
26
27
 
27
28
  class AlitaExcelLoader(AlitaTableLoader):
28
- excel_by_sheets: bool = False
29
29
  sheet_name: str = None
30
- return_type: str = 'str'
31
30
  file_name: str = None
31
+ max_tokens: int = LOADER_MAX_TOKENS_DEFAULT
32
+ add_header_to_chunks: bool = False
33
+ header_row_number: int = 1
32
34
 
33
35
  def __init__(self, **kwargs):
34
36
  if not kwargs.get('file_path'):
@@ -39,9 +41,22 @@ class AlitaExcelLoader(AlitaTableLoader):
39
41
  else:
40
42
  self.file_name = kwargs.get('file_path')
41
43
  super().__init__(**kwargs)
42
- self.excel_by_sheets = kwargs.get('excel_by_sheets')
43
- self.return_type = kwargs.get('return_type')
44
44
  self.sheet_name = kwargs.get('sheet_name')
45
+ # Set and validate chunking parameters only once
46
+ self.max_tokens = int(kwargs.get('max_tokens', LOADER_MAX_TOKENS_DEFAULT))
47
+ self.add_header_to_chunks = bool(kwargs.get('add_header_to_chunks', False))
48
+ header_row_number = kwargs.get('header_row_number', 1)
49
+ # Validate header_row_number
50
+ try:
51
+ header_row_number = int(header_row_number)
52
+ if header_row_number > 0:
53
+ self.header_row_number = header_row_number
54
+ else:
55
+ self.header_row_number = 1
56
+ self.add_header_to_chunks = False
57
+ except (ValueError, TypeError):
58
+ self.header_row_number = 1
59
+ self.add_header_to_chunks = False
45
60
 
46
61
  def get_content(self):
47
62
  try:
@@ -64,59 +79,32 @@ class AlitaExcelLoader(AlitaTableLoader):
64
79
  Reads .xlsx files using openpyxl.
65
80
  """
66
81
  workbook = load_workbook(self.file_path, data_only=True) # `data_only=True` ensures we get cell values, not formulas
67
-
82
+ sheets = workbook.sheetnames
68
83
  if self.sheet_name:
69
- # If a specific sheet name is provided, parse only that sheet
70
- if self.sheet_name in workbook.sheetnames:
84
+ if self.sheet_name in sheets:
71
85
  sheet_content = self.parse_sheet(workbook[self.sheet_name])
72
- return sheet_content
73
86
  else:
74
- raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
75
- elif self.excel_by_sheets:
76
- # Parse each sheet individually and return as a dictionary
77
- result = {}
78
- for sheet_name in workbook.sheetnames:
79
- sheet_content = self.parse_sheet(workbook[sheet_name])
80
- result[sheet_name] = sheet_content
81
- return result
87
+ sheet_content = [f"Sheet '{self.sheet_name}' does not exist in the workbook."]
88
+ return {self.sheet_name: sheet_content}
82
89
  else:
83
- # Combine all sheets into a single string result
84
- result = []
85
- for sheet_name in workbook.sheetnames:
86
- sheet_content = self.parse_sheet(workbook[sheet_name])
87
- result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
88
- return "\n\n".join(result)
90
+ # Dictionary comprehension for all sheets
91
+ return {name: self.parse_sheet(workbook[name]) for name in sheets}
89
92
 
90
93
  def _read_xls(self):
91
94
  """
92
95
  Reads .xls files using xlrd.
93
96
  """
94
97
  workbook = open_workbook(filename=self.file_name, file_contents=self.file_content)
95
-
98
+ sheets = workbook.sheet_names()
96
99
  if self.sheet_name:
97
- # If a specific sheet name is provided, parse only that sheet
98
- if self.sheet_name in workbook.sheet_names():
100
+ if self.sheet_name in sheets:
99
101
  sheet = workbook.sheet_by_name(self.sheet_name)
100
- sheet_content = self.parse_sheet_xls(sheet)
101
- return sheet_content
102
+ return {self.sheet_name: self.parse_sheet_xls(sheet)}
102
103
  else:
103
- raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
104
- elif self.excel_by_sheets:
105
- # Parse each sheet individually and return as a dictionary
106
- result = {}
107
- for sheet_name in workbook.sheet_names():
108
- sheet = workbook.sheet_by_name(sheet_name)
109
- sheet_content = self.parse_sheet_xls(sheet)
110
- result[sheet_name] = sheet_content
111
- return result
104
+ return {self.sheet_name: [f"Sheet '{self.sheet_name}' does not exist in the workbook."]}
112
105
  else:
113
- # Combine all sheets into a single string result
114
- result = []
115
- for sheet_name in workbook.sheet_names():
116
- sheet = workbook.sheet_by_name(sheet_name)
117
- sheet_content = self.parse_sheet_xls(sheet)
118
- result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
119
- return "\n\n".join(result)
106
+ # Dictionary comprehension for all sheets
107
+ return {name: self.parse_sheet_xls(workbook.sheet_by_name(name)) for name in sheets}
120
108
 
121
109
  def parse_sheet(self, sheet):
122
110
  """
@@ -170,34 +158,89 @@ class AlitaExcelLoader(AlitaTableLoader):
170
158
  # Format the sheet content based on the return type
171
159
  return self._format_sheet_content(sheet_content)
172
160
 
173
def _format_sheet_content(self, rows):
    """
    Split the rows of one sheet into newline-joined text chunks.

    Behaviour:
      1. If ``self.max_tokens < 1`` chunking is disabled: a single chunk
         containing every row (in the given order) joined by '\n' is
         returned. No header handling is applied on this path.
      2. Otherwise rows are accumulated into chunks so that the summed
         token count of the rows in a chunk (tiktoken, ``cl100k_base``)
         does not exceed ``self.max_tokens``.
         - If ``self.add_header_to_chunks`` is true and
           ``self.header_row_number`` (1-based) points at an existing row,
           that row is removed from the data rows and prepended once to
           every chunk. If it points outside the sheet, no header is used
           and all rows are treated as data.
         - A single row larger than ``self.max_tokens`` is never split; it
           becomes a chunk of its own (with the header, if any).

    NOTE: the header line and the joining newlines are not counted against
    ``self.max_tokens``, so a finalized chunk may slightly exceed the limit.

    Args:
        rows: list of row strings for a single sheet. The list is not
            modified.

    Returns:
        list[str]: the chunks, in row order.
    """
    # Chunking disabled: nothing to tokenize, so return before touching tiktoken.
    if self.max_tokens < 1:
        return ['\n'.join(rows)]

    # Imported lazily so the un-chunked path above does not need tiktoken.
    import tiktoken
    encoding = tiktoken.get_encoding('cl100k_base')

    # Pick the header row without mutating the caller's list; an
    # out-of-range header_row_number simply means "no header".
    header = None
    data_rows = rows
    if self.add_header_to_chunks and rows:
        header_idx = self.header_row_number - 1
        if 0 <= header_idx < len(rows):
            header = rows[header_idx]
            data_rows = rows[:header_idx] + rows[header_idx + 1:]

    def finalize_chunk(chunk_rows):
        """Join rows for a chunk, prepending the header when one is in use."""
        if self.add_header_to_chunks and header:
            return '\n'.join([header] + chunk_rows)
        return '\n'.join(chunk_rows)

    chunks = []          # finished chunks
    current_chunk = []   # rows collected for the chunk being built
    current_tokens = 0   # token total of current_chunk

    for row in data_rows:
        row_tokens = len(encoding.encode(row))
        # Flush before the limit would be exceeded. An oversize row lands in
        # an empty chunk and is flushed on the next iteration (or at the
        # end), so it always ends up alone in its chunk.
        if current_chunk and current_tokens + row_tokens > self.max_tokens:
            chunks.append(finalize_chunk(current_chunk))
            current_chunk = []
            current_tokens = 0
        current_chunk.append(row)
        current_tokens += row_tokens

    # Whatever is left forms the last chunk.
    if current_chunk:
        chunks.append(finalize_chunk(current_chunk))
    return chunks
188
230
 
189
231
  def load(self) -> list:
190
232
  docs = []
191
233
  content_per_sheet = self.get_content()
192
- for sheet_name, content in content_per_sheet.items():
234
+ # content_per_sheet is a dict of sheet_name: list of chunk strings
235
+ for sheet_name, content_chunks in content_per_sheet.items():
193
236
  metadata = {
194
237
  "source": f'{self.file_path}:{sheet_name}',
195
238
  "sheet_name": sheet_name,
196
239
  "file_type": "excel",
197
- "excel_by_sheets": self.excel_by_sheets,
198
- "return_type": self.return_type,
199
240
  }
200
- docs.append(Document(page_content=f"Sheet: {sheet_name}\n {str(content)}", metadata=metadata))
241
+ # Each chunk is a separate Document
242
+ for chunk in content_chunks:
243
+ docs.append(Document(page_content=chunk, metadata=metadata))
201
244
  return docs
202
245
 
203
246
  def read(self, lazy: bool = False):
@@ -0,0 +1,77 @@
1
+ from .AlitaJSONLoader import AlitaJSONLoader
2
+ import json
3
+ from io import StringIO
4
+ from typing import List, Iterator
5
+
6
+ from langchain_core.documents import Document
7
+ from langchain_core.tools import ToolException
8
+
9
+
10
class AlitaJSONLinesLoader(AlitaJSONLoader):
    """Load JSONL input (one JSON value per line) via AlitaJSONLoader.

    Behavior:
    - Reads lines from `file_path` if set, otherwise from `file_content`
      (bytes/bytearray, str, or a seekable file-like object).
    - Blank lines are skipped.
    - Every remaining line is handed to its own AlitaJSONLoader instance and
      the Documents produced by its `lazy_load()` are collected, in line
      order, into a single list.
    """

    def __init__(self, **kwargs):
        # All configuration handling is inherited from AlitaJSONLoader.
        super().__init__(**kwargs)

    def _iter_lines(self) -> Iterator[str]:
        """Yield the raw lines of the configured source.

        Raises:
            ToolException: if neither `file_path` nor `file_content` is set.
        """
        if hasattr(self, "file_path") and self.file_path:
            with open(self.file_path, "r", encoding=self.encoding) as f:
                yield from f
        elif hasattr(self, "file_content") and self.file_content:
            content = self.file_content
            if isinstance(content, (bytes, bytearray)):
                yield from StringIO(content.decode(self.encoding))
            elif isinstance(content, str):
                # Plain text has no seek(); iterate it line by line directly.
                yield from StringIO(content)
            else:
                # Assume a file-like object; rewind so every line is seen.
                content.seek(0)
                yield from content
        else:
            raise ToolException("'file_path' or 'file_content' parameter should be provided.")

    def load(self) -> List[Document]:  # type: ignore[override]
        """Return the Documents produced for every non-empty line.

        Each stripped, non-empty line is passed as `file_content` to a fresh
        AlitaJSONLoader configured with this loader's encoding,
        autodetect_encoding and max_tokens.

        Raises:
            ToolException: if processing any line fails; the original
                exception is chained as the cause.
        """
        docs: List[Document] = []

        for raw_line in self._iter_lines():
            line = raw_line.strip()
            if not line:
                continue
            try:
                line_loader = AlitaJSONLoader(
                    file_content=line,
                    file_name=getattr(self, "file_name", str(getattr(self, "file_path", "no_name"))),
                    encoding=self.encoding,
                    autodetect_encoding=self.autodetect_encoding,
                    max_tokens=self.max_tokens,
                )
                docs.extend(line_loader.lazy_load())
            except Exception as e:
                raise ToolException(f"Error processing JSONL line: {line[:100]}... Error: {e}") from e

        return docs
@@ -32,6 +32,8 @@ class AlitaJSONLoader(BaseLoader):
32
32
  elif hasattr(self, 'file_content') and self.file_content:
33
33
  if isinstance(self.file_content, bytes):
34
34
  return json.loads(self.file_content.decode(self.encoding))
35
+ elif isinstance(self.file_content, str):
36
+ return json.loads(self.file_content)
35
37
  else:
36
38
  return json.load(self.file_content)
37
39
  else:
@@ -45,7 +47,6 @@ class AlitaJSONLoader(BaseLoader):
45
47
  try:
46
48
  with open(self.file_path, encoding=encoding.encoding) as f:
47
49
  return f.read()
48
- break
49
50
  except UnicodeDecodeError:
50
51
  continue
51
52
  elif hasattr(self, 'file_content') and self.file_content:
@@ -58,9 +59,11 @@ class AlitaJSONLoader(BaseLoader):
58
59
  else:
59
60
  raise ValueError("Neither file_path nor file_content is provided for encoding detection.")
60
61
  else:
61
- raise RuntimeError(f"Error loading content with encoding {self.encoding}.") from e
62
+ raise RuntimeError(f"Error loading content with encoding {self.encoding}: {e}") from e
62
63
  except Exception as e:
63
- raise RuntimeError(f"Error loading content.") from e
64
+ # Preserve original error details so callers (e.g., parse_file_content)
65
+ # can expose the real root cause instead of a generic message.
66
+ raise RuntimeError(f"Error loading content: {e}") from e
64
67
 
65
68
  def lazy_load(self) -> Iterator[Document]:
66
69
  """Load from file path."""
@@ -6,6 +6,7 @@ from .utils import perform_llm_prediction_for_image_bytes, create_temp_file
6
6
  from pptx.enum.shapes import MSO_SHAPE_TYPE
7
7
  from langchain_core.documents import Document
8
8
 
9
+
9
10
  class AlitaPowerPointLoader:
10
11
 
11
12
  def __init__(self, file_path=None, file_content=None, mode=None, **unstructured_kwargs):
@@ -43,10 +44,203 @@ class AlitaPowerPointLoader:
43
44
  else:
44
45
  raise ToolException(f"Unknown mode value: {self.mode}. Only 'single', 'paged' values allowed.")
45
46
 
47
def _extract_table_as_markdown(self, table) -> str:
    """Convert a PPTX table to a Markdown table.

    The first table row is rendered as the Markdown header row and is
    followed by a ``---`` separator row sized to that first row.

    Args:
        table: Table-like object exposing ``rows``; every row exposes
            ``cells`` and every cell exposes a ``text`` string.

    Returns:
        An empty string when the table has no rows; otherwise the Markdown
        table prefixed with a bold "Table:" marker line and surrounded by
        newlines.
    """
    table_rows = list(table.rows)
    if not table_rows:
        return ""

    def _cell_to_markdown(cell) -> str:
        # A literal pipe would be read as a column delimiter, so escape it.
        text = cell.text.strip().replace("|", "\\|")
        # A Markdown table row must stay on one line. Besides "\n"
        # (paragraph break), cell text can carry "\v" (soft line break)
        # and "\r"; flatten all of them to a single space.
        for separator in ("\r\n", "\n", "\r", "\v"):
            text = text.replace(separator, " ")
        return text

    lines = [
        "| " + " | ".join(_cell_to_markdown(cell) for cell in row.cells) + " |"
        for row in table_rows
    ]

    # Markdown requires a separator row directly after the header row.
    num_cols = len(table_rows[0].cells)
    lines.insert(1, "| " + " | ".join(["---"] * num_cols) + " |")

    return "\n**Table:**\n" + "\n".join(lines) + "\n"
67
+
68
def _extract_chart_info(self, chart) -> str:
    """Extract the title, series data and labels from a PPTX chart.

    Extraction is best-effort and tries three sources in order, moving to
    the next one only when the previous produced no data lines:

    1. ``chart.series`` (values and, when exposed, categories per series);
    2. the first entry of ``chart.plots`` (shared categories plus series);
    3. the raw chart XML via ``self._extract_chart_from_xml``.

    Any exception raised by a source is swallowed so that one unreadable
    chart does not abort processing of the slide.

    Args:
        chart: Chart-like object (attributes are probed defensively).

    Returns:
        A text block starting with a bold "Chart:" marker line followed by
        one line per extracted item, or a placeholder note when nothing
        could be extracted.
    """

    def _format_series(name, categories, values, categories_only=True):
        """Render one series line, or return None when there is nothing to show."""
        if categories and values and len(categories) == len(values):
            data_pairs = [f"{cat}: {val}" for cat, val in zip(categories, values)]
            return f"Series '{name}': {', '.join(data_pairs)}"
        if values:
            return f"Series '{name}': {', '.join(str(v) for v in values)}"
        if categories and categories_only:
            return f"Series '{name}' categories: {', '.join(str(c) for c in categories)}"
        return None

    # Title and data are tracked separately so that "did we find any data?"
    # is a structural check rather than a substring test on rendered text
    # (a series line containing "Chart Title" must not look like a title).
    title_lines = []
    data_lines = []

    # Extract chart title
    try:
        if chart.has_title and chart.chart_title.has_text_frame:
            title_text = chart.chart_title.text_frame.text.strip()
            if title_text:
                title_lines.append(f"Chart Title: {title_text}")
    except Exception:
        pass

    # Source 1: series exposed directly on the chart.
    try:
        if hasattr(chart, 'series') and chart.series:
            for series in chart.series:
                series_name = series.name if series.name else "Unnamed Series"
                values = []
                categories = []

                try:
                    if hasattr(series, 'values') and series.values:
                        values = list(series.values)
                except Exception:
                    pass

                try:
                    if hasattr(series, 'categories') and series.categories:
                        categories = list(series.categories)
                except Exception:
                    pass

                line = _format_series(series_name, categories, values)
                if line is not None:
                    data_lines.append(line)
    except Exception:
        pass

    # Source 2: plots API (categories are shared by all series of a plot).
    if not data_lines:
        try:
            if hasattr(chart, 'plots') and chart.plots and len(chart.plots) > 0:
                plot = chart.plots[0]
                categories = []
                if hasattr(plot, 'categories') and plot.categories:
                    categories = list(plot.categories)
                    if categories:
                        data_lines.append(f"Categories: {', '.join(str(c) for c in categories)}")

                for series in plot.series:
                    series_name = series.name if series.name else "Unnamed Series"
                    values = list(series.values) if series.values else []
                    # Categories were already listed once above, so a
                    # categories-only series line is not emitted here.
                    line = _format_series(series_name, categories, values, categories_only=False)
                    if line is not None:
                        data_lines.append(line)
        except Exception:
            pass

    # Source 3: parse XML directly for unsupported chart types (e.g., pie3DChart).
    if not data_lines:
        try:
            data_lines.extend(self._extract_chart_from_xml(chart))
        except Exception:
            pass

    result = title_lines + data_lines

    # If we still have nothing at all (not even a title), add a note.
    if not result:
        result.append("(Chart detected - there is no parsed data from this type of chart)")

    return "\n**Chart:**\n" + "\n".join(result) + "\n"
150
+
151
def _extract_chart_from_xml(self, chart) -> list:
    """Extract chart data by parsing the underlying chart XML directly.

    For every ``c:ser`` element found under ``chart.part.element`` the
    series name, the cached category labels and the cached values are
    read and rendered as one text line.

    Args:
        chart: Object whose ``part.element`` is the chart XML root and
            supports ``find``/``findall`` with a namespace mapping.

    Returns:
        A list of strings, one per series that had categories and/or
        values; empty when no series data is present.
    """
    # Namespaces used in chart XML
    namespaces = {
        'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart',
        'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
    }

    def _cached_texts(parent, cache_tag):
        """Return the non-empty ``c:v`` texts of all points in the given cache."""
        cache = parent.find(f'.//{cache_tag}', namespaces)
        if cache is None:
            return []
        texts = []
        for pt in cache.findall('.//c:pt', namespaces):
            v = pt.find('c:v', namespaces)
            if v is not None and v.text:
                texts.append(v.text)
        return texts

    def _to_number(text):
        """Convert cached value text to float, keeping the raw text if it is not numeric."""
        try:
            return float(text)
        except ValueError:
            return text

    result = []

    for ser in chart.part.element.findall('.//c:ser', namespaces):
        # c:tx, c:cat and c:val are looked up as DIRECT children of c:ser.
        # A descendant search could match the c:tx of a data label nested
        # under c:dLbls and report label text as the series name.
        series_name = "Unnamed Series"
        tx = ser.find('c:tx', namespaces)
        if tx is not None:
            v = tx.find('.//c:v', namespaces)
            if v is not None and v.text:
                series_name = v.text

        # Category labels: prefer the string cache, fall back to a numeric
        # cache (numbers used as categories).
        categories = []
        cat = ser.find('c:cat', namespaces)
        if cat is not None:
            categories = _cached_texts(cat, 'c:strCache') or _cached_texts(cat, 'c:numCache')

        values = []
        val = ser.find('c:val', namespaces)
        if val is not None:
            values = [_to_number(text) for text in _cached_texts(val, 'c:numCache')]

        # Build output
        if categories and values and len(categories) == len(values):
            data_pairs = [f"{c}: {v}" for c, v in zip(categories, values)]
            result.append(f"Series '{series_name}': {', '.join(data_pairs)}")
        elif values:
            result.append(f"Series '{series_name}': {', '.join(str(v) for v in values)}")
        elif categories:
            result.append(f"Series '{series_name}' categories: {', '.join(str(c) for c in categories)}")

    return result
223
+
46
224
  def read_pptx_slide(self, slide, index):
47
225
  text_content = f'Slide: {index}\n'
48
226
  for shape in slide.shapes:
49
- if hasattr(shape, "text_frame") and shape.text_frame is not None:
227
+ # Handle tables
228
+ if shape.has_table:
229
+ text_content += self._extract_table_as_markdown(shape.table)
230
+ # Handle charts
231
+ elif shape.has_chart:
232
+ text_content += self._extract_chart_info(shape.chart)
233
+ # Handle images - check multiple ways images can be embedded
234
+ elif self.extract_images and self._is_image_shape(shape):
235
+ try:
236
+ image_blob = self._get_image_blob(shape)
237
+ if image_blob:
238
+ caption = perform_llm_prediction_for_image_bytes(image_blob, self.llm, self.prompt)
239
+ text_content += "\n**Image Transcript:**\n" + caption + "\n--------------------\n"
240
+ except Exception:
241
+ pass
242
+ # Handle text frames with hyperlinks
243
+ elif hasattr(shape, "text_frame") and shape.text_frame is not None:
50
244
  for paragraph in shape.text_frame.paragraphs:
51
245
  for run in paragraph.runs:
52
246
  if run.hyperlink and run.hyperlink.address:
@@ -56,14 +250,39 @@ class AlitaPowerPointLoader:
56
250
  else:
57
251
  text_content += run.text
58
252
  text_content += "\n"
59
- elif self.extract_images and shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
60
- try:
61
- caption = perform_llm_prediction_for_image_bytes(shape.image.blob, self.llm, self.prompt)
62
- except:
63
- caption = "unknown"
64
- text_content += "\n**Image Transcript:**\n" + caption + "\n--------------------\n"
65
253
  return text_content + "\n"
66
254
 
255
def _is_image_shape(self, shape) -> bool:
    """Check whether a shape contains an image, never raising.

    Three detection methods are tried in order:

    1. the shape type is ``MSO_SHAPE_TYPE.PICTURE``;
    2. the shape exposes an ``image`` with a non-empty ``blob``;
    3. the shape is a placeholder that exposes an ``image``.

    Every probe is guarded: ``hasattr`` only swallows ``AttributeError``,
    while python-pptx properties such as ``placeholder_format`` (on a
    non-placeholder shape), ``image`` (on an empty picture placeholder)
    and ``shape_type`` (on some shape classes) raise other exception
    types. A failed probe simply counts as "not detected".

    Args:
        shape: Slide shape to inspect.

    Returns:
        True when any detection method identifies an image, else False.
    """
    # Method 1: Check shape type
    try:
        if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
            return True
    except Exception:
        pass

    # Methods 2 and 3 both need an accessible image object.
    try:
        image = getattr(shape, 'image', None)
    except Exception:
        image = None
    if image is None:
        return False

    # Method 2: image with a non-empty blob
    try:
        if image.blob:
            return True
    except Exception:
        pass

    # Method 3: placeholder carrying an image (blob may be unreadable)
    try:
        return getattr(shape, 'placeholder_format', None) is not None
    except Exception:
        return False
275
+
276
def _get_image_blob(self, shape) -> "bytes | None":
    """Return the raw image bytes of a shape, or None when unavailable.

    The ``image`` attribute is read inside the ``try`` block because the
    property itself may raise (for example on a picture placeholder with
    no image inserted), which ``hasattr`` would not swallow.

    Args:
        shape: Slide shape expected to expose ``image.blob``.

    Returns:
        The image blob, or None when the shape has no accessible image.
    """
    try:
        image = getattr(shape, 'image', None)
        if image is not None:
            return image.blob
    except Exception:
        pass
    return None
285
+
67
286
  def load(self):
68
287
  content = self.get_content()
69
288
  if isinstance(content, str):
@@ -58,9 +58,12 @@ class AlitaTextLoader(BaseLoader):
58
58
  else:
59
59
  raise ValueError("Neither file_path nor file_content is provided for encoding detection.")
60
60
  else:
61
- raise RuntimeError(f"Error loading content with encoding {self.encoding}.") from e
61
+ # Preserve original error details for callers
62
+ raise RuntimeError(f"Error loading content with encoding {self.encoding}: {e}") from e
62
63
  except Exception as e:
63
- raise RuntimeError(f"Error loading content.") from e
64
+ # Preserve original error details so higher-level code (e.g., parse_file_content)
65
+ # can expose the real root cause instead of a generic message.
66
+ raise RuntimeError(f"Error loading content: {e}") from e
64
67
 
65
68
  return text
66
69
 
@@ -21,12 +21,14 @@ from .AlitaDocxMammothLoader import AlitaDocxMammothLoader
21
21
  from .AlitaExcelLoader import AlitaExcelLoader
22
22
  from .AlitaImageLoader import AlitaImageLoader
23
23
  from .AlitaJSONLoader import AlitaJSONLoader
24
+ from .AlitaJSONLinesLoader import AlitaJSONLinesLoader
24
25
  from .AlitaPDFLoader import AlitaPDFLoader
25
26
  from .AlitaPowerPointLoader import AlitaPowerPointLoader
26
27
  from .AlitaTextLoader import AlitaTextLoader
27
28
  from .AlitaMarkdownLoader import AlitaMarkdownLoader
28
29
  from .AlitaPythonLoader import AlitaPythonLoader
29
30
  from enum import Enum
31
+ from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
30
32
 
31
33
 
32
34
  class LoaderProperties(Enum):
@@ -34,7 +36,7 @@ class LoaderProperties(Enum):
34
36
  PROMPT_DEFAULT = 'use_default_prompt'
35
37
  PROMPT = 'prompt'
36
38
 
37
- DEFAULT_ALLOWED_BASE = {'max_tokens': 512}
39
+ DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}
38
40
 
39
41
  DEFAULT_ALLOWED_WITH_LLM = {
40
42
  **DEFAULT_ALLOWED_BASE,
@@ -43,6 +45,8 @@ DEFAULT_ALLOWED_WITH_LLM = {
43
45
  LoaderProperties.PROMPT.value: "",
44
46
  }
45
47
 
48
+ DEFAULT_ALLOWED_EXCEL = {**DEFAULT_ALLOWED_WITH_LLM, 'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''}
49
+
46
50
  # Image file loaders mapping - directly supported by LLM with image_url
47
51
  image_loaders_map = {
48
52
  '.png': {
@@ -162,11 +166,12 @@ document_loaders_map = {
162
166
  'spreadsheetml.sheet'),
163
167
  'is_multimodal_processing': False,
164
168
  'kwargs': {
165
- 'excel_by_sheets': True,
166
- 'raw_content': True,
167
- 'cleanse': False
169
+ 'add_header_to_chunks': False,
170
+ 'header_row_number': 1,
171
+ 'max_tokens': -1,
172
+ 'sheet_name': ''
168
173
  },
169
- 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
174
+ 'allowed_to_override': DEFAULT_ALLOWED_EXCEL
170
175
  },
171
176
  '.xls': {
172
177
  'class': AlitaExcelLoader,
@@ -177,7 +182,7 @@ document_loaders_map = {
177
182
  'raw_content': True,
178
183
  'cleanse': False
179
184
  },
180
- 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
185
+ 'allowed_to_override': DEFAULT_ALLOWED_EXCEL
181
186
  },
182
187
  '.pdf': {
183
188
  'class': AlitaPDFLoader,
@@ -204,7 +209,7 @@ document_loaders_map = {
204
209
  'allowed_to_override': DEFAULT_ALLOWED_BASE
205
210
  },
206
211
  '.jsonl': {
207
- 'class': AirbyteJSONLoader,
212
+ 'class': AlitaJSONLinesLoader,
208
213
  'mime_type': 'application/jsonl',
209
214
  'is_multimodal_processing': False,
210
215
  'kwargs': {},