alita-sdk 0.3.379__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +156 -0
  6. alita_sdk/cli/agent_loader.py +245 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3113 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1073 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/testcases/__init__.py +94 -0
  23. alita_sdk/cli/testcases/data_generation.py +119 -0
  24. alita_sdk/cli/testcases/discovery.py +96 -0
  25. alita_sdk/cli/testcases/executor.py +84 -0
  26. alita_sdk/cli/testcases/logger.py +85 -0
  27. alita_sdk/cli/testcases/parser.py +172 -0
  28. alita_sdk/cli/testcases/prompts.py +91 -0
  29. alita_sdk/cli/testcases/reporting.py +125 -0
  30. alita_sdk/cli/testcases/setup.py +108 -0
  31. alita_sdk/cli/testcases/test_runner.py +282 -0
  32. alita_sdk/cli/testcases/utils.py +39 -0
  33. alita_sdk/cli/testcases/validation.py +90 -0
  34. alita_sdk/cli/testcases/workflow.py +196 -0
  35. alita_sdk/cli/toolkit.py +327 -0
  36. alita_sdk/cli/toolkit_loader.py +85 -0
  37. alita_sdk/cli/tools/__init__.py +43 -0
  38. alita_sdk/cli/tools/approval.py +224 -0
  39. alita_sdk/cli/tools/filesystem.py +1751 -0
  40. alita_sdk/cli/tools/planning.py +389 -0
  41. alita_sdk/cli/tools/terminal.py +414 -0
  42. alita_sdk/community/__init__.py +72 -12
  43. alita_sdk/community/inventory/__init__.py +236 -0
  44. alita_sdk/community/inventory/config.py +257 -0
  45. alita_sdk/community/inventory/enrichment.py +2137 -0
  46. alita_sdk/community/inventory/extractors.py +1469 -0
  47. alita_sdk/community/inventory/ingestion.py +3172 -0
  48. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  49. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  50. alita_sdk/community/inventory/parsers/base.py +295 -0
  51. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  52. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  53. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  54. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  55. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  56. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  57. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  58. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  59. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  60. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  61. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  62. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  63. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  64. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  65. alita_sdk/community/inventory/patterns/loader.py +348 -0
  66. alita_sdk/community/inventory/patterns/registry.py +198 -0
  67. alita_sdk/community/inventory/presets.py +535 -0
  68. alita_sdk/community/inventory/retrieval.py +1403 -0
  69. alita_sdk/community/inventory/toolkit.py +173 -0
  70. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  71. alita_sdk/community/inventory/visualize.py +1370 -0
  72. alita_sdk/configurations/__init__.py +1 -1
  73. alita_sdk/configurations/ado.py +141 -20
  74. alita_sdk/configurations/bitbucket.py +94 -2
  75. alita_sdk/configurations/confluence.py +130 -1
  76. alita_sdk/configurations/figma.py +76 -0
  77. alita_sdk/configurations/gitlab.py +91 -0
  78. alita_sdk/configurations/jira.py +103 -0
  79. alita_sdk/configurations/openapi.py +329 -0
  80. alita_sdk/configurations/qtest.py +72 -1
  81. alita_sdk/configurations/report_portal.py +96 -0
  82. alita_sdk/configurations/sharepoint.py +148 -0
  83. alita_sdk/configurations/testio.py +83 -0
  84. alita_sdk/configurations/testrail.py +88 -0
  85. alita_sdk/configurations/xray.py +93 -0
  86. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  87. alita_sdk/configurations/zephyr_essential.py +75 -0
  88. alita_sdk/runtime/clients/artifact.py +3 -3
  89. alita_sdk/runtime/clients/client.py +388 -46
  90. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  91. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  92. alita_sdk/runtime/clients/sandbox_client.py +8 -21
  93. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  94. alita_sdk/runtime/langchain/assistant.py +157 -39
  95. alita_sdk/runtime/langchain/constants.py +647 -1
  96. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  97. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  98. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  99. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -4
  100. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  101. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  102. alita_sdk/runtime/langchain/document_loaders/constants.py +40 -19
  103. alita_sdk/runtime/langchain/langraph_agent.py +405 -84
  104. alita_sdk/runtime/langchain/utils.py +106 -7
  105. alita_sdk/runtime/llms/preloaded.py +2 -6
  106. alita_sdk/runtime/models/mcp_models.py +61 -0
  107. alita_sdk/runtime/skills/__init__.py +91 -0
  108. alita_sdk/runtime/skills/callbacks.py +498 -0
  109. alita_sdk/runtime/skills/discovery.py +540 -0
  110. alita_sdk/runtime/skills/executor.py +610 -0
  111. alita_sdk/runtime/skills/input_builder.py +371 -0
  112. alita_sdk/runtime/skills/models.py +330 -0
  113. alita_sdk/runtime/skills/registry.py +355 -0
  114. alita_sdk/runtime/skills/skill_runner.py +330 -0
  115. alita_sdk/runtime/toolkits/__init__.py +31 -0
  116. alita_sdk/runtime/toolkits/application.py +29 -10
  117. alita_sdk/runtime/toolkits/artifact.py +20 -11
  118. alita_sdk/runtime/toolkits/datasource.py +13 -6
  119. alita_sdk/runtime/toolkits/mcp.py +783 -0
  120. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  121. alita_sdk/runtime/toolkits/planning.py +178 -0
  122. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  123. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  124. alita_sdk/runtime/toolkits/tools.py +356 -69
  125. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  126. alita_sdk/runtime/tools/__init__.py +10 -3
  127. alita_sdk/runtime/tools/application.py +27 -6
  128. alita_sdk/runtime/tools/artifact.py +511 -28
  129. alita_sdk/runtime/tools/data_analysis.py +183 -0
  130. alita_sdk/runtime/tools/function.py +67 -35
  131. alita_sdk/runtime/tools/graph.py +10 -4
  132. alita_sdk/runtime/tools/image_generation.py +148 -46
  133. alita_sdk/runtime/tools/llm.py +1003 -128
  134. alita_sdk/runtime/tools/loop.py +3 -1
  135. alita_sdk/runtime/tools/loop_output.py +3 -1
  136. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  137. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  138. alita_sdk/runtime/tools/mcp_server_tool.py +8 -5
  139. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  140. alita_sdk/runtime/tools/planning/models.py +246 -0
  141. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  142. alita_sdk/runtime/tools/router.py +2 -4
  143. alita_sdk/runtime/tools/sandbox.py +65 -48
  144. alita_sdk/runtime/tools/skill_router.py +776 -0
  145. alita_sdk/runtime/tools/tool.py +3 -1
  146. alita_sdk/runtime/tools/vectorstore.py +9 -3
  147. alita_sdk/runtime/tools/vectorstore_base.py +70 -14
  148. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  149. alita_sdk/runtime/utils/constants.py +5 -1
  150. alita_sdk/runtime/utils/mcp_client.py +492 -0
  151. alita_sdk/runtime/utils/mcp_oauth.py +361 -0
  152. alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
  153. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  154. alita_sdk/runtime/utils/serialization.py +155 -0
  155. alita_sdk/runtime/utils/streamlit.py +40 -13
  156. alita_sdk/runtime/utils/toolkit_utils.py +30 -9
  157. alita_sdk/runtime/utils/utils.py +36 -0
  158. alita_sdk/tools/__init__.py +134 -35
  159. alita_sdk/tools/ado/repos/__init__.py +51 -32
  160. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  161. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  162. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  163. alita_sdk/tools/ado/utils.py +1 -18
  164. alita_sdk/tools/ado/wiki/__init__.py +25 -12
  165. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  166. alita_sdk/tools/ado/work_item/__init__.py +26 -13
  167. alita_sdk/tools/ado/work_item/ado_wrapper.py +73 -11
  168. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  169. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  170. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  171. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  172. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  173. alita_sdk/tools/base/tool.py +5 -1
  174. alita_sdk/tools/base_indexer_toolkit.py +271 -84
  175. alita_sdk/tools/bitbucket/__init__.py +17 -11
  176. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  177. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  178. alita_sdk/tools/browser/__init__.py +5 -4
  179. alita_sdk/tools/carrier/__init__.py +5 -6
  180. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  181. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  182. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  183. alita_sdk/tools/chunkers/__init__.py +3 -1
  184. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  185. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  186. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  187. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  188. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  189. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  190. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  191. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  192. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  193. alita_sdk/tools/code/linter/__init__.py +10 -8
  194. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  195. alita_sdk/tools/code/sonar/__init__.py +11 -8
  196. alita_sdk/tools/code_indexer_toolkit.py +82 -22
  197. alita_sdk/tools/confluence/__init__.py +22 -16
  198. alita_sdk/tools/confluence/api_wrapper.py +107 -30
  199. alita_sdk/tools/confluence/loader.py +14 -2
  200. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  201. alita_sdk/tools/elastic/__init__.py +11 -8
  202. alita_sdk/tools/elitea_base.py +493 -30
  203. alita_sdk/tools/figma/__init__.py +58 -11
  204. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  205. alita_sdk/tools/figma/figma_client.py +73 -0
  206. alita_sdk/tools/figma/toon_tools.py +2748 -0
  207. alita_sdk/tools/github/__init__.py +14 -15
  208. alita_sdk/tools/github/github_client.py +224 -100
  209. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  210. alita_sdk/tools/github/schemas.py +14 -5
  211. alita_sdk/tools/github/tool.py +5 -1
  212. alita_sdk/tools/github/tool_prompts.py +9 -22
  213. alita_sdk/tools/gitlab/__init__.py +16 -11
  214. alita_sdk/tools/gitlab/api_wrapper.py +218 -48
  215. alita_sdk/tools/gitlab_org/__init__.py +10 -9
  216. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  217. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  218. alita_sdk/tools/google/bigquery/tool.py +5 -1
  219. alita_sdk/tools/google_places/__init__.py +11 -8
  220. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  221. alita_sdk/tools/jira/__init__.py +17 -10
  222. alita_sdk/tools/jira/api_wrapper.py +92 -41
  223. alita_sdk/tools/keycloak/__init__.py +11 -8
  224. alita_sdk/tools/localgit/__init__.py +9 -3
  225. alita_sdk/tools/localgit/local_git.py +62 -54
  226. alita_sdk/tools/localgit/tool.py +5 -1
  227. alita_sdk/tools/memory/__init__.py +12 -4
  228. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  229. alita_sdk/tools/ocr/__init__.py +11 -8
  230. alita_sdk/tools/openapi/__init__.py +491 -106
  231. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  232. alita_sdk/tools/openapi/tool.py +20 -0
  233. alita_sdk/tools/pandas/__init__.py +20 -12
  234. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  235. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  236. alita_sdk/tools/postman/__init__.py +10 -9
  237. alita_sdk/tools/pptx/__init__.py +11 -10
  238. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  239. alita_sdk/tools/qtest/__init__.py +31 -11
  240. alita_sdk/tools/qtest/api_wrapper.py +2135 -86
  241. alita_sdk/tools/rally/__init__.py +10 -9
  242. alita_sdk/tools/rally/api_wrapper.py +1 -1
  243. alita_sdk/tools/report_portal/__init__.py +12 -8
  244. alita_sdk/tools/salesforce/__init__.py +10 -8
  245. alita_sdk/tools/servicenow/__init__.py +17 -15
  246. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  247. alita_sdk/tools/sharepoint/__init__.py +10 -7
  248. alita_sdk/tools/sharepoint/api_wrapper.py +129 -38
  249. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  250. alita_sdk/tools/sharepoint/utils.py +8 -2
  251. alita_sdk/tools/slack/__init__.py +10 -7
  252. alita_sdk/tools/slack/api_wrapper.py +2 -2
  253. alita_sdk/tools/sql/__init__.py +12 -9
  254. alita_sdk/tools/testio/__init__.py +10 -7
  255. alita_sdk/tools/testrail/__init__.py +11 -10
  256. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  257. alita_sdk/tools/utils/__init__.py +9 -4
  258. alita_sdk/tools/utils/content_parser.py +103 -18
  259. alita_sdk/tools/utils/text_operations.py +410 -0
  260. alita_sdk/tools/utils/tool_prompts.py +79 -0
  261. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +30 -13
  262. alita_sdk/tools/xray/__init__.py +13 -9
  263. alita_sdk/tools/yagmail/__init__.py +9 -3
  264. alita_sdk/tools/zephyr/__init__.py +10 -7
  265. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -7
  266. alita_sdk/tools/zephyr_essential/__init__.py +10 -7
  267. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  268. alita_sdk/tools/zephyr_essential/client.py +2 -2
  269. alita_sdk/tools/zephyr_scale/__init__.py +11 -8
  270. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  271. alita_sdk/tools/zephyr_squad/__init__.py +10 -7
  272. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +154 -8
  273. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  274. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  275. alita_sdk-0.3.379.dist-info/RECORD +0 -360
  276. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  277. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  278. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
@@ -30,7 +30,12 @@ class AlitaJSONLoader(BaseLoader):
30
30
  with open(self.file_path, encoding=self.encoding) as f:
31
31
  return json.load(f)
32
32
  elif hasattr(self, 'file_content') and self.file_content:
33
- return json.load(self.file_content)
33
+ if isinstance(self.file_content, bytes):
34
+ return json.loads(self.file_content.decode(self.encoding))
35
+ elif isinstance(self.file_content, str):
36
+ return json.loads(self.file_content)
37
+ else:
38
+ return json.load(self.file_content)
34
39
  else:
35
40
  raise ValueError("Neither file_path nor file_content is provided.")
36
41
 
@@ -42,7 +47,6 @@ class AlitaJSONLoader(BaseLoader):
42
47
  try:
43
48
  with open(self.file_path, encoding=encoding.encoding) as f:
44
49
  return f.read()
45
- break
46
50
  except UnicodeDecodeError:
47
51
  continue
48
52
  elif hasattr(self, 'file_content') and self.file_content:
@@ -55,9 +59,11 @@ class AlitaJSONLoader(BaseLoader):
55
59
  else:
56
60
  raise ValueError("Neither file_path nor file_content is provided for encoding detection.")
57
61
  else:
58
- raise RuntimeError(f"Error loading content with encoding {self.encoding}.") from e
62
+ raise RuntimeError(f"Error loading content with encoding {self.encoding}: {e}") from e
59
63
  except Exception as e:
60
- raise RuntimeError(f"Error loading content.") from e
64
+ # Preserve original error details so callers (e.g., parse_file_content)
65
+ # can expose the real root cause instead of a generic message.
66
+ raise RuntimeError(f"Error loading content: {e}") from e
61
67
 
62
68
  def lazy_load(self) -> Iterator[Document]:
63
69
  """Load from file path."""
@@ -6,6 +6,7 @@ from .utils import perform_llm_prediction_for_image_bytes, create_temp_file
6
6
  from pptx.enum.shapes import MSO_SHAPE_TYPE
7
7
  from langchain_core.documents import Document
8
8
 
9
+
9
10
  class AlitaPowerPointLoader:
10
11
 
11
12
  def __init__(self, file_path=None, file_content=None, mode=None, **unstructured_kwargs):
@@ -43,10 +44,203 @@ class AlitaPowerPointLoader:
43
44
  else:
44
45
  raise ToolException(f"Unknown mode value: {self.mode}. Only 'single', 'paged' values allowed.")
45
46
 
47
+ def _extract_table_as_markdown(self, table) -> str:
48
+ """Convert PPTX table to markdown format."""
49
+ if not table.rows:
50
+ return ""
51
+
52
+ rows = []
53
+ for row in table.rows:
54
+ cells = []
55
+ for cell in row.cells:
56
+ cell_text = cell.text.strip().replace("|", "\\|").replace("\n", " ")
57
+ cells.append(cell_text)
58
+ rows.append("| " + " | ".join(cells) + " |")
59
+
60
+ if len(rows) > 0:
61
+ # Add header separator after first row
62
+ num_cols = len(table.rows[0].cells)
63
+ header_sep = "| " + " | ".join(["---"] * num_cols) + " |"
64
+ rows.insert(1, header_sep)
65
+
66
+ return "\n**Table:**\n" + "\n".join(rows) + "\n"
67
+
68
+ def _extract_chart_info(self, chart) -> str:
69
+ """Extract data and labels from PPTX chart."""
70
+ result = []
71
+
72
+ # Extract chart title
73
+ try:
74
+ if chart.has_title and chart.chart_title.has_text_frame:
75
+ title_text = chart.chart_title.text_frame.text.strip()
76
+ if title_text:
77
+ result.append(f"Chart Title: {title_text}")
78
+ except Exception:
79
+ pass
80
+
81
+ # Try to extract series data directly from chart.series (works for some chart types)
82
+ try:
83
+ if hasattr(chart, 'series') and chart.series:
84
+ for series in chart.series:
85
+ series_name = series.name if series.name else "Unnamed Series"
86
+ values = []
87
+ categories = []
88
+
89
+ # Try to get values
90
+ try:
91
+ if hasattr(series, 'values') and series.values:
92
+ values = list(series.values)
93
+ except Exception:
94
+ pass
95
+
96
+ # Try to get categories from series
97
+ try:
98
+ if hasattr(series, 'categories') and series.categories:
99
+ categories = list(series.categories)
100
+ except Exception:
101
+ pass
102
+
103
+ # Build output
104
+ if categories and values and len(categories) == len(values):
105
+ data_pairs = [f"{cat}: {val}" for cat, val in zip(categories, values)]
106
+ result.append(f"Series '{series_name}': {', '.join(data_pairs)}")
107
+ elif values:
108
+ result.append(f"Series '{series_name}': {', '.join(str(v) for v in values)}")
109
+ elif categories:
110
+ result.append(f"Series '{series_name}' categories: {', '.join(str(c) for c in categories)}")
111
+ except Exception:
112
+ pass
113
+
114
+ # Fallback: try plots API for bar/line charts
115
+ if not result or (len(result) == 1 and "Chart Title" in result[0]):
116
+ try:
117
+ if hasattr(chart, 'plots') and chart.plots and len(chart.plots) > 0:
118
+ plot = chart.plots[0]
119
+ categories = []
120
+ if hasattr(plot, 'categories') and plot.categories:
121
+ categories = list(plot.categories)
122
+ if categories:
123
+ result.append(f"Categories: {', '.join(str(c) for c in categories)}")
124
+
125
+ # Extract series data from plot
126
+ for series in plot.series:
127
+ series_name = series.name if series.name else "Unnamed Series"
128
+ values = list(series.values) if series.values else []
129
+
130
+ if categories and len(categories) == len(values):
131
+ data_pairs = [f"{cat}: {val}" for cat, val in zip(categories, values)]
132
+ result.append(f"Series '{series_name}': {', '.join(data_pairs)}")
133
+ elif values:
134
+ result.append(f"Series '{series_name}': {', '.join(str(v) for v in values)}")
135
+ except Exception:
136
+ pass
137
+
138
+ # Final fallback: parse XML directly for unsupported chart types (e.g., pie3DChart)
139
+ if not result or (len(result) == 1 and "Chart Title" in result[0]):
140
+ try:
141
+ result.extend(self._extract_chart_from_xml(chart))
142
+ except Exception:
143
+ pass
144
+
145
+ # If we still have no data, add a note
146
+ if not result:
147
+ result.append("(Chart detected - there is no parsed data from this type of chart)")
148
+
149
+ return "\n**Chart:**\n" + "\n".join(result) + "\n"
150
+
151
+ def _extract_chart_from_xml(self, chart) -> list:
152
+ """Extract chart data by parsing the underlying XML directly."""
153
+ result = []
154
+
155
+ # Get the chart part XML
156
+ chart_part = chart.part
157
+ chart_element = chart_part.element
158
+
159
+ # Define namespaces used in chart XML
160
+ namespaces = {
161
+ 'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart',
162
+ 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
163
+ }
164
+
165
+ # Find all series (ser) elements
166
+ series_elements = chart_element.findall('.//c:ser', namespaces)
167
+
168
+ for ser in series_elements:
169
+ series_name = "Unnamed Series"
170
+ categories = []
171
+ values = []
172
+
173
+ # Extract series name from tx/v or tx/strRef
174
+ tx = ser.find('.//c:tx', namespaces)
175
+ if tx is not None:
176
+ v = tx.find('.//c:v', namespaces)
177
+ if v is not None and v.text:
178
+ series_name = v.text
179
+
180
+ # Extract category labels from c:cat
181
+ cat = ser.find('.//c:cat', namespaces)
182
+ if cat is not None:
183
+ # Try strRef first (string references)
184
+ str_cache = cat.find('.//c:strCache', namespaces)
185
+ if str_cache is not None:
186
+ for pt in str_cache.findall('.//c:pt', namespaces):
187
+ v = pt.find('c:v', namespaces)
188
+ if v is not None and v.text:
189
+ categories.append(v.text)
190
+
191
+ # Try numRef (numeric references used as categories)
192
+ if not categories:
193
+ num_cache = cat.find('.//c:numCache', namespaces)
194
+ if num_cache is not None:
195
+ for pt in num_cache.findall('.//c:pt', namespaces):
196
+ v = pt.find('c:v', namespaces)
197
+ if v is not None and v.text:
198
+ categories.append(v.text)
199
+
200
+ # Extract values from c:val
201
+ val = ser.find('.//c:val', namespaces)
202
+ if val is not None:
203
+ num_cache = val.find('.//c:numCache', namespaces)
204
+ if num_cache is not None:
205
+ for pt in num_cache.findall('.//c:pt', namespaces):
206
+ v = pt.find('c:v', namespaces)
207
+ if v is not None and v.text:
208
+ try:
209
+ values.append(float(v.text))
210
+ except ValueError:
211
+ values.append(v.text)
212
+
213
+ # Build output
214
+ if categories and values and len(categories) == len(values):
215
+ data_pairs = [f"{cat}: {val}" for cat, val in zip(categories, values)]
216
+ result.append(f"Series '{series_name}': {', '.join(data_pairs)}")
217
+ elif values:
218
+ result.append(f"Series '{series_name}': {', '.join(str(v) for v in values)}")
219
+ elif categories:
220
+ result.append(f"Series '{series_name}' categories: {', '.join(str(c) for c in categories)}")
221
+
222
+ return result
223
+
46
224
  def read_pptx_slide(self, slide, index):
47
225
  text_content = f'Slide: {index}\n'
48
226
  for shape in slide.shapes:
49
- if hasattr(shape, "text_frame") and shape.text_frame is not None:
227
+ # Handle tables
228
+ if shape.has_table:
229
+ text_content += self._extract_table_as_markdown(shape.table)
230
+ # Handle charts
231
+ elif shape.has_chart:
232
+ text_content += self._extract_chart_info(shape.chart)
233
+ # Handle images - check multiple ways images can be embedded
234
+ elif self.extract_images and self._is_image_shape(shape):
235
+ try:
236
+ image_blob = self._get_image_blob(shape)
237
+ if image_blob:
238
+ caption = perform_llm_prediction_for_image_bytes(image_blob, self.llm, self.prompt)
239
+ text_content += "\n**Image Transcript:**\n" + caption + "\n--------------------\n"
240
+ except Exception:
241
+ pass
242
+ # Handle text frames with hyperlinks
243
+ elif hasattr(shape, "text_frame") and shape.text_frame is not None:
50
244
  for paragraph in shape.text_frame.paragraphs:
51
245
  for run in paragraph.runs:
52
246
  if run.hyperlink and run.hyperlink.address:
@@ -56,14 +250,39 @@ class AlitaPowerPointLoader:
56
250
  else:
57
251
  text_content += run.text
58
252
  text_content += "\n"
59
- elif self.extract_images and shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
60
- try:
61
- caption = perform_llm_prediction_for_image_bytes(shape.image.blob, self.llm, self.prompt)
62
- except:
63
- caption = "unknown"
64
- text_content += "\n**Image Transcript:**\n" + caption + "\n--------------------\n"
65
253
  return text_content + "\n"
66
254
 
255
+ def _is_image_shape(self, shape) -> bool:
256
+ """Check if shape contains an image using multiple detection methods."""
257
+ # Method 1: Check shape type
258
+ if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
259
+ return True
260
+ # Method 2: Check if shape has image attribute with blob
261
+ if hasattr(shape, 'image') and shape.image is not None:
262
+ try:
263
+ if shape.image.blob:
264
+ return True
265
+ except Exception:
266
+ pass
267
+ # Method 3: Check for placeholder with image
268
+ if hasattr(shape, 'placeholder_format') and shape.placeholder_format is not None:
269
+ try:
270
+ if hasattr(shape, 'image') and shape.image is not None:
271
+ return True
272
+ except Exception:
273
+ pass
274
+ return False
275
+
276
+ def _get_image_blob(self, shape) -> bytes:
277
+ """Extract image blob from shape using available methods."""
278
+ # Try direct image access
279
+ if hasattr(shape, 'image') and shape.image is not None:
280
+ try:
281
+ return shape.image.blob
282
+ except Exception:
283
+ pass
284
+ return None
285
+
67
286
  def load(self):
68
287
  content = self.get_content()
69
288
  if isinstance(content, str):
@@ -58,9 +58,12 @@ class AlitaTextLoader(BaseLoader):
58
58
  else:
59
59
  raise ValueError("Neither file_path nor file_content is provided for encoding detection.")
60
60
  else:
61
- raise RuntimeError(f"Error loading content with encoding {self.encoding}.") from e
61
+ # Preserve original error details for callers
62
+ raise RuntimeError(f"Error loading content with encoding {self.encoding}: {e}") from e
62
63
  except Exception as e:
63
- raise RuntimeError(f"Error loading content.") from e
64
+ # Preserve original error details so higher-level code (e.g., parse_file_content)
65
+ # can expose the real root cause instead of a generic message.
66
+ raise RuntimeError(f"Error loading content: {e}") from e
64
67
 
65
68
  return text
66
69
 
@@ -21,12 +21,14 @@ from .AlitaDocxMammothLoader import AlitaDocxMammothLoader
21
21
  from .AlitaExcelLoader import AlitaExcelLoader
22
22
  from .AlitaImageLoader import AlitaImageLoader
23
23
  from .AlitaJSONLoader import AlitaJSONLoader
24
+ from .AlitaJSONLinesLoader import AlitaJSONLinesLoader
24
25
  from .AlitaPDFLoader import AlitaPDFLoader
25
26
  from .AlitaPowerPointLoader import AlitaPowerPointLoader
26
27
  from .AlitaTextLoader import AlitaTextLoader
27
28
  from .AlitaMarkdownLoader import AlitaMarkdownLoader
28
29
  from .AlitaPythonLoader import AlitaPythonLoader
29
30
  from enum import Enum
31
+ from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
30
32
 
31
33
 
32
34
  class LoaderProperties(Enum):
@@ -34,7 +36,7 @@ class LoaderProperties(Enum):
34
36
  PROMPT_DEFAULT = 'use_default_prompt'
35
37
  PROMPT = 'prompt'
36
38
 
37
- DEFAULT_ALLOWED_BASE = {'max_tokens': 512}
39
+ DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}
38
40
 
39
41
  DEFAULT_ALLOWED_WITH_LLM = {
40
42
  **DEFAULT_ALLOWED_BASE,
@@ -43,7 +45,9 @@ DEFAULT_ALLOWED_WITH_LLM = {
43
45
  LoaderProperties.PROMPT.value: "",
44
46
  }
45
47
 
46
- # Image file loaders mapping
48
+ DEFAULT_ALLOWED_EXCEL = {**DEFAULT_ALLOWED_WITH_LLM, 'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''}
49
+
50
+ # Image file loaders mapping - directly supported by LLM with image_url
47
51
  image_loaders_map = {
48
52
  '.png': {
49
53
  'class': AlitaImageLoader,
@@ -73,6 +77,17 @@ image_loaders_map = {
73
77
  'kwargs': {},
74
78
  'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
75
79
  },
80
+ '.webp': {
81
+ 'class': AlitaImageLoader,
82
+ 'mime_type': 'image/webp',
83
+ 'is_multimodal_processing': True,
84
+ 'kwargs': {},
85
+ 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
86
+ }
87
+ }
88
+
89
+ # Image file loaders mapping - require conversion before sending to LLM
90
+ image_loaders_map_converted = {
76
91
  '.bmp': {
77
92
  'class': AlitaImageLoader,
78
93
  'mime_type': 'image/bmp',
@@ -102,7 +117,7 @@ document_loaders_map = {
102
117
  },
103
118
  '.yml': {
104
119
  'class': AlitaTextLoader,
105
- 'mime_type': 'application/x-yaml',
120
+ 'mime_type': 'application/yaml',
106
121
  'is_multimodal_processing': False,
107
122
  'kwargs': {
108
123
  'autodetect_encoding': True
@@ -111,7 +126,7 @@ document_loaders_map = {
111
126
  },
112
127
  '.yaml': {
113
128
  'class': AlitaTextLoader,
114
- 'mime_type': 'application/x-yaml',
129
+ 'mime_type': 'application/yaml',
115
130
  'is_multimodal_processing': False,
116
131
  'kwargs': {
117
132
  'autodetect_encoding': True
@@ -151,11 +166,12 @@ document_loaders_map = {
151
166
  'spreadsheetml.sheet'),
152
167
  'is_multimodal_processing': False,
153
168
  'kwargs': {
154
- 'excel_by_sheets': True,
155
- 'raw_content': True,
156
- 'cleanse': False
169
+ 'add_header_to_chunks': False,
170
+ 'header_row_number': 1,
171
+ 'max_tokens': -1,
172
+ 'sheet_name': ''
157
173
  },
158
- 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
174
+ 'allowed_to_override': DEFAULT_ALLOWED_EXCEL
159
175
  },
160
176
  '.xls': {
161
177
  'class': AlitaExcelLoader,
@@ -166,7 +182,7 @@ document_loaders_map = {
166
182
  'raw_content': True,
167
183
  'cleanse': False
168
184
  },
169
- 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
185
+ 'allowed_to_override': DEFAULT_ALLOWED_EXCEL
170
186
  },
171
187
  '.pdf': {
172
188
  'class': AlitaPDFLoader,
@@ -193,7 +209,7 @@ document_loaders_map = {
193
209
  'allowed_to_override': DEFAULT_ALLOWED_BASE
194
210
  },
195
211
  '.jsonl': {
196
- 'class': AirbyteJSONLoader,
212
+ 'class': AlitaJSONLinesLoader,
197
213
  'mime_type': 'application/jsonl',
198
214
  'is_multimodal_processing': False,
199
215
  'kwargs': {},
@@ -244,17 +260,17 @@ document_loaders_map = {
244
260
  'extract_images': False,
245
261
  }
246
262
  },
247
- '.py': {
248
- 'class': AlitaPythonLoader,
249
- 'mime_type': 'text/x-python',
250
- 'is_multimodal_processing': False,
251
- 'kwargs': {},
252
- 'allowed_to_override': DEFAULT_ALLOWED_BASE
253
- }
263
+ # '.py': {
264
+ # 'class': AlitaPythonLoader,
265
+ # 'mime_type': 'text/x-python',
266
+ # 'is_multimodal_processing': False,
267
+ # 'kwargs': {},
268
+ # 'allowed_to_override': DEFAULT_ALLOWED_BASE
269
+ # }
254
270
  }
255
271
 
256
272
  code_extensions = [
257
- # '.py', # Python
273
+ '.py', # Python
258
274
  '.js', # JavaScript
259
275
  '.ts', # TypeScript
260
276
  '.java', # Java
@@ -292,7 +308,12 @@ default_loader_config = {
292
308
  code_loaders_map = {ext: default_loader_config for ext in code_extensions}
293
309
 
294
310
  # Combined mapping for backward compatibility
295
- loaders_map = {**image_loaders_map, **document_loaders_map, **code_loaders_map}
311
+ loaders_map = {
312
+ **image_loaders_map,
313
+ **image_loaders_map_converted,
314
+ **document_loaders_map,
315
+ **code_loaders_map
316
+ }
296
317
 
297
318
  loaders_allowed_to_override = {
298
319
  extension: config.get('allowed_to_override')