alita-sdk 0.3.379__py3-none-any.whl → 0.3.627__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +156 -0
- alita_sdk/cli/agent_loader.py +245 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3113 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/testcases/__init__.py +94 -0
- alita_sdk/cli/testcases/data_generation.py +119 -0
- alita_sdk/cli/testcases/discovery.py +96 -0
- alita_sdk/cli/testcases/executor.py +84 -0
- alita_sdk/cli/testcases/logger.py +85 -0
- alita_sdk/cli/testcases/parser.py +172 -0
- alita_sdk/cli/testcases/prompts.py +91 -0
- alita_sdk/cli/testcases/reporting.py +125 -0
- alita_sdk/cli/testcases/setup.py +108 -0
- alita_sdk/cli/testcases/test_runner.py +282 -0
- alita_sdk/cli/testcases/utils.py +39 -0
- alita_sdk/cli/testcases/validation.py +90 -0
- alita_sdk/cli/testcases/workflow.py +196 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +1 -1
- alita_sdk/configurations/ado.py +141 -20
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/figma.py +76 -0
- alita_sdk/configurations/gitlab.py +91 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +329 -0
- alita_sdk/configurations/qtest.py +72 -1
- alita_sdk/configurations/report_portal.py +96 -0
- alita_sdk/configurations/sharepoint.py +148 -0
- alita_sdk/configurations/testio.py +83 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +3 -3
- alita_sdk/runtime/clients/client.py +388 -46
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +8 -21
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +157 -39
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
- alita_sdk/runtime/langchain/document_loaders/constants.py +40 -19
- alita_sdk/runtime/langchain/langraph_agent.py +405 -84
- alita_sdk/runtime/langchain/utils.py +106 -7
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +31 -0
- alita_sdk/runtime/toolkits/application.py +29 -10
- alita_sdk/runtime/toolkits/artifact.py +20 -11
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +783 -0
- alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +251 -6
- alita_sdk/runtime/toolkits/tools.py +356 -69
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +10 -3
- alita_sdk/runtime/tools/application.py +27 -6
- alita_sdk/runtime/tools/artifact.py +511 -28
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +67 -35
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +148 -46
- alita_sdk/runtime/tools/llm.py +1003 -128
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +8 -5
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -4
- alita_sdk/runtime/tools/sandbox.py +65 -48
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +9 -3
- alita_sdk/runtime/tools/vectorstore_base.py +70 -14
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/serialization.py +155 -0
- alita_sdk/runtime/utils/streamlit.py +40 -13
- alita_sdk/runtime/utils/toolkit_utils.py +30 -9
- alita_sdk/runtime/utils/utils.py +36 -0
- alita_sdk/tools/__init__.py +134 -35
- alita_sdk/tools/ado/repos/__init__.py +51 -32
- alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
- alita_sdk/tools/ado/test_plan/__init__.py +25 -9
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
- alita_sdk/tools/ado/utils.py +1 -18
- alita_sdk/tools/ado/wiki/__init__.py +25 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
- alita_sdk/tools/ado/work_item/__init__.py +26 -13
- alita_sdk/tools/ado/work_item/ado_wrapper.py +73 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +11 -8
- alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +271 -84
- alita_sdk/tools/bitbucket/__init__.py +17 -11
- alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
- alita_sdk/tools/browser/__init__.py +5 -4
- alita_sdk/tools/carrier/__init__.py +5 -6
- alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
- alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
- alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +10 -7
- alita_sdk/tools/cloud/azure/__init__.py +10 -7
- alita_sdk/tools/cloud/gcp/__init__.py +10 -7
- alita_sdk/tools/cloud/k8s/__init__.py +10 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +11 -8
- alita_sdk/tools/code_indexer_toolkit.py +82 -22
- alita_sdk/tools/confluence/__init__.py +22 -16
- alita_sdk/tools/confluence/api_wrapper.py +107 -30
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +493 -30
- alita_sdk/tools/figma/__init__.py +58 -11
- alita_sdk/tools/figma/api_wrapper.py +1235 -143
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +14 -15
- alita_sdk/tools/github/github_client.py +224 -100
- alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
- alita_sdk/tools/github/schemas.py +14 -5
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/github/tool_prompts.py +9 -22
- alita_sdk/tools/gitlab/__init__.py +16 -11
- alita_sdk/tools/gitlab/api_wrapper.py +218 -48
- alita_sdk/tools/gitlab_org/__init__.py +10 -9
- alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
- alita_sdk/tools/google/bigquery/__init__.py +13 -12
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +11 -8
- alita_sdk/tools/google_places/api_wrapper.py +1 -1
- alita_sdk/tools/jira/__init__.py +17 -10
- alita_sdk/tools/jira/api_wrapper.py +92 -41
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +12 -4
- alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +491 -106
- alita_sdk/tools/openapi/api_wrapper.py +1368 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +38 -25
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +10 -9
- alita_sdk/tools/pptx/__init__.py +11 -10
- alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
- alita_sdk/tools/qtest/__init__.py +31 -11
- alita_sdk/tools/qtest/api_wrapper.py +2135 -86
- alita_sdk/tools/rally/__init__.py +10 -9
- alita_sdk/tools/rally/api_wrapper.py +1 -1
- alita_sdk/tools/report_portal/__init__.py +12 -8
- alita_sdk/tools/salesforce/__init__.py +10 -8
- alita_sdk/tools/servicenow/__init__.py +17 -15
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +10 -7
- alita_sdk/tools/sharepoint/api_wrapper.py +129 -38
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +10 -7
- alita_sdk/tools/slack/api_wrapper.py +2 -2
- alita_sdk/tools/sql/__init__.py +12 -9
- alita_sdk/tools/testio/__init__.py +10 -7
- alita_sdk/tools/testrail/__init__.py +11 -10
- alita_sdk/tools/testrail/api_wrapper.py +1 -1
- alita_sdk/tools/utils/__init__.py +9 -4
- alita_sdk/tools/utils/content_parser.py +103 -18
- alita_sdk/tools/utils/text_operations.py +410 -0
- alita_sdk/tools/utils/tool_prompts.py +79 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +30 -13
- alita_sdk/tools/xray/__init__.py +13 -9
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +10 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +11 -7
- alita_sdk/tools/zephyr_essential/__init__.py +10 -7
- alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
- alita_sdk/tools/zephyr_essential/client.py +2 -2
- alita_sdk/tools/zephyr_scale/__init__.py +11 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
- alita_sdk/tools/zephyr_squad/__init__.py +10 -7
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +154 -8
- alita_sdk-0.3.627.dist-info/RECORD +468 -0
- alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
- alita_sdk-0.3.379.dist-info/RECORD +0 -360
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
|
@@ -30,7 +30,12 @@ class AlitaJSONLoader(BaseLoader):
|
|
|
30
30
|
with open(self.file_path, encoding=self.encoding) as f:
|
|
31
31
|
return json.load(f)
|
|
32
32
|
elif hasattr(self, 'file_content') and self.file_content:
|
|
33
|
-
|
|
33
|
+
if isinstance(self.file_content, bytes):
|
|
34
|
+
return json.loads(self.file_content.decode(self.encoding))
|
|
35
|
+
elif isinstance(self.file_content, str):
|
|
36
|
+
return json.loads(self.file_content)
|
|
37
|
+
else:
|
|
38
|
+
return json.load(self.file_content)
|
|
34
39
|
else:
|
|
35
40
|
raise ValueError("Neither file_path nor file_content is provided.")
|
|
36
41
|
|
|
@@ -42,7 +47,6 @@ class AlitaJSONLoader(BaseLoader):
|
|
|
42
47
|
try:
|
|
43
48
|
with open(self.file_path, encoding=encoding.encoding) as f:
|
|
44
49
|
return f.read()
|
|
45
|
-
break
|
|
46
50
|
except UnicodeDecodeError:
|
|
47
51
|
continue
|
|
48
52
|
elif hasattr(self, 'file_content') and self.file_content:
|
|
@@ -55,9 +59,11 @@ class AlitaJSONLoader(BaseLoader):
|
|
|
55
59
|
else:
|
|
56
60
|
raise ValueError("Neither file_path nor file_content is provided for encoding detection.")
|
|
57
61
|
else:
|
|
58
|
-
raise RuntimeError(f"Error loading content with encoding {self.encoding}
|
|
62
|
+
raise RuntimeError(f"Error loading content with encoding {self.encoding}: {e}") from e
|
|
59
63
|
except Exception as e:
|
|
60
|
-
|
|
64
|
+
# Preserve original error details so callers (e.g., parse_file_content)
|
|
65
|
+
# can expose the real root cause instead of a generic message.
|
|
66
|
+
raise RuntimeError(f"Error loading content: {e}") from e
|
|
61
67
|
|
|
62
68
|
def lazy_load(self) -> Iterator[Document]:
|
|
63
69
|
"""Load from file path."""
|
|
@@ -6,6 +6,7 @@ from .utils import perform_llm_prediction_for_image_bytes, create_temp_file
|
|
|
6
6
|
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
|
7
7
|
from langchain_core.documents import Document
|
|
8
8
|
|
|
9
|
+
|
|
9
10
|
class AlitaPowerPointLoader:
|
|
10
11
|
|
|
11
12
|
def __init__(self, file_path=None, file_content=None, mode=None, **unstructured_kwargs):
|
|
@@ -43,10 +44,203 @@ class AlitaPowerPointLoader:
|
|
|
43
44
|
else:
|
|
44
45
|
raise ToolException(f"Unknown mode value: {self.mode}. Only 'single', 'paged' values allowed.")
|
|
45
46
|
|
|
47
|
+
def _extract_table_as_markdown(self, table) -> str:
|
|
48
|
+
"""Convert PPTX table to markdown format."""
|
|
49
|
+
if not table.rows:
|
|
50
|
+
return ""
|
|
51
|
+
|
|
52
|
+
rows = []
|
|
53
|
+
for row in table.rows:
|
|
54
|
+
cells = []
|
|
55
|
+
for cell in row.cells:
|
|
56
|
+
cell_text = cell.text.strip().replace("|", "\\|").replace("\n", " ")
|
|
57
|
+
cells.append(cell_text)
|
|
58
|
+
rows.append("| " + " | ".join(cells) + " |")
|
|
59
|
+
|
|
60
|
+
if len(rows) > 0:
|
|
61
|
+
# Add header separator after first row
|
|
62
|
+
num_cols = len(table.rows[0].cells)
|
|
63
|
+
header_sep = "| " + " | ".join(["---"] * num_cols) + " |"
|
|
64
|
+
rows.insert(1, header_sep)
|
|
65
|
+
|
|
66
|
+
return "\n**Table:**\n" + "\n".join(rows) + "\n"
|
|
67
|
+
|
|
68
|
+
def _extract_chart_info(self, chart) -> str:
|
|
69
|
+
"""Extract data and labels from PPTX chart."""
|
|
70
|
+
result = []
|
|
71
|
+
|
|
72
|
+
# Extract chart title
|
|
73
|
+
try:
|
|
74
|
+
if chart.has_title and chart.chart_title.has_text_frame:
|
|
75
|
+
title_text = chart.chart_title.text_frame.text.strip()
|
|
76
|
+
if title_text:
|
|
77
|
+
result.append(f"Chart Title: {title_text}")
|
|
78
|
+
except Exception:
|
|
79
|
+
pass
|
|
80
|
+
|
|
81
|
+
# Try to extract series data directly from chart.series (works for some chart types)
|
|
82
|
+
try:
|
|
83
|
+
if hasattr(chart, 'series') and chart.series:
|
|
84
|
+
for series in chart.series:
|
|
85
|
+
series_name = series.name if series.name else "Unnamed Series"
|
|
86
|
+
values = []
|
|
87
|
+
categories = []
|
|
88
|
+
|
|
89
|
+
# Try to get values
|
|
90
|
+
try:
|
|
91
|
+
if hasattr(series, 'values') and series.values:
|
|
92
|
+
values = list(series.values)
|
|
93
|
+
except Exception:
|
|
94
|
+
pass
|
|
95
|
+
|
|
96
|
+
# Try to get categories from series
|
|
97
|
+
try:
|
|
98
|
+
if hasattr(series, 'categories') and series.categories:
|
|
99
|
+
categories = list(series.categories)
|
|
100
|
+
except Exception:
|
|
101
|
+
pass
|
|
102
|
+
|
|
103
|
+
# Build output
|
|
104
|
+
if categories and values and len(categories) == len(values):
|
|
105
|
+
data_pairs = [f"{cat}: {val}" for cat, val in zip(categories, values)]
|
|
106
|
+
result.append(f"Series '{series_name}': {', '.join(data_pairs)}")
|
|
107
|
+
elif values:
|
|
108
|
+
result.append(f"Series '{series_name}': {', '.join(str(v) for v in values)}")
|
|
109
|
+
elif categories:
|
|
110
|
+
result.append(f"Series '{series_name}' categories: {', '.join(str(c) for c in categories)}")
|
|
111
|
+
except Exception:
|
|
112
|
+
pass
|
|
113
|
+
|
|
114
|
+
# Fallback: try plots API for bar/line charts
|
|
115
|
+
if not result or (len(result) == 1 and "Chart Title" in result[0]):
|
|
116
|
+
try:
|
|
117
|
+
if hasattr(chart, 'plots') and chart.plots and len(chart.plots) > 0:
|
|
118
|
+
plot = chart.plots[0]
|
|
119
|
+
categories = []
|
|
120
|
+
if hasattr(plot, 'categories') and plot.categories:
|
|
121
|
+
categories = list(plot.categories)
|
|
122
|
+
if categories:
|
|
123
|
+
result.append(f"Categories: {', '.join(str(c) for c in categories)}")
|
|
124
|
+
|
|
125
|
+
# Extract series data from plot
|
|
126
|
+
for series in plot.series:
|
|
127
|
+
series_name = series.name if series.name else "Unnamed Series"
|
|
128
|
+
values = list(series.values) if series.values else []
|
|
129
|
+
|
|
130
|
+
if categories and len(categories) == len(values):
|
|
131
|
+
data_pairs = [f"{cat}: {val}" for cat, val in zip(categories, values)]
|
|
132
|
+
result.append(f"Series '{series_name}': {', '.join(data_pairs)}")
|
|
133
|
+
elif values:
|
|
134
|
+
result.append(f"Series '{series_name}': {', '.join(str(v) for v in values)}")
|
|
135
|
+
except Exception:
|
|
136
|
+
pass
|
|
137
|
+
|
|
138
|
+
# Final fallback: parse XML directly for unsupported chart types (e.g., pie3DChart)
|
|
139
|
+
if not result or (len(result) == 1 and "Chart Title" in result[0]):
|
|
140
|
+
try:
|
|
141
|
+
result.extend(self._extract_chart_from_xml(chart))
|
|
142
|
+
except Exception:
|
|
143
|
+
pass
|
|
144
|
+
|
|
145
|
+
# If we still have no data, add a note
|
|
146
|
+
if not result:
|
|
147
|
+
result.append("(Chart detected - there is no parsed data from this type of chart)")
|
|
148
|
+
|
|
149
|
+
return "\n**Chart:**\n" + "\n".join(result) + "\n"
|
|
150
|
+
|
|
151
|
+
def _extract_chart_from_xml(self, chart) -> list:
|
|
152
|
+
"""Extract chart data by parsing the underlying XML directly."""
|
|
153
|
+
result = []
|
|
154
|
+
|
|
155
|
+
# Get the chart part XML
|
|
156
|
+
chart_part = chart.part
|
|
157
|
+
chart_element = chart_part.element
|
|
158
|
+
|
|
159
|
+
# Define namespaces used in chart XML
|
|
160
|
+
namespaces = {
|
|
161
|
+
'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart',
|
|
162
|
+
'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
# Find all series (ser) elements
|
|
166
|
+
series_elements = chart_element.findall('.//c:ser', namespaces)
|
|
167
|
+
|
|
168
|
+
for ser in series_elements:
|
|
169
|
+
series_name = "Unnamed Series"
|
|
170
|
+
categories = []
|
|
171
|
+
values = []
|
|
172
|
+
|
|
173
|
+
# Extract series name from tx/v or tx/strRef
|
|
174
|
+
tx = ser.find('.//c:tx', namespaces)
|
|
175
|
+
if tx is not None:
|
|
176
|
+
v = tx.find('.//c:v', namespaces)
|
|
177
|
+
if v is not None and v.text:
|
|
178
|
+
series_name = v.text
|
|
179
|
+
|
|
180
|
+
# Extract category labels from c:cat
|
|
181
|
+
cat = ser.find('.//c:cat', namespaces)
|
|
182
|
+
if cat is not None:
|
|
183
|
+
# Try strRef first (string references)
|
|
184
|
+
str_cache = cat.find('.//c:strCache', namespaces)
|
|
185
|
+
if str_cache is not None:
|
|
186
|
+
for pt in str_cache.findall('.//c:pt', namespaces):
|
|
187
|
+
v = pt.find('c:v', namespaces)
|
|
188
|
+
if v is not None and v.text:
|
|
189
|
+
categories.append(v.text)
|
|
190
|
+
|
|
191
|
+
# Try numRef (numeric references used as categories)
|
|
192
|
+
if not categories:
|
|
193
|
+
num_cache = cat.find('.//c:numCache', namespaces)
|
|
194
|
+
if num_cache is not None:
|
|
195
|
+
for pt in num_cache.findall('.//c:pt', namespaces):
|
|
196
|
+
v = pt.find('c:v', namespaces)
|
|
197
|
+
if v is not None and v.text:
|
|
198
|
+
categories.append(v.text)
|
|
199
|
+
|
|
200
|
+
# Extract values from c:val
|
|
201
|
+
val = ser.find('.//c:val', namespaces)
|
|
202
|
+
if val is not None:
|
|
203
|
+
num_cache = val.find('.//c:numCache', namespaces)
|
|
204
|
+
if num_cache is not None:
|
|
205
|
+
for pt in num_cache.findall('.//c:pt', namespaces):
|
|
206
|
+
v = pt.find('c:v', namespaces)
|
|
207
|
+
if v is not None and v.text:
|
|
208
|
+
try:
|
|
209
|
+
values.append(float(v.text))
|
|
210
|
+
except ValueError:
|
|
211
|
+
values.append(v.text)
|
|
212
|
+
|
|
213
|
+
# Build output
|
|
214
|
+
if categories and values and len(categories) == len(values):
|
|
215
|
+
data_pairs = [f"{cat}: {val}" for cat, val in zip(categories, values)]
|
|
216
|
+
result.append(f"Series '{series_name}': {', '.join(data_pairs)}")
|
|
217
|
+
elif values:
|
|
218
|
+
result.append(f"Series '{series_name}': {', '.join(str(v) for v in values)}")
|
|
219
|
+
elif categories:
|
|
220
|
+
result.append(f"Series '{series_name}' categories: {', '.join(str(c) for c in categories)}")
|
|
221
|
+
|
|
222
|
+
return result
|
|
223
|
+
|
|
46
224
|
def read_pptx_slide(self, slide, index):
|
|
47
225
|
text_content = f'Slide: {index}\n'
|
|
48
226
|
for shape in slide.shapes:
|
|
49
|
-
|
|
227
|
+
# Handle tables
|
|
228
|
+
if shape.has_table:
|
|
229
|
+
text_content += self._extract_table_as_markdown(shape.table)
|
|
230
|
+
# Handle charts
|
|
231
|
+
elif shape.has_chart:
|
|
232
|
+
text_content += self._extract_chart_info(shape.chart)
|
|
233
|
+
# Handle images - check multiple ways images can be embedded
|
|
234
|
+
elif self.extract_images and self._is_image_shape(shape):
|
|
235
|
+
try:
|
|
236
|
+
image_blob = self._get_image_blob(shape)
|
|
237
|
+
if image_blob:
|
|
238
|
+
caption = perform_llm_prediction_for_image_bytes(image_blob, self.llm, self.prompt)
|
|
239
|
+
text_content += "\n**Image Transcript:**\n" + caption + "\n--------------------\n"
|
|
240
|
+
except Exception:
|
|
241
|
+
pass
|
|
242
|
+
# Handle text frames with hyperlinks
|
|
243
|
+
elif hasattr(shape, "text_frame") and shape.text_frame is not None:
|
|
50
244
|
for paragraph in shape.text_frame.paragraphs:
|
|
51
245
|
for run in paragraph.runs:
|
|
52
246
|
if run.hyperlink and run.hyperlink.address:
|
|
@@ -56,14 +250,39 @@ class AlitaPowerPointLoader:
|
|
|
56
250
|
else:
|
|
57
251
|
text_content += run.text
|
|
58
252
|
text_content += "\n"
|
|
59
|
-
elif self.extract_images and shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
|
|
60
|
-
try:
|
|
61
|
-
caption = perform_llm_prediction_for_image_bytes(shape.image.blob, self.llm, self.prompt)
|
|
62
|
-
except:
|
|
63
|
-
caption = "unknown"
|
|
64
|
-
text_content += "\n**Image Transcript:**\n" + caption + "\n--------------------\n"
|
|
65
253
|
return text_content + "\n"
|
|
66
254
|
|
|
255
|
+
def _is_image_shape(self, shape) -> bool:
|
|
256
|
+
"""Check if shape contains an image using multiple detection methods."""
|
|
257
|
+
# Method 1: Check shape type
|
|
258
|
+
if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
|
|
259
|
+
return True
|
|
260
|
+
# Method 2: Check if shape has image attribute with blob
|
|
261
|
+
if hasattr(shape, 'image') and shape.image is not None:
|
|
262
|
+
try:
|
|
263
|
+
if shape.image.blob:
|
|
264
|
+
return True
|
|
265
|
+
except Exception:
|
|
266
|
+
pass
|
|
267
|
+
# Method 3: Check for placeholder with image
|
|
268
|
+
if hasattr(shape, 'placeholder_format') and shape.placeholder_format is not None:
|
|
269
|
+
try:
|
|
270
|
+
if hasattr(shape, 'image') and shape.image is not None:
|
|
271
|
+
return True
|
|
272
|
+
except Exception:
|
|
273
|
+
pass
|
|
274
|
+
return False
|
|
275
|
+
|
|
276
|
+
def _get_image_blob(self, shape) -> bytes:
|
|
277
|
+
"""Extract image blob from shape using available methods."""
|
|
278
|
+
# Try direct image access
|
|
279
|
+
if hasattr(shape, 'image') and shape.image is not None:
|
|
280
|
+
try:
|
|
281
|
+
return shape.image.blob
|
|
282
|
+
except Exception:
|
|
283
|
+
pass
|
|
284
|
+
return None
|
|
285
|
+
|
|
67
286
|
def load(self):
|
|
68
287
|
content = self.get_content()
|
|
69
288
|
if isinstance(content, str):
|
|
@@ -58,9 +58,12 @@ class AlitaTextLoader(BaseLoader):
|
|
|
58
58
|
else:
|
|
59
59
|
raise ValueError("Neither file_path nor file_content is provided for encoding detection.")
|
|
60
60
|
else:
|
|
61
|
-
|
|
61
|
+
# Preserve original error details for callers
|
|
62
|
+
raise RuntimeError(f"Error loading content with encoding {self.encoding}: {e}") from e
|
|
62
63
|
except Exception as e:
|
|
63
|
-
|
|
64
|
+
# Preserve original error details so higher-level code (e.g., parse_file_content)
|
|
65
|
+
# can expose the real root cause instead of a generic message.
|
|
66
|
+
raise RuntimeError(f"Error loading content: {e}") from e
|
|
64
67
|
|
|
65
68
|
return text
|
|
66
69
|
|
|
@@ -21,12 +21,14 @@ from .AlitaDocxMammothLoader import AlitaDocxMammothLoader
|
|
|
21
21
|
from .AlitaExcelLoader import AlitaExcelLoader
|
|
22
22
|
from .AlitaImageLoader import AlitaImageLoader
|
|
23
23
|
from .AlitaJSONLoader import AlitaJSONLoader
|
|
24
|
+
from .AlitaJSONLinesLoader import AlitaJSONLinesLoader
|
|
24
25
|
from .AlitaPDFLoader import AlitaPDFLoader
|
|
25
26
|
from .AlitaPowerPointLoader import AlitaPowerPointLoader
|
|
26
27
|
from .AlitaTextLoader import AlitaTextLoader
|
|
27
28
|
from .AlitaMarkdownLoader import AlitaMarkdownLoader
|
|
28
29
|
from .AlitaPythonLoader import AlitaPythonLoader
|
|
29
30
|
from enum import Enum
|
|
31
|
+
from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
|
|
30
32
|
|
|
31
33
|
|
|
32
34
|
class LoaderProperties(Enum):
|
|
@@ -34,7 +36,7 @@ class LoaderProperties(Enum):
|
|
|
34
36
|
PROMPT_DEFAULT = 'use_default_prompt'
|
|
35
37
|
PROMPT = 'prompt'
|
|
36
38
|
|
|
37
|
-
DEFAULT_ALLOWED_BASE = {'max_tokens':
|
|
39
|
+
DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}
|
|
38
40
|
|
|
39
41
|
DEFAULT_ALLOWED_WITH_LLM = {
|
|
40
42
|
**DEFAULT_ALLOWED_BASE,
|
|
@@ -43,7 +45,9 @@ DEFAULT_ALLOWED_WITH_LLM = {
|
|
|
43
45
|
LoaderProperties.PROMPT.value: "",
|
|
44
46
|
}
|
|
45
47
|
|
|
46
|
-
|
|
48
|
+
DEFAULT_ALLOWED_EXCEL = {**DEFAULT_ALLOWED_WITH_LLM, 'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''}
|
|
49
|
+
|
|
50
|
+
# Image file loaders mapping - directly supported by LLM with image_url
|
|
47
51
|
image_loaders_map = {
|
|
48
52
|
'.png': {
|
|
49
53
|
'class': AlitaImageLoader,
|
|
@@ -73,6 +77,17 @@ image_loaders_map = {
|
|
|
73
77
|
'kwargs': {},
|
|
74
78
|
'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
|
|
75
79
|
},
|
|
80
|
+
'.webp': {
|
|
81
|
+
'class': AlitaImageLoader,
|
|
82
|
+
'mime_type': 'image/webp',
|
|
83
|
+
'is_multimodal_processing': True,
|
|
84
|
+
'kwargs': {},
|
|
85
|
+
'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
# Image file loaders mapping - require conversion before sending to LLM
|
|
90
|
+
image_loaders_map_converted = {
|
|
76
91
|
'.bmp': {
|
|
77
92
|
'class': AlitaImageLoader,
|
|
78
93
|
'mime_type': 'image/bmp',
|
|
@@ -102,7 +117,7 @@ document_loaders_map = {
|
|
|
102
117
|
},
|
|
103
118
|
'.yml': {
|
|
104
119
|
'class': AlitaTextLoader,
|
|
105
|
-
'mime_type': 'application/
|
|
120
|
+
'mime_type': 'application/yaml',
|
|
106
121
|
'is_multimodal_processing': False,
|
|
107
122
|
'kwargs': {
|
|
108
123
|
'autodetect_encoding': True
|
|
@@ -111,7 +126,7 @@ document_loaders_map = {
|
|
|
111
126
|
},
|
|
112
127
|
'.yaml': {
|
|
113
128
|
'class': AlitaTextLoader,
|
|
114
|
-
'mime_type': 'application/
|
|
129
|
+
'mime_type': 'application/yaml',
|
|
115
130
|
'is_multimodal_processing': False,
|
|
116
131
|
'kwargs': {
|
|
117
132
|
'autodetect_encoding': True
|
|
@@ -151,11 +166,12 @@ document_loaders_map = {
|
|
|
151
166
|
'spreadsheetml.sheet'),
|
|
152
167
|
'is_multimodal_processing': False,
|
|
153
168
|
'kwargs': {
|
|
154
|
-
'
|
|
155
|
-
'
|
|
156
|
-
'
|
|
169
|
+
'add_header_to_chunks': False,
|
|
170
|
+
'header_row_number': 1,
|
|
171
|
+
'max_tokens': -1,
|
|
172
|
+
'sheet_name': ''
|
|
157
173
|
},
|
|
158
|
-
'allowed_to_override':
|
|
174
|
+
'allowed_to_override': DEFAULT_ALLOWED_EXCEL
|
|
159
175
|
},
|
|
160
176
|
'.xls': {
|
|
161
177
|
'class': AlitaExcelLoader,
|
|
@@ -166,7 +182,7 @@ document_loaders_map = {
|
|
|
166
182
|
'raw_content': True,
|
|
167
183
|
'cleanse': False
|
|
168
184
|
},
|
|
169
|
-
'allowed_to_override':
|
|
185
|
+
'allowed_to_override': DEFAULT_ALLOWED_EXCEL
|
|
170
186
|
},
|
|
171
187
|
'.pdf': {
|
|
172
188
|
'class': AlitaPDFLoader,
|
|
@@ -193,7 +209,7 @@ document_loaders_map = {
|
|
|
193
209
|
'allowed_to_override': DEFAULT_ALLOWED_BASE
|
|
194
210
|
},
|
|
195
211
|
'.jsonl': {
|
|
196
|
-
'class':
|
|
212
|
+
'class': AlitaJSONLinesLoader,
|
|
197
213
|
'mime_type': 'application/jsonl',
|
|
198
214
|
'is_multimodal_processing': False,
|
|
199
215
|
'kwargs': {},
|
|
@@ -244,17 +260,17 @@ document_loaders_map = {
|
|
|
244
260
|
'extract_images': False,
|
|
245
261
|
}
|
|
246
262
|
},
|
|
247
|
-
'.py': {
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
}
|
|
263
|
+
# '.py': {
|
|
264
|
+
# 'class': AlitaPythonLoader,
|
|
265
|
+
# 'mime_type': 'text/x-python',
|
|
266
|
+
# 'is_multimodal_processing': False,
|
|
267
|
+
# 'kwargs': {},
|
|
268
|
+
# 'allowed_to_override': DEFAULT_ALLOWED_BASE
|
|
269
|
+
# }
|
|
254
270
|
}
|
|
255
271
|
|
|
256
272
|
code_extensions = [
|
|
257
|
-
|
|
273
|
+
'.py', # Python
|
|
258
274
|
'.js', # JavaScript
|
|
259
275
|
'.ts', # TypeScript
|
|
260
276
|
'.java', # Java
|
|
@@ -292,7 +308,12 @@ default_loader_config = {
|
|
|
292
308
|
code_loaders_map = {ext: default_loader_config for ext in code_extensions}
|
|
293
309
|
|
|
294
310
|
# Combined mapping for backward compatibility
|
|
295
|
-
loaders_map = {
|
|
311
|
+
loaders_map = {
|
|
312
|
+
**image_loaders_map,
|
|
313
|
+
**image_loaders_map_converted,
|
|
314
|
+
**document_loaders_map,
|
|
315
|
+
**code_loaders_map
|
|
316
|
+
}
|
|
296
317
|
|
|
297
318
|
loaders_allowed_to_override = {
|
|
298
319
|
extension: config.get('allowed_to_override')
|