alita-sdk 0.3.486__py3-none-any.whl → 0.3.497__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/cli/agent_loader.py +27 -6
- alita_sdk/cli/agents.py +10 -1
- alita_sdk/cli/tools/filesystem.py +95 -9
- alita_sdk/runtime/clients/client.py +40 -21
- alita_sdk/runtime/langchain/constants.py +3 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/constants.py +10 -6
- alita_sdk/runtime/langchain/langraph_agent.py +2 -1
- alita_sdk/runtime/toolkits/mcp.py +68 -62
- alita_sdk/runtime/toolkits/planning.py +3 -1
- alita_sdk/runtime/toolkits/tools.py +37 -18
- alita_sdk/runtime/tools/artifact.py +46 -17
- alita_sdk/runtime/tools/function.py +2 -1
- alita_sdk/runtime/tools/llm.py +135 -24
- alita_sdk/runtime/tools/mcp_remote_tool.py +23 -7
- alita_sdk/runtime/tools/vectorstore_base.py +3 -3
- alita_sdk/runtime/utils/AlitaCallback.py +106 -20
- alita_sdk/runtime/utils/mcp_client.py +465 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/toolkit_utils.py +7 -13
- alita_sdk/tools/base_indexer_toolkit.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +2 -0
- alita_sdk/tools/chunkers/universal_chunker.py +1 -0
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/confluence/api_wrapper.py +63 -14
- alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +16 -18
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/RECORD +34 -32
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/entry_points.txt +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.497.dist-info}/top_level.txt +0 -0
alita_sdk/cli/agent_loader.py
CHANGED
```diff
@@ -8,6 +8,7 @@ import json
 import yaml
 from pathlib import Path
 from typing import Dict, Any
+from pydantic import SecretStr
 
 from .config import substitute_env_vars
 
@@ -85,6 +86,25 @@ def load_agent_definition(file_path: str) -> Dict[str, Any]:
     raise ValueError(f"Unsupported file format: {path.suffix}")
 
 
+def unwrap_secrets(obj: Any) -> Any:
+    """
+    Recursively unwrap pydantic SecretStr values into plain strings.
+
+    Handles nested dicts, lists, tuples, and sets while preserving structure.
+    """
+    if isinstance(obj, SecretStr):
+        return obj.get_secret_value()
+    if isinstance(obj, dict):
+        return {k: unwrap_secrets(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [unwrap_secrets(v) for v in obj]
+    if isinstance(obj, tuple):
+        return tuple(unwrap_secrets(v) for v in obj)
+    if isinstance(obj, set):
+        return {unwrap_secrets(v) for v in obj}
+    return obj
+
+
 def build_agent_data_structure(agent_def: Dict[str, Any], toolkit_configs: list,
                                llm_model: str, llm_temperature: float, llm_max_tokens: int) -> Dict[str, Any]:
     """
@@ -128,12 +148,13 @@ def build_agent_data_structure(agent_def: Dict[str, Any], toolkit_configs: list,
                 if hasattr(toolkit_class, 'toolkit_config_schema'):
                     schema = toolkit_class.toolkit_config_schema()
                     validated_config = schema(**toolkit_config)
-                    #
-
-
-
-
-
+                    # Use python mode so SecretStr remains as objects, then unwrap recursively
+                    validated_dict = unwrap_secrets(validated_config.model_dump(mode="python"))
+                    validated_dict['type'] = toolkit_config.get('type')
+                    validated_dict['toolkit_name'] = toolkit_config.get('toolkit_name')
+                    validated_toolkit_configs.append(validated_dict)
+                else:
+                    validated_toolkit_configs.append(toolkit_config)
             else:
                 validated_toolkit_configs.append(toolkit_config)
         except Exception:
```
alita_sdk/cli/agents.py
CHANGED
```diff
@@ -1358,12 +1358,14 @@ def agent_show(ctx, agent_source: str, version: Optional[str]):
               help='Grant agent filesystem access to this directory')
 @click.option('--verbose', '-v', type=click.Choice(['quiet', 'default', 'debug']), default='default',
               help='Output verbosity level: quiet (final output only), default (tool calls + outputs), debug (all including LLM calls)')
+@click.option('--recursion-limit', type=int, default=50,
+              help='Maximum number of tool execution steps per turn')
 @click.pass_context
 def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
                toolkit_config: tuple, inventory_path: Optional[str], thread_id: Optional[str],
                model: Optional[str], temperature: Optional[float],
                max_tokens: Optional[int], work_dir: Optional[str],
-               verbose: str):
+               verbose: str, recursion_limit: Optional[int]):
     """Start interactive chat with an agent.
 
     \b
@@ -2615,6 +2617,11 @@ def agent_chat(ctx, agent_source: Optional[str], version: Optional[str],
            invoke_config = RunnableConfig(
                configurable={"thread_id": current_session_id}
            )
+           # always proceed with continuation enabled
+           invoke_config["should_continue"] = True
+           # Set recursion limit for tool executions
+           logger.debug(f"Setting tool steps limit to {recursion_limit}")
+           invoke_config["recursion_limit"] = recursion_limit
            cli_callback = None
            if show_verbose:
                cli_callback = create_cli_callback(verbose=True, debug=debug_mode)
@@ -2718,6 +2725,8 @@
                invoke_config = RunnableConfig(
                    configurable={"thread_id": continuation_thread_id}
                )
+               invoke_config["should_continue"] = True
+               invoke_config["recursion_limit"] = recursion_limit
                if cli_callback:
                    invoke_config["callbacks"] = [cli_callback]
 
```
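The `--recursion-limit` value is passed straight through `RunnableConfig`; `recursion_limit` is LangGraph's standard key for capping graph steps per invocation, while `should_continue` is an SDK-specific flag from this diff. A minimal sketch of the same wiring, with a trivial stand-in for the compiled agent runnable:

```python
from langchain_core.runnables import RunnableConfig, RunnableLambda

# Stand-in for the compiled LangGraph agent; a real chat session uses the
# runnable built by the agent loader
agent = RunnableLambda(lambda state: state)

config = RunnableConfig(configurable={"thread_id": "session-1"})
config["recursion_limit"] = 50    # cap on graph steps per turn
config["should_continue"] = True  # SDK-specific continuation flag

result = agent.invoke({"messages": [("user", "hello")]}, config=config)
```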
alita_sdk/cli/tools/filesystem.py
CHANGED
```diff
@@ -135,6 +135,7 @@ class ListDirectoryInput(BaseModel):
     path: str = Field(default=".", description="Relative path to the directory to list")
     include_sizes: bool = Field(default=False, description="Include file sizes in the output")
     sort_by: str = Field(default="name", description="Sort by 'name' or 'size'")
+    max_results: Optional[int] = Field(default=200, description="Maximum number of entries to return. Default is 200 to prevent context overflow.")
 
 
 class DirectoryTreeInput(BaseModel):
@@ -181,6 +182,8 @@ class FileSystemTool(BaseTool):
     """Base class for filesystem tools with directory restriction."""
     base_directory: str  # Primary directory (for backward compatibility)
     allowed_directories: List[str] = []  # Additional allowed directories
+    _basename_collision_detected: bool = False  # Cache for collision detection
+    _basename_collision_checked: bool = False  # Whether we've checked for collisions
 
     def _get_all_allowed_directories(self) -> List[Path]:
         """Get all allowed directories as resolved Paths."""
@@ -191,6 +194,56 @@
             dirs.append(resolved)
         return dirs
 
+    def _check_basename_collision(self) -> bool:
+        """Check if multiple allowed directories have the same basename."""
+        if self._basename_collision_checked:
+            return self._basename_collision_detected
+
+        allowed_dirs = self._get_all_allowed_directories()
+        basenames = [d.name for d in allowed_dirs]
+        self._basename_collision_detected = len(basenames) != len(set(basenames))
+        self._basename_collision_checked = True
+        return self._basename_collision_detected
+
+    def _get_relative_path_from_allowed_dirs(self, absolute_path: Path) -> tuple:
+        """Get relative path and directory name for a file in allowed directories.
+
+        Args:
+            absolute_path: Absolute path to the file
+
+        Returns:
+            Tuple of (relative_path, directory_name)
+
+        Raises:
+            ValueError: If path is not within any allowed directory
+        """
+        allowed_dirs = self._get_all_allowed_directories()
+
+        # Find which allowed directory contains this path
+        for base in allowed_dirs:
+            try:
+                rel_path = absolute_path.relative_to(base)
+
+                # Determine directory name for prefix
+                if self._check_basename_collision():
+                    # Use parent/basename format to disambiguate
+                    dir_name = f"{base.parent.name}/{base.name}"
+                else:
+                    # Use just basename
+                    dir_name = base.name
+
+                return (str(rel_path), dir_name)
+            except ValueError:
+                continue
+
+        # Path not in any allowed directory
+        allowed_paths = [str(d) for d in allowed_dirs]
+        raise ValueError(
+            f"Path '{absolute_path}' is not within any allowed directory.\n"
+            f"Allowed directories: {allowed_paths}\n"
+            f"Attempted path: {absolute_path}"
+        )
+
     def _resolve_path(self, relative_path: str) -> Path:
         """
         Resolve and validate a path within any of the allowed directories.
@@ -602,7 +655,7 @@ class ListDirectoryTool(FileSystemTool):
         "Consider using filesystem_directory_tree with max_depth=1 for hierarchical overview",
     ]
 
-    def _run(self, path: str = ".", include_sizes: bool = False, sort_by: str = "name") -> str:
+    def _run(self, path: str = ".", include_sizes: bool = False, sort_by: str = "name", max_results: Optional[int] = 200) -> str:
        """List directory contents."""
        try:
            target = self._resolve_path(path)
@@ -618,7 +671,8 @@
                entry_info = {
                    'name': entry.name,
                    'is_dir': entry.is_dir(),
-                   'size': entry.stat().st_size if entry.is_file() else 0
+                   'size': entry.stat().st_size if entry.is_file() else 0,
+                   'path': entry
                }
                entries.append(entry_info)
 
@@ -628,6 +682,18 @@
            else:
                entries.sort(key=lambda x: x['name'].lower())
 
+           # Apply limit
+           total_count = len(entries)
+           truncated = False
+           if max_results is not None and total_count > max_results:
+               entries = entries[:max_results]
+               truncated = True
+
+           # Get directory name for multi-directory configs
+           allowed_dirs = self._get_all_allowed_directories()
+           has_multiple_dirs = len(allowed_dirs) > 1
+           _, dir_name = self._get_relative_path_from_allowed_dirs(target) if has_multiple_dirs else ("", "")
+
            # Format output
            lines = []
            total_files = 0
@@ -636,7 +702,12 @@
 
            for entry in entries:
                prefix = "[DIR] " if entry['is_dir'] else "[FILE]"
-
+
+               # Add directory prefix for multi-directory configs
+               if has_multiple_dirs:
+                   name = f"{dir_name}/{entry['name']}"
+               else:
+                   name = entry['name']
 
                if include_sizes and not entry['is_dir']:
                    size_str = self._format_size(entry['size'])
@@ -665,6 +736,10 @@
                summary += f"\nCombined size: {self._format_size(total_size)}"
            result += summary
 
+           if truncated:
+               result += f"\n\n⚠️ OUTPUT TRUNCATED: Showing {len(entries)} of {total_count} entries from '{dir_name if has_multiple_dirs else path}' (max_results={max_results})"
+               result += "\n To see more: increase max_results or list a specific subdirectory"
+
            # Add note about how to access files
            result += "\n\nNote: Access files using paths shown above (e.g., 'agents/file.md' for items in agents/ directory)"
 
@@ -818,23 +893,34 @@ class SearchFilesTool(FileSystemTool):
        else:
            matches = sorted(all_matches)
 
-       # Format results
-
+       # Format results with directory prefixes for multi-directory configs
+       allowed_dirs = self._get_all_allowed_directories()
+       has_multiple_dirs = len(allowed_dirs) > 1
        results = []
+       search_dir_name = None
 
        for match in matches:
-
+           if has_multiple_dirs:
+               rel_path_str, dir_name = self._get_relative_path_from_allowed_dirs(match)
+               display_path = f"{dir_name}/{rel_path_str}"
+               if search_dir_name is None:
+                   search_dir_name = dir_name
+           else:
+               rel_path_str = str(match.relative_to(Path(self.base_directory).resolve()))
+               display_path = rel_path_str
+
            if match.is_dir():
-               results.append(f"📁 {
+               results.append(f"📁 {display_path}/")
            else:
                size = self._format_size(match.stat().st_size)
-               results.append(f"📄 {
+               results.append(f"📄 {display_path} ({size})")
 
        header = f"Found {total_count} matches for '{pattern}':\n\n"
        output = header + "\n".join(results)
 
        if truncated:
-
+           location_str = f"from '{search_dir_name}' " if search_dir_name else ""
+           output += f"\n\n⚠️ OUTPUT TRUNCATED: Showing {max_results} of {total_count} results {location_str}(max_results={max_results})"
            output += "\n To see more: increase max_results or use a more specific pattern"
 
        return output
```
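The basename-collision check above determines how entries are prefixed when several allowed directories are configured. A stand-alone sketch of the same rule, with illustrative paths:

```python
from pathlib import Path

allowed = [Path("/work/projectA/src"), Path("/work/projectB/src")]

# Same test as _check_basename_collision: do two roots share a basename?
basenames = [d.name for d in allowed]
collision = len(basenames) != len(set(basenames))  # True: both end in 'src'

# Same prefixing rule as _get_relative_path_from_allowed_dirs
for base in allowed:
    prefix = f"{base.parent.name}/{base.name}" if collision else base.name
    print(prefix)  # projectA/src, projectB/src
```

With distinct basenames the short form is used, so listings stay compact in the common single-root case.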
alita_sdk/runtime/clients/client.py
CHANGED
```diff
@@ -21,6 +21,7 @@ from .datasource import AlitaDataSource
 from .artifact import Artifact
 from ..langchain.chat_message_template import Jinja2TemplatedChatMessagesTemplate
 from ..utils.utils import TOOLKIT_SPLITTER
+from ..utils.mcp_oauth import McpAuthorizationRequired
 from ...tools import get_available_toolkit_models
 from ...tools.base_indexer_toolkit import IndexTools
 
@@ -469,11 +470,44 @@ class AlitaClient:
         return self._process_requst(data)
 
     def create_artifact(self, bucket_name, artifact_name, artifact_data):
+        # Sanitize filename to prevent regex errors during indexing
+        sanitized_name, was_modified = self._sanitize_artifact_name(artifact_name)
+        if was_modified:
+            logger.warning(f"Artifact filename sanitized: '{artifact_name}' -> '{sanitized_name}'")
+
         url = f'{self.artifacts_url}/{bucket_name.lower()}'
         data = requests.post(url, headers=self.headers, files={
-            'file': (
+            'file': (sanitized_name, artifact_data)
         }, verify=False)
         return self._process_requst(data)
+
+    @staticmethod
+    def _sanitize_artifact_name(filename: str) -> tuple:
+        """Sanitize filename for safe storage and regex pattern matching."""
+        import re
+        from pathlib import Path
+
+        if not filename or not filename.strip():
+            return "unnamed_file", True
+
+        original = filename
+        path_obj = Path(filename)
+        name = path_obj.stem
+        extension = path_obj.suffix
+
+        # Whitelist: alphanumeric, underscore, hyphen, space, Unicode letters/digits
+        sanitized_name = re.sub(r'[^\w\s-]', '', name, flags=re.UNICODE)
+        sanitized_name = re.sub(r'[-\s]+', '-', sanitized_name)
+        sanitized_name = sanitized_name.strip('-').strip()
+
+        if not sanitized_name:
+            sanitized_name = "file"
+
+        if extension:
+            extension = re.sub(r'[^\w.-]', '', extension, flags=re.UNICODE)
+
+        sanitized = sanitized_name + extension
+        return sanitized, (sanitized != original)
 
     def download_artifact(self, bucket_name, artifact_name):
         url = f'{self.artifact_url}/{bucket_name.lower()}/{artifact_name}'
@@ -814,26 +848,12 @@
 
        # Instantiate the toolkit with client and LLM support
        try:
-           tools = instantiate_toolkit_with_client(toolkit_config, llm, self, mcp_tokens=mcp_tokens)
-       except
+           tools = instantiate_toolkit_with_client(toolkit_config, llm, self, mcp_tokens=mcp_tokens, use_prefix=False)
+       except McpAuthorizationRequired:
            # Re-raise McpAuthorizationRequired to allow proper handling upstream
-
-
-
-           if isinstance(toolkit_error, McpAuthorizationRequired):
-               logger.info(f"McpAuthorizationRequired detected, re-raising")
-               raise
-
-           # Also check for wrapped exceptions (e.g., from asyncio)
-           if hasattr(toolkit_error, '__cause__') and isinstance(toolkit_error.__cause__, McpAuthorizationRequired):
-               logger.info(f"Wrapped McpAuthorizationRequired detected, re-raising cause")
-               raise toolkit_error.__cause__
-
-           # Check exception class name as fallback (in case of module reload issues)
-           if toolkit_error.__class__.__name__ == 'McpAuthorizationRequired':
-               logger.info(f"McpAuthorizationRequired detected by name, re-raising")
-               raise
-
+           logger.info(f"McpAuthorizationRequired detected, re-raising")
+           raise
+       except Exception as toolkit_error:
            # For other errors, return error response
            return {
                "success": False,
@@ -1068,7 +1088,6 @@
 
        except Exception as e:
            # Re-raise McpAuthorizationRequired to allow proper handling upstream
-           from ..utils.mcp_oauth import McpAuthorizationRequired
            if isinstance(e, McpAuthorizationRequired):
                raise
            logger = logging.getLogger(__name__)
```
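The effect of `_sanitize_artifact_name` can be traced with a stand-alone copy of its regex pipeline; the sample filenames are illustrative:

```python
import re
from pathlib import Path

def sanitize(filename: str) -> str:
    # Stand-alone rendition of the pipeline in AlitaClient._sanitize_artifact_name
    p = Path(filename)
    name = re.sub(r'[^\w\s-]', '', p.stem, flags=re.UNICODE)  # drop regex-hostile chars
    name = re.sub(r'[-\s]+', '-', name).strip('-').strip() or "file"
    ext = re.sub(r'[^\w.-]', '', p.suffix, flags=re.UNICODE)
    return name + ext

print(sanitize("report (final)*.xlsx"))  # report-final.xlsx
print(sanitize("???"))                   # file
```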
alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py
CHANGED
```diff
@@ -21,14 +21,16 @@ from openpyxl import load_workbook
 from xlrd import open_workbook
 from langchain_core.documents import Document
 from .AlitaTableLoader import AlitaTableLoader
+from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
 
 cell_delimiter = " | "
 
 class AlitaExcelLoader(AlitaTableLoader):
-    excel_by_sheets: bool = False
     sheet_name: str = None
-    return_type: str = 'str'
     file_name: str = None
+    max_tokens: int = LOADER_MAX_TOKENS_DEFAULT
+    add_header_to_chunks: bool = False
+    header_row_number: int = 1
 
     def __init__(self, **kwargs):
         if not kwargs.get('file_path'):
@@ -39,9 +41,22 @@ class AlitaExcelLoader(AlitaTableLoader):
         else:
             self.file_name = kwargs.get('file_path')
         super().__init__(**kwargs)
-        self.excel_by_sheets = kwargs.get('excel_by_sheets')
-        self.return_type = kwargs.get('return_type')
         self.sheet_name = kwargs.get('sheet_name')
+        # Set and validate chunking parameters only once
+        self.max_tokens = int(kwargs.get('max_tokens', LOADER_MAX_TOKENS_DEFAULT))
+        self.add_header_to_chunks = bool(kwargs.get('add_header_to_chunks', False))
+        header_row_number = kwargs.get('header_row_number', 1)
+        # Validate header_row_number
+        try:
+            header_row_number = int(header_row_number)
+            if header_row_number > 0:
+                self.header_row_number = header_row_number
+            else:
+                self.header_row_number = 1
+                self.add_header_to_chunks = False
+        except (ValueError, TypeError):
+            self.header_row_number = 1
+            self.add_header_to_chunks = False
 
     def get_content(self):
         try:
@@ -64,59 +79,32 @@
         Reads .xlsx files using openpyxl.
         """
         workbook = load_workbook(self.file_path, data_only=True)  # `data_only=True` ensures we get cell values, not formulas
-
+        sheets = workbook.sheetnames
         if self.sheet_name:
-
-            if self.sheet_name in workbook.sheetnames:
+            if self.sheet_name in sheets:
                 sheet_content = self.parse_sheet(workbook[self.sheet_name])
-                return sheet_content
             else:
-
-
-            # Parse each sheet individually and return as a dictionary
-            result = {}
-            for sheet_name in workbook.sheetnames:
-                sheet_content = self.parse_sheet(workbook[sheet_name])
-                result[sheet_name] = sheet_content
-            return result
+                sheet_content = [f"Sheet '{self.sheet_name}' does not exist in the workbook."]
+            return {self.sheet_name: sheet_content}
         else:
-            #
-
-            for sheet_name in workbook.sheetnames:
-                sheet_content = self.parse_sheet(workbook[sheet_name])
-                result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
-            return "\n\n".join(result)
+            # Dictionary comprehension for all sheets
+            return {name: self.parse_sheet(workbook[name]) for name in sheets}
 
     def _read_xls(self):
         """
         Reads .xls files using xlrd.
         """
         workbook = open_workbook(filename=self.file_name, file_contents=self.file_content)
-
+        sheets = workbook.sheet_names()
         if self.sheet_name:
-
-            if self.sheet_name in workbook.sheet_names():
+            if self.sheet_name in sheets:
                 sheet = workbook.sheet_by_name(self.sheet_name)
-
-                return sheet_content
+                return {self.sheet_name: self.parse_sheet_xls(sheet)}
             else:
-
-        elif self.excel_by_sheets:
-            # Parse each sheet individually and return as a dictionary
-            result = {}
-            for sheet_name in workbook.sheet_names():
-                sheet = workbook.sheet_by_name(sheet_name)
-                sheet_content = self.parse_sheet_xls(sheet)
-                result[sheet_name] = sheet_content
-            return result
+                return {self.sheet_name: [f"Sheet '{self.sheet_name}' does not exist in the workbook."]}
         else:
-            #
-
-            for sheet_name in workbook.sheet_names():
-                sheet = workbook.sheet_by_name(sheet_name)
-                sheet_content = self.parse_sheet_xls(sheet)
-                result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
-            return "\n\n".join(result)
+            # Dictionary comprehension for all sheets
+            return {name: self.parse_sheet_xls(workbook.sheet_by_name(name)) for name in sheets}
 
     def parse_sheet(self, sheet):
         """
@@ -170,34 +158,89 @@
         # Format the sheet content based on the return type
         return self._format_sheet_content(sheet_content)
 
-    def _format_sheet_content(self,
+    def _format_sheet_content(self, rows):
         """
-
+        Specification:
+        Formats a list of sheet rows into a list of string chunks according to the following rules:
+        1. If max_tokens < 1, returns a single chunk (list of one string) with all rows joined by a newline ('\n').
+           - If add_header_to_chunks is True and header_row_number is valid, the specified header row is prepended as the first line.
+        2. If max_tokens >= 1:
+           a. Each chunk is a string containing one or more rows, separated by newlines ('\n'), such that the total token count (as measured by tiktoken) does not exceed max_tokens.
+           b. If add_header_to_chunks is True and header_row_number is valid, the specified header row is prepended once at the top of each chunk (not before every row).
+           c. If a single row exceeds max_tokens, it is placed in its own chunk without splitting, with the header prepended if applicable.
+        3. Returns: List[str], where each string is a chunk ready for further processing.
         """
-
-
-
-
-
-
-
-
-
-
-
+        import tiktoken
+        encoding = tiktoken.get_encoding('cl100k_base')
+
+        # --- Inner functions ---
+        def count_tokens(text):
+            """Count tokens in text using tiktoken encoding."""
+            return len(encoding.encode(text))
+
+        def finalize_chunk(chunk_rows):
+            """Join rows for a chunk, prepending header if needed."""
+            if self.add_header_to_chunks and header:
+                return '\n'.join([header] + chunk_rows)
+            else:
+                return '\n'.join(chunk_rows)
+        # --- End inner functions ---
+
+        # If max_tokens < 1, return all rows as a single chunk
+        if self.max_tokens < 1:
+            return ['\n'.join(rows)]
+
+        # Extract header if needed
+        header = None
+        if self.add_header_to_chunks and rows:
+            header_idx = self.header_row_number - 1
+            header = rows.pop(header_idx)
+
+        chunks = []  # List to store final chunks
+        current_chunk = []  # Accumulate rows for the current chunk
+        current_tokens = 0  # Token count for the current chunk
+
+        for row in rows:
+            row_tokens = count_tokens(row)
+            # If row itself exceeds max_tokens, flush current chunk and add row as its own chunk (with header if needed)
+            if row_tokens > self.max_tokens:
+                if current_chunk:
+                    chunks.append(finalize_chunk(current_chunk))
+                    current_chunk = []
+                    current_tokens = 0
+                # Add the large row as its own chunk, with header if needed
+                if self.add_header_to_chunks and header:
+                    chunks.append(finalize_chunk([row]))
+                else:
+                    chunks.append(row)
+                continue
+            # If adding row would exceed max_tokens, flush current chunk and start new
+            if current_tokens + row_tokens > self.max_tokens:
+                if current_chunk:
+                    chunks.append(finalize_chunk(current_chunk))
+                current_chunk = [row]
+                current_tokens = row_tokens
+            else:
+                current_chunk.append(row)
+                current_tokens += row_tokens
+        # Add any remaining rows as the last chunk
+        if current_chunk:
+            chunks.append(finalize_chunk(current_chunk))
+        return chunks
 
     def load(self) -> list:
         docs = []
         content_per_sheet = self.get_content()
-
+        # content_per_sheet is a dict of sheet_name: list of chunk strings
+        for sheet_name, content_chunks in content_per_sheet.items():
            metadata = {
                "source": f'{self.file_path}:{sheet_name}',
                "sheet_name": sheet_name,
                "file_type": "excel",
-               "excel_by_sheets": self.excel_by_sheets,
-               "return_type": self.return_type,
            }
-
+           # Each chunk is a separate Document
+           for chunk in content_chunks:
+               docs.append(Document(page_content=chunk, metadata=metadata))
        return docs
 
    def read(self, lazy: bool = False):
```
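With the rewrite, `AlitaExcelLoader` always returns content per sheet and splits rows into token-budgeted chunks instead of honoring the removed `excel_by_sheets`/`return_type` flags. A hypothetical invocation under the new parameters; the file path and budget are illustrative:

```python
from alita_sdk.runtime.langchain.document_loaders.AlitaExcelLoader import AlitaExcelLoader

loader = AlitaExcelLoader(
    file_path="data.xlsx",      # illustrative path
    max_tokens=500,             # per-chunk budget, counted with tiktoken cl100k_base
    add_header_to_chunks=True,  # repeat the header row at the top of every chunk
    header_row_number=1,        # 1-based; invalid values fall back to 1 and disable headers
)
docs = loader.load()  # one Document per chunk; metadata carries sheet_name
```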
alita_sdk/runtime/langchain/document_loaders/constants.py
CHANGED
```diff
@@ -27,6 +27,7 @@ from .AlitaTextLoader import AlitaTextLoader
 from .AlitaMarkdownLoader import AlitaMarkdownLoader
 from .AlitaPythonLoader import AlitaPythonLoader
 from enum import Enum
+from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
 
 
 class LoaderProperties(Enum):
@@ -34,7 +35,7 @@ class LoaderProperties(Enum):
     PROMPT_DEFAULT = 'use_default_prompt'
     PROMPT = 'prompt'
 
-DEFAULT_ALLOWED_BASE = {'max_tokens':
+DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}
 
 DEFAULT_ALLOWED_WITH_LLM = {
     **DEFAULT_ALLOWED_BASE,
@@ -43,6 +44,8 @@ DEFAULT_ALLOWED_WITH_LLM = {
     LoaderProperties.PROMPT.value: "",
 }
 
+DEFAULT_ALLOWED_EXCEL = {**DEFAULT_ALLOWED_WITH_LLM, 'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''}
+
 # Image file loaders mapping - directly supported by LLM with image_url
 image_loaders_map = {
     '.png': {
@@ -162,11 +165,12 @@ document_loaders_map = {
                        'spreadsheetml.sheet'),
        'is_multimodal_processing': False,
        'kwargs': {
-           '
-           '
-           '
+           'add_header_to_chunks': False,
+           'header_row_number': 1,
+           'max_tokens': -1,
+           'sheet_name': ''
        },
-       'allowed_to_override':
+       'allowed_to_override': DEFAULT_ALLOWED_EXCEL
    },
    '.xls': {
        'class': AlitaExcelLoader,
@@ -177,7 +181,7 @@
        'raw_content': True,
        'cleanse': False
    },
-   'allowed_to_override':
+   'allowed_to_override': DEFAULT_ALLOWED_EXCEL
 },
 '.pdf': {
     'class': AlitaPDFLoader,
```
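`DEFAULT_ALLOWED_EXCEL` relies on dict-merge ordering: later keys win, so its `'max_tokens': -1` overrides the base default. A minimal sketch; the value of `LOADER_MAX_TOKENS_DEFAULT` is assumed for illustration and the LLM-related keys are omitted:

```python
LOADER_MAX_TOKENS_DEFAULT = 512  # assumed value, not taken from the SDK

DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}
DEFAULT_ALLOWED_EXCEL = {
    **DEFAULT_ALLOWED_BASE,
    'add_header_to_chunks': False,
    'header_row_number': 1,
    'max_tokens': -1,  # later key wins the merge
    'sheet_name': '',
}

print(DEFAULT_ALLOWED_EXCEL['max_tokens'])  # -1: Excel defaults to whole-sheet chunks
```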
alita_sdk/runtime/langchain/langraph_agent.py
CHANGED
```diff
@@ -635,6 +635,7 @@ def create_graph(
             output_variables=output_vars,
             input_variables=node.get('input', ['messages']),
             structured_output=node.get('structured_output', False),
+            tool_execution_timeout=node.get('tool_execution_timeout', 900),
             available_tools=available_tools,
             tool_names=tool_names,
             steps_limit=kwargs.get('steps_limit', 25)
@@ -1010,7 +1011,7 @@ class LangGraphAgentRunnable(CompiledStateGraph):
         thread_id: str,
         current_recursion_limit: int,
     ) -> dict:
-        """Handle GraphRecursionError by returning a soft
+        """Handle GraphRecursionError by returning a soft-boundary response."""
         config_state = self.get_state(config)
         is_execution_finished = False
```
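The new `tool_execution_timeout` is read from the node definition with a default of 900 (presumably seconds, though the hunk does not show the unit). A sketch of a node dict that overrides it; the node schema is abbreviated to the keys visible in this hunk:

```python
# Hypothetical node definition limited to keys visible above
node = {
    "input": ["messages"],
    "structured_output": False,
    "tool_execution_timeout": 300,  # override; falls back to 900 when absent
}

timeout = node.get("tool_execution_timeout", 900)  # same lookup as create_graph
```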
|