alita-sdk 0.3.486__py3-none-any.whl → 0.3.515__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic.

Files changed (124)
  1. alita_sdk/cli/agent_loader.py +27 -6
  2. alita_sdk/cli/agents.py +10 -1
  3. alita_sdk/cli/inventory.py +12 -195
  4. alita_sdk/cli/tools/filesystem.py +95 -9
  5. alita_sdk/community/inventory/__init__.py +12 -0
  6. alita_sdk/community/inventory/toolkit.py +9 -5
  7. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  8. alita_sdk/configurations/ado.py +144 -0
  9. alita_sdk/configurations/confluence.py +76 -42
  10. alita_sdk/configurations/figma.py +76 -0
  11. alita_sdk/configurations/gitlab.py +2 -0
  12. alita_sdk/configurations/qtest.py +72 -1
  13. alita_sdk/configurations/report_portal.py +96 -0
  14. alita_sdk/configurations/sharepoint.py +148 -0
  15. alita_sdk/configurations/testio.py +83 -0
  16. alita_sdk/runtime/clients/artifact.py +2 -2
  17. alita_sdk/runtime/clients/client.py +64 -40
  18. alita_sdk/runtime/clients/sandbox_client.py +14 -0
  19. alita_sdk/runtime/langchain/assistant.py +48 -2
  20. alita_sdk/runtime/langchain/constants.py +3 -1
  21. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  22. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  23. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +2 -1
  24. alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
  25. alita_sdk/runtime/langchain/langraph_agent.py +10 -10
  26. alita_sdk/runtime/langchain/utils.py +6 -1
  27. alita_sdk/runtime/toolkits/artifact.py +14 -5
  28. alita_sdk/runtime/toolkits/datasource.py +13 -6
  29. alita_sdk/runtime/toolkits/mcp.py +94 -219
  30. alita_sdk/runtime/toolkits/planning.py +13 -6
  31. alita_sdk/runtime/toolkits/tools.py +60 -25
  32. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  33. alita_sdk/runtime/tools/artifact.py +185 -23
  34. alita_sdk/runtime/tools/function.py +2 -1
  35. alita_sdk/runtime/tools/llm.py +155 -34
  36. alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
  37. alita_sdk/runtime/tools/mcp_server_tool.py +2 -4
  38. alita_sdk/runtime/tools/vectorstore_base.py +3 -3
  39. alita_sdk/runtime/utils/AlitaCallback.py +136 -21
  40. alita_sdk/runtime/utils/mcp_client.py +492 -0
  41. alita_sdk/runtime/utils/mcp_oauth.py +125 -8
  42. alita_sdk/runtime/utils/mcp_sse_client.py +35 -6
  43. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  44. alita_sdk/runtime/utils/toolkit_utils.py +7 -13
  45. alita_sdk/runtime/utils/utils.py +2 -0
  46. alita_sdk/tools/__init__.py +15 -0
  47. alita_sdk/tools/ado/repos/__init__.py +10 -12
  48. alita_sdk/tools/ado/test_plan/__init__.py +23 -8
  49. alita_sdk/tools/ado/wiki/__init__.py +24 -8
  50. alita_sdk/tools/ado/wiki/ado_wrapper.py +21 -7
  51. alita_sdk/tools/ado/work_item/__init__.py +24 -8
  52. alita_sdk/tools/advanced_jira_mining/__init__.py +10 -8
  53. alita_sdk/tools/aws/delta_lake/__init__.py +12 -9
  54. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  55. alita_sdk/tools/azure_ai/search/__init__.py +9 -7
  56. alita_sdk/tools/base/tool.py +5 -1
  57. alita_sdk/tools/base_indexer_toolkit.py +26 -1
  58. alita_sdk/tools/bitbucket/__init__.py +14 -10
  59. alita_sdk/tools/bitbucket/api_wrapper.py +50 -2
  60. alita_sdk/tools/browser/__init__.py +5 -4
  61. alita_sdk/tools/carrier/__init__.py +5 -6
  62. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  63. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +2 -0
  64. alita_sdk/tools/chunkers/universal_chunker.py +1 -0
  65. alita_sdk/tools/cloud/aws/__init__.py +9 -7
  66. alita_sdk/tools/cloud/azure/__init__.py +9 -7
  67. alita_sdk/tools/cloud/gcp/__init__.py +9 -7
  68. alita_sdk/tools/cloud/k8s/__init__.py +9 -7
  69. alita_sdk/tools/code/linter/__init__.py +9 -8
  70. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  71. alita_sdk/tools/code/sonar/__init__.py +9 -7
  72. alita_sdk/tools/confluence/__init__.py +15 -10
  73. alita_sdk/tools/confluence/api_wrapper.py +63 -14
  74. alita_sdk/tools/custom_open_api/__init__.py +11 -5
  75. alita_sdk/tools/elastic/__init__.py +10 -8
  76. alita_sdk/tools/elitea_base.py +387 -9
  77. alita_sdk/tools/figma/__init__.py +8 -7
  78. alita_sdk/tools/github/__init__.py +12 -14
  79. alita_sdk/tools/github/github_client.py +68 -2
  80. alita_sdk/tools/github/tool.py +5 -1
  81. alita_sdk/tools/gitlab/__init__.py +14 -11
  82. alita_sdk/tools/gitlab/api_wrapper.py +81 -1
  83. alita_sdk/tools/gitlab_org/__init__.py +9 -8
  84. alita_sdk/tools/google/bigquery/__init__.py +12 -12
  85. alita_sdk/tools/google/bigquery/tool.py +5 -1
  86. alita_sdk/tools/google_places/__init__.py +9 -8
  87. alita_sdk/tools/jira/__init__.py +15 -10
  88. alita_sdk/tools/keycloak/__init__.py +10 -8
  89. alita_sdk/tools/localgit/__init__.py +8 -3
  90. alita_sdk/tools/localgit/local_git.py +62 -54
  91. alita_sdk/tools/localgit/tool.py +5 -1
  92. alita_sdk/tools/memory/__init__.py +11 -3
  93. alita_sdk/tools/ocr/__init__.py +10 -8
  94. alita_sdk/tools/openapi/__init__.py +6 -2
  95. alita_sdk/tools/pandas/__init__.py +9 -7
  96. alita_sdk/tools/postman/__init__.py +10 -11
  97. alita_sdk/tools/pptx/__init__.py +9 -9
  98. alita_sdk/tools/qtest/__init__.py +9 -8
  99. alita_sdk/tools/rally/__init__.py +9 -8
  100. alita_sdk/tools/report_portal/__init__.py +11 -9
  101. alita_sdk/tools/salesforce/__init__.py +9 -9
  102. alita_sdk/tools/servicenow/__init__.py +10 -8
  103. alita_sdk/tools/sharepoint/__init__.py +9 -8
  104. alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
  105. alita_sdk/tools/slack/__init__.py +8 -7
  106. alita_sdk/tools/sql/__init__.py +9 -8
  107. alita_sdk/tools/testio/__init__.py +9 -8
  108. alita_sdk/tools/testrail/__init__.py +10 -8
  109. alita_sdk/tools/utils/__init__.py +9 -4
  110. alita_sdk/tools/utils/text_operations.py +254 -0
  111. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +16 -18
  112. alita_sdk/tools/xray/__init__.py +10 -8
  113. alita_sdk/tools/yagmail/__init__.py +8 -3
  114. alita_sdk/tools/zephyr/__init__.py +8 -7
  115. alita_sdk/tools/zephyr_enterprise/__init__.py +10 -8
  116. alita_sdk/tools/zephyr_essential/__init__.py +9 -8
  117. alita_sdk/tools/zephyr_scale/__init__.py +9 -8
  118. alita_sdk/tools/zephyr_squad/__init__.py +9 -8
  119. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/METADATA +1 -1
  120. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/RECORD +124 -119
  121. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/WHEEL +0 -0
  122. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/entry_points.txt +0 -0
  123. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/licenses/LICENSE +0 -0
  124. {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py

@@ -21,14 +21,16 @@ from openpyxl import load_workbook
  from xlrd import open_workbook
  from langchain_core.documents import Document
  from .AlitaTableLoader import AlitaTableLoader
+ from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT

  cell_delimiter = " | "

  class AlitaExcelLoader(AlitaTableLoader):
- excel_by_sheets: bool = False
  sheet_name: str = None
- return_type: str = 'str'
  file_name: str = None
+ max_tokens: int = LOADER_MAX_TOKENS_DEFAULT
+ add_header_to_chunks: bool = False
+ header_row_number: int = 1

  def __init__(self, **kwargs):
  if not kwargs.get('file_path'):
@@ -39,9 +41,22 @@ class AlitaExcelLoader(AlitaTableLoader):
  else:
  self.file_name = kwargs.get('file_path')
  super().__init__(**kwargs)
- self.excel_by_sheets = kwargs.get('excel_by_sheets')
- self.return_type = kwargs.get('return_type')
  self.sheet_name = kwargs.get('sheet_name')
+ # Set and validate chunking parameters only once
+ self.max_tokens = int(kwargs.get('max_tokens', LOADER_MAX_TOKENS_DEFAULT))
+ self.add_header_to_chunks = bool(kwargs.get('add_header_to_chunks', False))
+ header_row_number = kwargs.get('header_row_number', 1)
+ # Validate header_row_number
+ try:
+ header_row_number = int(header_row_number)
+ if header_row_number > 0:
+ self.header_row_number = header_row_number
+ else:
+ self.header_row_number = 1
+ self.add_header_to_chunks = False
+ except (ValueError, TypeError):
+ self.header_row_number = 1
+ self.add_header_to_chunks = False

  def get_content(self):
  try:
@@ -64,59 +79,32 @@
  Reads .xlsx files using openpyxl.
  """
  workbook = load_workbook(self.file_path, data_only=True) # `data_only=True` ensures we get cell values, not formulas
-
+ sheets = workbook.sheetnames
  if self.sheet_name:
- # If a specific sheet name is provided, parse only that sheet
- if self.sheet_name in workbook.sheetnames:
+ if self.sheet_name in sheets:
  sheet_content = self.parse_sheet(workbook[self.sheet_name])
- return sheet_content
  else:
- raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
- elif self.excel_by_sheets:
- # Parse each sheet individually and return as a dictionary
- result = {}
- for sheet_name in workbook.sheetnames:
- sheet_content = self.parse_sheet(workbook[sheet_name])
- result[sheet_name] = sheet_content
- return result
+ sheet_content = [f"Sheet '{self.sheet_name}' does not exist in the workbook."]
+ return {self.sheet_name: sheet_content}
  else:
- # Combine all sheets into a single string result
- result = []
- for sheet_name in workbook.sheetnames:
- sheet_content = self.parse_sheet(workbook[sheet_name])
- result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
- return "\n\n".join(result)
+ # Dictionary comprehension for all sheets
+ return {name: self.parse_sheet(workbook[name]) for name in sheets}

  def _read_xls(self):
  """
  Reads .xls files using xlrd.
  """
  workbook = open_workbook(filename=self.file_name, file_contents=self.file_content)
-
+ sheets = workbook.sheet_names()
  if self.sheet_name:
- # If a specific sheet name is provided, parse only that sheet
- if self.sheet_name in workbook.sheet_names():
+ if self.sheet_name in sheets:
  sheet = workbook.sheet_by_name(self.sheet_name)
- sheet_content = self.parse_sheet_xls(sheet)
- return sheet_content
+ return {self.sheet_name: self.parse_sheet_xls(sheet)}
  else:
- raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
- elif self.excel_by_sheets:
- # Parse each sheet individually and return as a dictionary
- result = {}
- for sheet_name in workbook.sheet_names():
- sheet = workbook.sheet_by_name(sheet_name)
- sheet_content = self.parse_sheet_xls(sheet)
- result[sheet_name] = sheet_content
- return result
+ return {self.sheet_name: [f"Sheet '{self.sheet_name}' does not exist in the workbook."]}
  else:
- # Combine all sheets into a single string result
- result = []
- for sheet_name in workbook.sheet_names():
- sheet = workbook.sheet_by_name(sheet_name)
- sheet_content = self.parse_sheet_xls(sheet)
- result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
- return "\n\n".join(result)
+ # Dictionary comprehension for all sheets
+ return {name: self.parse_sheet_xls(workbook.sheet_by_name(name)) for name in sheets}

  def parse_sheet(self, sheet):
  """
@@ -170,34 +158,89 @@
  # Format the sheet content based on the return type
  return self._format_sheet_content(sheet_content)

- def _format_sheet_content(self, sheet_content):
+ def _format_sheet_content(self, rows):
  """
- Formats the sheet content based on the return type.
+ Specification:
+ Formats a list of sheet rows into a list of string chunks according to the following rules:
+ 1. If max_tokens < 1, returns a single chunk (list of one string) with all rows joined by a newline ('\n').
+ - If add_header_to_chunks is True and header_row_number is valid, the specified header row is prepended as the first line.
+ 2. If max_tokens >= 1:
+ a. Each chunk is a string containing one or more rows, separated by newlines ('\n'), such that the total token count (as measured by tiktoken) does not exceed max_tokens.
+ b. If add_header_to_chunks is True and header_row_number is valid, the specified header row is prepended once at the top of each chunk (not before every row).
+ c. If a single row exceeds max_tokens, it is placed in its own chunk without splitting, with the header prepended if applicable.
+ 3. Returns: List[str], where each string is a chunk ready for further processing.
  """
- if self.return_type == 'dict':
- # Convert to a list of dictionaries (each row is a dictionary)
- headers = sheet_content[0].split(cell_delimiter) if sheet_content else []
- data_rows = sheet_content[1:] if len(sheet_content) > 1 else []
- return [dict(zip(headers, row.split(cell_delimiter))) for row in data_rows]
- elif self.return_type == 'csv':
- # Return as CSV (newline-separated rows, comma-separated values)
- return "\n".join([",".join(row.split(cell_delimiter)) for row in sheet_content])
- else:
- # Default: Return as plain text (newline-separated rows, pipe-separated values)
- return "\n".join(sheet_content)
+ import tiktoken
+ encoding = tiktoken.get_encoding('cl100k_base')
+
+ # --- Inner functions ---
+ def count_tokens(text):
+ """Count tokens in text using tiktoken encoding."""
+ return len(encoding.encode(text))
+
+ def finalize_chunk(chunk_rows):
+ """Join rows for a chunk, prepending header if needed."""
+ if self.add_header_to_chunks and header:
+ return '\n'.join([header] + chunk_rows)
+ else:
+ return '\n'.join(chunk_rows)
+ # --- End inner functions ---
+
+ # If max_tokens < 1, return all rows as a single chunk
+ if self.max_tokens < 1:
+ return ['\n'.join(rows)]
+
+ # Extract header if needed
+ header = None
+ if self.add_header_to_chunks and rows:
+ header_idx = self.header_row_number - 1
+ header = rows.pop(header_idx)
+
+ chunks = [] # List to store final chunks
+ current_chunk = [] # Accumulate rows for the current chunk
+ current_tokens = 0 # Token count for the current chunk
+
+ for row in rows:
+ row_tokens = count_tokens(row)
+ # If row itself exceeds max_tokens, flush current chunk and add row as its own chunk (with header if needed)
+ if row_tokens > self.max_tokens:
+ if current_chunk:
+ chunks.append(finalize_chunk(current_chunk))
+ current_chunk = []
+ current_tokens = 0
+ # Add the large row as its own chunk, with header if needed
+ if self.add_header_to_chunks and header:
+ chunks.append(finalize_chunk([row]))
+ else:
+ chunks.append(row)
+ continue
+ # If adding row would exceed max_tokens, flush current chunk and start new
+ if current_tokens + row_tokens > self.max_tokens:
+ if current_chunk:
+ chunks.append(finalize_chunk(current_chunk))
+ current_chunk = [row]
+ current_tokens = row_tokens
+ else:
+ current_chunk.append(row)
+ current_tokens += row_tokens
+ # Add any remaining rows as the last chunk
+ if current_chunk:
+ chunks.append(finalize_chunk(current_chunk))
+ return chunks

  def load(self) -> list:
  docs = []
  content_per_sheet = self.get_content()
- for sheet_name, content in content_per_sheet.items():
+ # content_per_sheet is a dict of sheet_name: list of chunk strings
+ for sheet_name, content_chunks in content_per_sheet.items():
  metadata = {
  "source": f'{self.file_path}:{sheet_name}',
  "sheet_name": sheet_name,
  "file_type": "excel",
- "excel_by_sheets": self.excel_by_sheets,
- "return_type": self.return_type,
  }
- docs.append(Document(page_content=f"Sheet: {sheet_name}\n {str(content)}", metadata=metadata))
+ # Each chunk is a separate Document
+ for chunk in content_chunks:
+ docs.append(Document(page_content=chunk, metadata=metadata))
  return docs

  def read(self, lazy: bool = False):
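For context, a minimal usage sketch of the reworked loader. The workbook path and parameter values below are hypothetical, the parameter names and import path follow the diff above, and the remaining AlitaTableLoader defaults are assumed to apply:

    from alita_sdk.runtime.langchain.document_loaders.AlitaExcelLoader import AlitaExcelLoader

    # Hypothetical workbook; chunking parameters mirror the new kwargs above.
    loader = AlitaExcelLoader(
        file_path="report.xlsx",
        max_tokens=512,               # cap each chunk at roughly 512 tokens (tiktoken cl100k_base)
        add_header_to_chunks=True,    # prepend the header row to every chunk
        header_row_number=1,          # 1-based index of the header row
        sheet_name="",                # empty string loads every sheet
    )

    # load() now yields one Document per chunk per sheet; metadata carries source, sheet_name and file_type.
    for doc in loader.load():
        print(doc.metadata["sheet_name"], len(doc.page_content))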
alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py (new file)

@@ -0,0 +1,77 @@
+ from .AlitaJSONLoader import AlitaJSONLoader
+ import json
+ from io import StringIO
+ from typing import List, Iterator
+
+ from langchain_core.documents import Document
+ from langchain_core.tools import ToolException
+
+
+ class AlitaJSONLinesLoader(AlitaJSONLoader):
+ """Load local JSONL files (one JSON object per line) using AlitaJSONLoader behavior.
+
+ Behavior:
+ - Supports both `file_path` and `file_content` (bytes or file-like object), same as AlitaJSONLoader.
+ - Treats each non-empty line as an independent JSON object.
+ - Aggregates all parsed JSON objects into a list and feeds them through the same
+ RecursiveJsonSplitter-based chunking used by AlitaJSONLoader.lazy_load.
+ - Returns a list of Documents with chunked JSON content.
+ """
+
+ def __init__(self, **kwargs):
+ # Reuse AlitaJSONLoader initialization logic (file_path / file_content handling, encoding, etc.)
+ super().__init__(**kwargs)
+
+ def _iter_lines(self) -> Iterator[str]:
+ """Yield lines from file_path or file_content, mirroring AlitaJSONLoader sources."""
+ # Prefer file_path if available
+ if hasattr(self, "file_path") and self.file_path:
+ with open(self.file_path, "r", encoding=self.encoding) as f:
+ for line in f:
+ yield line
+ # Fallback to file_content if available
+ elif hasattr(self, "file_content") and self.file_content:
+ # file_content may be bytes or a file-like object
+ if isinstance(self.file_content, (bytes, bytearray)):
+ text = self.file_content.decode(self.encoding)
+ for line in StringIO(text):
+ yield line
+ else:
+ # Assume it's a text file-like object positioned at the beginning
+ self.file_content.seek(0)
+ for line in self.file_content:
+ yield line
+ else:
+ raise ToolException("'file_path' or 'file_content' parameter should be provided.")
+
+ def load(self) -> List[Document]: # type: ignore[override]
+ """Load JSONL content by delegating each non-empty line to AlitaJSONLoader.
+
+ For each non-empty line in the underlying source (file_path or file_content):
+ - Create a temporary AlitaJSONLoader instance with that line as file_content.
+ - Call lazy_load() on that instance to apply the same RecursiveJsonSplitter logic
+ as for a normal JSON file.
+ - Accumulate all Documents from all lines and return them as a single list.
+ """
+ docs: List[Document] = []
+
+ for raw_line in self._iter_lines():
+ line = raw_line.strip()
+ if not line:
+ continue
+ try:
+ # Instantiate a per-line AlitaJSONLoader using the same configuration
+ line_loader = AlitaJSONLoader(
+ file_content=line,
+ file_name=getattr(self, "file_name", str(getattr(self, "file_path", "no_name"))),
+ encoding=self.encoding,
+ autodetect_encoding=self.autodetect_encoding,
+ max_tokens=self.max_tokens,
+ )
+
+ for doc in line_loader.lazy_load():
+ docs.append(doc)
+ except Exception as e:
+ raise ToolException(f"Error processing JSONL line: {line[:100]}... Error: {e}") from e
+
+ return docs
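The new loader delegates each non-empty line to a per-line AlitaJSONLoader, so a JSONL payload can be passed either as a path or as raw bytes. A minimal sketch (data and file name are hypothetical; constructor arguments mirror the per-line call above):

    from alita_sdk.runtime.langchain.document_loaders.AlitaJSONLinesLoader import AlitaJSONLinesLoader

    # Two JSON objects, one per line (hypothetical content).
    jsonl_bytes = b'{"id": 1, "text": "first record"}\n{"id": 2, "text": "second record"}\n'

    loader = AlitaJSONLinesLoader(
        file_content=jsonl_bytes,     # bytes or a file-like object; file_path is also supported
        file_name="records.jsonl",    # hypothetical name, used for source metadata
        max_tokens=512,               # forwarded to the per-line RecursiveJsonSplitter chunking
    )

    docs = loader.load()              # one or more Documents per JSONL line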
alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py

@@ -32,6 +32,8 @@ class AlitaJSONLoader(BaseLoader):
  elif hasattr(self, 'file_content') and self.file_content:
  if isinstance(self.file_content, bytes):
  return json.loads(self.file_content.decode(self.encoding))
+ elif isinstance(self.file_content, str):
+ return json.loads(self.file_content)
  else:
  return json.load(self.file_content)
  else:
@@ -45,7 +47,6 @@
  try:
  with open(self.file_path, encoding=encoding.encoding) as f:
  return f.read()
- break
  except UnicodeDecodeError:
  continue
  elif hasattr(self, 'file_content') and self.file_content:
alita_sdk/runtime/langchain/document_loaders/constants.py

@@ -21,12 +21,14 @@ from .AlitaDocxMammothLoader import AlitaDocxMammothLoader
  from .AlitaExcelLoader import AlitaExcelLoader
  from .AlitaImageLoader import AlitaImageLoader
  from .AlitaJSONLoader import AlitaJSONLoader
+ from .AlitaJSONLinesLoader import AlitaJSONLinesLoader
  from .AlitaPDFLoader import AlitaPDFLoader
  from .AlitaPowerPointLoader import AlitaPowerPointLoader
  from .AlitaTextLoader import AlitaTextLoader
  from .AlitaMarkdownLoader import AlitaMarkdownLoader
  from .AlitaPythonLoader import AlitaPythonLoader
  from enum import Enum
+ from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT


  class LoaderProperties(Enum):
@@ -34,7 +36,7 @@ class LoaderProperties(Enum):
  PROMPT_DEFAULT = 'use_default_prompt'
  PROMPT = 'prompt'

- DEFAULT_ALLOWED_BASE = {'max_tokens': 512}
+ DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}

  DEFAULT_ALLOWED_WITH_LLM = {
  **DEFAULT_ALLOWED_BASE,
@@ -43,6 +45,8 @@ DEFAULT_ALLOWED_WITH_LLM = {
  LoaderProperties.PROMPT.value: "",
  }

+ DEFAULT_ALLOWED_EXCEL = {**DEFAULT_ALLOWED_WITH_LLM, 'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''}
+
  # Image file loaders mapping - directly supported by LLM with image_url
  image_loaders_map = {
  '.png': {
@@ -162,11 +166,12 @@ document_loaders_map = {
  'spreadsheetml.sheet'),
  'is_multimodal_processing': False,
  'kwargs': {
- 'excel_by_sheets': True,
- 'raw_content': True,
- 'cleanse': False
+ 'add_header_to_chunks': False,
+ 'header_row_number': 1,
+ 'max_tokens': -1,
+ 'sheet_name': ''
  },
- 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
+ 'allowed_to_override': DEFAULT_ALLOWED_EXCEL
  },
  '.xls': {
  'class': AlitaExcelLoader,
@@ -177,7 +182,7 @@ document_loaders_map = {
  'raw_content': True,
  'cleanse': False
  },
- 'allowed_to_override': DEFAULT_ALLOWED_WITH_LLM
+ 'allowed_to_override': DEFAULT_ALLOWED_EXCEL
  },
  '.pdf': {
  'class': AlitaPDFLoader,
@@ -204,7 +209,7 @@ document_loaders_map = {
  'allowed_to_override': DEFAULT_ALLOWED_BASE
  },
  '.jsonl': {
- 'class': AirbyteJSONLoader,
+ 'class': AlitaJSONLinesLoader,
  'mime_type': 'application/jsonl',
  'is_multimodal_processing': False,
  'kwargs': {},
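Each document_loaders_map entry pairs default 'kwargs' with an 'allowed_to_override' whitelist. A sketch of how such a whitelist is typically applied when merging user-supplied options (merge_loader_kwargs below is an illustrative helper, not part of alita-sdk):

    def merge_loader_kwargs(entry: dict, user_kwargs: dict) -> dict:
        # Start from the entry defaults, then accept only whitelisted overrides.
        allowed = entry.get('allowed_to_override', {})
        merged = dict(entry.get('kwargs', {}))
        merged.update({k: v for k, v in user_kwargs.items() if k in allowed})
        return merged

    xlsx_entry = {
        'kwargs': {'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''},
        'allowed_to_override': {'max_tokens': 512, 'add_header_to_chunks': False, 'header_row_number': 1, 'sheet_name': ''},
    }

    # 'unknown_flag' is dropped because it is not whitelisted; 'max_tokens' is overridden.
    print(merge_loader_kwargs(xlsx_entry, {'max_tokens': 1024, 'unknown_flag': True}))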
alita_sdk/runtime/langchain/langraph_agent.py

@@ -30,7 +30,7 @@ from ..tools.loop import LoopNode
  from ..tools.loop_output import LoopToolNode
  from ..tools.tool import ToolNode
  from ..utils.evaluate import EvaluateTemplate
- from ..utils.utils import clean_string, TOOLKIT_SPLITTER
+ from ..utils.utils import clean_string
  from ..tools.router import RouterNode

  logger = logging.getLogger(__name__)
@@ -191,7 +191,7 @@ Answer only with step name, no need to add descrip in case none of the steps are
  additional_info = """### Additoinal info: """
  additional_info += "{field}: {value}\n".format(field=field, value=state.get(field, ""))
  decision_input.append(HumanMessage(
- self.prompt.format(steps=self.steps, description=self.description, additional_info=additional_info)))
+ self.prompt.format(steps=self.steps, description=safe_format(self.description, state), additional_info=additional_info)))
  completion = self.client.invoke(decision_input)
  result = clean_string(completion.content.strip())
  logger.info(f"Plan to transition to: {result}")
@@ -483,8 +483,7 @@ def create_graph(
  node_id = clean_string(node['id'])
  toolkit_name = node.get('toolkit_name')
  tool_name = clean_string(node.get('tool', node_id))
- if toolkit_name:
- tool_name = f"{clean_string(toolkit_name)}{TOOLKIT_SPLITTER}{tool_name}"
+ # Tool names are now clean (no prefix needed)
  logger.info(f"Node: {node_id} : {node_type} - {tool_name}")
  if node_type in ['function', 'toolkit', 'mcp', 'tool', 'loop', 'loop_from_tool', 'indexer', 'subgraph', 'pipeline', 'agent']:
  if node_type == 'mcp' and tool_name not in [tool.name for tool in tools]:
@@ -550,8 +549,8 @@
  loop_toolkit_name = node.get('loop_toolkit_name')
  loop_tool_name = node.get('loop_tool')
  if (loop_toolkit_name and loop_tool_name) or loop_tool_name:
- loop_tool_name = f"{clean_string(loop_toolkit_name)}{TOOLKIT_SPLITTER}{loop_tool_name}" if loop_toolkit_name else clean_string(
- loop_tool_name)
+ # Use clean tool name (no prefix)
+ loop_tool_name = clean_string(loop_tool_name)
  for t in tools:
  if t.name == loop_tool_name:
  logger.debug(f"Loop tool discovered: {t}")
@@ -609,10 +608,10 @@
  tool_names = []
  if isinstance(connected_tools, dict):
  for toolkit, selected_tools in connected_tools.items():
- for tool in selected_tools:
- tool_names.append(f"{toolkit}{TOOLKIT_SPLITTER}{tool}")
+ # Add tool names directly (no prefix)
+ tool_names.extend(selected_tools)
  elif isinstance(connected_tools, list):
- # for cases when tools are provided as a list of names with already bound toolkit_name
+ # Use provided tool names as-is
  tool_names = connected_tools

  if tool_names:
@@ -635,6 +634,7 @@
  output_variables=output_vars,
  input_variables=node.get('input', ['messages']),
  structured_output=node.get('structured_output', False),
+ tool_execution_timeout=node.get('tool_execution_timeout', 900),
  available_tools=available_tools,
  tool_names=tool_names,
  steps_limit=kwargs.get('steps_limit', 25)
@@ -1010,7 +1010,7 @@ class LangGraphAgentRunnable(CompiledStateGraph):
  thread_id: str,
  current_recursion_limit: int,
  ) -> dict:
- """Handle GraphRecursionError by returning a soft\-boundary response."""
+ """Handle GraphRecursionError by returning a soft-boundary response."""
  config_state = self.get_state(config)
  is_execution_finished = False

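The recurring change in these hunks is that graph nodes now resolve tools by their plain names instead of names prefixed with the toolkit and TOOLKIT_SPLITTER; toolkit attribution moves into tool metadata and descriptions (see the toolkit changes below). A small sketch of the effect on connected-tool resolution (tool and toolkit names are hypothetical):

    # 0.3.486 built names like f"{toolkit}{TOOLKIT_SPLITTER}{tool}"; 0.3.515 uses the tool name as-is.
    connected_tools = {"artifact": ["listFiles", "readFile"]}

    tool_names = []
    for toolkit, selected in connected_tools.items():
        tool_names.extend(selected)   # no toolkit prefix any more

    assert tool_names == ["listFiles", "readFile"]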
alita_sdk/runtime/langchain/utils.py

@@ -208,7 +208,12 @@ def safe_format(template, mapping):
  def create_pydantic_model(model_name: str, variables: dict[str, dict]):
  fields = {}
  for var_name, var_data in variables.items():
- fields[var_name] = (parse_pydantic_type(var_data['type']), Field(description=var_data.get('description', None)))
+ if 'default' in var_data:
+ # allow user to define if it is required or not
+ fields[var_name] = (parse_pydantic_type(var_data['type']),
+ Field(description=var_data.get('description', None), default=var_data.get('default')))
+ else:
+ fields[var_name] = (parse_pydantic_type(var_data['type']), Field(description=var_data.get('description', None)))
  return create_model(model_name, **fields)

  def parse_pydantic_type(type_name: str):
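With this change, a 'default' entry in a variable definition makes the generated field optional. A minimal sketch (the 'str' and 'int' type names are assumed to be among those parse_pydantic_type accepts):

    variables = {
        "query": {"type": "str", "description": "Search phrase"},               # required field
        "limit": {"type": "int", "description": "Max results", "default": 10},  # optional, defaults to 10
    }

    SearchArgs = create_pydantic_model("SearchArgs", variables)
    print(SearchArgs(query="hello"))   # limit falls back to 10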
alita_sdk/runtime/toolkits/artifact.py

@@ -1,6 +1,7 @@
  from typing import List, Any, Literal, Optional

- from alita_sdk.tools.utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+ from alita_sdk.tools.utils import clean_string, get_max_toolkit_length
+ from alita_sdk.tools.elitea_base import filter_missconfigured_index_tools
  from langchain_community.agent_toolkits.base import BaseToolkit
  from langchain_core.tools import BaseTool
  from pydantic import create_model, BaseModel, ConfigDict, Field
@@ -40,26 +41,34 @@ class ArtifactToolkit(BaseToolkit):
  )

  @classmethod
+ @filter_missconfigured_index_tools
  def get_toolkit(cls, client: Any, bucket: str, toolkit_name: Optional[str] = None, selected_tools: list[str] = [], **kwargs):
  if selected_tools is None:
  selected_tools = []
+
  tools = []
  wrapper_payload = {
  **kwargs,
  **(kwargs.get('pgvector_configuration') or {}),
  }
  artifact_wrapper = ArtifactWrapper(alita=client, bucket=bucket, **wrapper_payload)
- prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
+ # Use clean toolkit name for context (max 1000 chars in description)
+ toolkit_context = f" [Toolkit: {clean_string(toolkit_name, 0)}]" if toolkit_name else ''
  available_tools = artifact_wrapper.get_available_tools()
  for tool in available_tools:
  if selected_tools:
  if tool["name"] not in selected_tools:
  continue
+ # Add toolkit context to description with character limit
+ description = tool["description"]
+ if toolkit_context and len(description + toolkit_context) <= 1000:
+ description = description + toolkit_context
  tools.append(BaseAction(
  api_wrapper=artifact_wrapper,
- name=prefix + tool["name"],
- description=tool["description"],
- args_schema=tool["args_schema"]
+ name=tool["name"],
+ description=description,
+ args_schema=tool["args_schema"],
+ metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
  ))
  return cls(tools=tools)

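Instead of prefixing tool names, the toolkit now appends a context suffix to each description when the result stays under 1000 characters, and records the toolkit in metadata. A sketch of the suffix logic in isolation (toolkit name and description are hypothetical):

    toolkit_name = "my_artifacts"
    toolkit_context = f" [Toolkit: {toolkit_name}]"

    description = "List files stored in the artifact bucket."
    if toolkit_context and len(description + toolkit_context) <= 1000:
        description = description + toolkit_context

    print(description)  # -> "List files stored in the artifact bucket. [Toolkit: my_artifacts]"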
alita_sdk/runtime/toolkits/datasource.py

@@ -3,7 +3,7 @@ from pydantic import create_model, BaseModel, Field
  from langchain_community.agent_toolkits.base import BaseToolkit
  from langchain_core.tools import BaseTool, ToolException
  from ..tools.datasource import DatasourcePredict, DatasourceSearch, datasourceToolSchema
- from alita_sdk.tools.utils import clean_string, TOOLKIT_SPLITTER
+ from alita_sdk.tools.utils import clean_string


  class DatasourcesToolkit(BaseToolkit):
@@ -21,21 +21,28 @@ class DatasourcesToolkit(BaseToolkit):
  @classmethod
  def get_toolkit(cls, client: Any, datasource_ids: list[int], toolkit_name: Optional[str] = None, selected_tools: list[str] = []):
  tools = []
- prefix = clean_string(toolkit_name) + TOOLKIT_SPLITTER if toolkit_name else ''
+ # Use clean toolkit name for context (max 1000 chars in description)
+ toolkit_context = f" [Toolkit: {clean_string(toolkit_name)}]" if toolkit_name else ''
  for datasource_id in datasource_ids:
  datasource = client.datasource(datasource_id)
  ds_name = clean_string(datasource.name)
  if len(ds_name) == 0:
  raise ToolException(f'Datasource with id {datasource_id} has incorrect name (i.e. special characters, etc.)')
  if len(selected_tools) == 0 or 'chat' in selected_tools:
- tools.append(DatasourcePredict(name=f'{prefix}chat',
- description=f'Search and summarize. {datasource.description}',
+ description = f'Search and summarize. {datasource.description}'
+ if toolkit_context and len(description + toolkit_context) <= 1000:
+ description = description + toolkit_context
+ tools.append(DatasourcePredict(name=f'chat',
+ description=description,
  datasource=datasource,
  args_schema=datasourceToolSchema,
  return_type='str'))
  if len(selected_tools) == 0 or 'search' in selected_tools:
- tools.append(DatasourceSearch(name=f'{prefix}search',
- description=f'Search return results. {datasource.description}',
+ description = f'Search return results. {datasource.description}'
+ if toolkit_context and len(description + toolkit_context) <= 1000:
+ description = description + toolkit_context
+ tools.append(DatasourceSearch(name=f'search',
+ description=description,
  datasource=datasource,
  args_schema=datasourceToolSchema,
  return_type='str'))