alita-sdk 0.3.486__py3-none-any.whl → 0.3.515__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of alita-sdk might be problematic.
- alita_sdk/cli/agent_loader.py +27 -6
- alita_sdk/cli/agents.py +10 -1
- alita_sdk/cli/inventory.py +12 -195
- alita_sdk/cli/tools/filesystem.py +95 -9
- alita_sdk/community/inventory/__init__.py +12 -0
- alita_sdk/community/inventory/toolkit.py +9 -5
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/configurations/ado.py +144 -0
- alita_sdk/configurations/confluence.py +76 -42
- alita_sdk/configurations/figma.py +76 -0
- alita_sdk/configurations/gitlab.py +2 -0
- alita_sdk/configurations/qtest.py +72 -1
- alita_sdk/configurations/report_portal.py +96 -0
- alita_sdk/configurations/sharepoint.py +148 -0
- alita_sdk/configurations/testio.py +83 -0
- alita_sdk/runtime/clients/artifact.py +2 -2
- alita_sdk/runtime/clients/client.py +64 -40
- alita_sdk/runtime/clients/sandbox_client.py +14 -0
- alita_sdk/runtime/langchain/assistant.py +48 -2
- alita_sdk/runtime/langchain/constants.py +3 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +2 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
- alita_sdk/runtime/langchain/langraph_agent.py +10 -10
- alita_sdk/runtime/langchain/utils.py +6 -1
- alita_sdk/runtime/toolkits/artifact.py +14 -5
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +94 -219
- alita_sdk/runtime/toolkits/planning.py +13 -6
- alita_sdk/runtime/toolkits/tools.py +60 -25
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/artifact.py +185 -23
- alita_sdk/runtime/tools/function.py +2 -1
- alita_sdk/runtime/tools/llm.py +155 -34
- alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
- alita_sdk/runtime/tools/mcp_server_tool.py +2 -4
- alita_sdk/runtime/tools/vectorstore_base.py +3 -3
- alita_sdk/runtime/utils/AlitaCallback.py +136 -21
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +125 -8
- alita_sdk/runtime/utils/mcp_sse_client.py +35 -6
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/toolkit_utils.py +7 -13
- alita_sdk/runtime/utils/utils.py +2 -0
- alita_sdk/tools/__init__.py +15 -0
- alita_sdk/tools/ado/repos/__init__.py +10 -12
- alita_sdk/tools/ado/test_plan/__init__.py +23 -8
- alita_sdk/tools/ado/wiki/__init__.py +24 -8
- alita_sdk/tools/ado/wiki/ado_wrapper.py +21 -7
- alita_sdk/tools/ado/work_item/__init__.py +24 -8
- alita_sdk/tools/advanced_jira_mining/__init__.py +10 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +12 -9
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +9 -7
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +26 -1
- alita_sdk/tools/bitbucket/__init__.py +14 -10
- alita_sdk/tools/bitbucket/api_wrapper.py +50 -2
- alita_sdk/tools/browser/__init__.py +5 -4
- alita_sdk/tools/carrier/__init__.py +5 -6
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +2 -0
- alita_sdk/tools/chunkers/universal_chunker.py +1 -0
- alita_sdk/tools/cloud/aws/__init__.py +9 -7
- alita_sdk/tools/cloud/azure/__init__.py +9 -7
- alita_sdk/tools/cloud/gcp/__init__.py +9 -7
- alita_sdk/tools/cloud/k8s/__init__.py +9 -7
- alita_sdk/tools/code/linter/__init__.py +9 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +9 -7
- alita_sdk/tools/confluence/__init__.py +15 -10
- alita_sdk/tools/confluence/api_wrapper.py +63 -14
- alita_sdk/tools/custom_open_api/__init__.py +11 -5
- alita_sdk/tools/elastic/__init__.py +10 -8
- alita_sdk/tools/elitea_base.py +387 -9
- alita_sdk/tools/figma/__init__.py +8 -7
- alita_sdk/tools/github/__init__.py +12 -14
- alita_sdk/tools/github/github_client.py +68 -2
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +14 -11
- alita_sdk/tools/gitlab/api_wrapper.py +81 -1
- alita_sdk/tools/gitlab_org/__init__.py +9 -8
- alita_sdk/tools/google/bigquery/__init__.py +12 -12
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +9 -8
- alita_sdk/tools/jira/__init__.py +15 -10
- alita_sdk/tools/keycloak/__init__.py +10 -8
- alita_sdk/tools/localgit/__init__.py +8 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +11 -3
- alita_sdk/tools/ocr/__init__.py +10 -8
- alita_sdk/tools/openapi/__init__.py +6 -2
- alita_sdk/tools/pandas/__init__.py +9 -7
- alita_sdk/tools/postman/__init__.py +10 -11
- alita_sdk/tools/pptx/__init__.py +9 -9
- alita_sdk/tools/qtest/__init__.py +9 -8
- alita_sdk/tools/rally/__init__.py +9 -8
- alita_sdk/tools/report_portal/__init__.py +11 -9
- alita_sdk/tools/salesforce/__init__.py +9 -9
- alita_sdk/tools/servicenow/__init__.py +10 -8
- alita_sdk/tools/sharepoint/__init__.py +9 -8
- alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
- alita_sdk/tools/slack/__init__.py +8 -7
- alita_sdk/tools/sql/__init__.py +9 -8
- alita_sdk/tools/testio/__init__.py +9 -8
- alita_sdk/tools/testrail/__init__.py +10 -8
- alita_sdk/tools/utils/__init__.py +9 -4
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +16 -18
- alita_sdk/tools/xray/__init__.py +10 -8
- alita_sdk/tools/yagmail/__init__.py +8 -3
- alita_sdk/tools/zephyr/__init__.py +8 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +10 -8
- alita_sdk/tools/zephyr_essential/__init__.py +9 -8
- alita_sdk/tools/zephyr_scale/__init__.py +9 -8
- alita_sdk/tools/zephyr_squad/__init__.py +9 -8
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/RECORD +124 -119
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/entry_points.txt +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py

@@ -21,14 +21,16 @@ from openpyxl import load_workbook
 from xlrd import open_workbook
 from langchain_core.documents import Document
 from .AlitaTableLoader import AlitaTableLoader
+from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT

 cell_delimiter = " | "

 class AlitaExcelLoader(AlitaTableLoader):
-    excel_by_sheets: bool = False
     sheet_name: str = None
-    return_type: str = 'str'
     file_name: str = None
+    max_tokens: int = LOADER_MAX_TOKENS_DEFAULT
+    add_header_to_chunks: bool = False
+    header_row_number: int = 1

     def __init__(self, **kwargs):
         if not kwargs.get('file_path'):
@@ -39,9 +41,22 @@ class AlitaExcelLoader(AlitaTableLoader):
         else:
             self.file_name = kwargs.get('file_path')
         super().__init__(**kwargs)
-        self.excel_by_sheets = kwargs.get('excel_by_sheets')
-        self.return_type = kwargs.get('return_type')
         self.sheet_name = kwargs.get('sheet_name')
+        # Set and validate chunking parameters only once
+        self.max_tokens = int(kwargs.get('max_tokens', LOADER_MAX_TOKENS_DEFAULT))
+        self.add_header_to_chunks = bool(kwargs.get('add_header_to_chunks', False))
+        header_row_number = kwargs.get('header_row_number', 1)
+        # Validate header_row_number
+        try:
+            header_row_number = int(header_row_number)
+            if header_row_number > 0:
+                self.header_row_number = header_row_number
+            else:
+                self.header_row_number = 1
+                self.add_header_to_chunks = False
+        except (ValueError, TypeError):
+            self.header_row_number = 1
+            self.add_header_to_chunks = False

     def get_content(self):
         try:
@@ -64,59 +79,32 @@
         Reads .xlsx files using openpyxl.
         """
         workbook = load_workbook(self.file_path, data_only=True)  # `data_only=True` ensures we get cell values, not formulas
-
+        sheets = workbook.sheetnames
         if self.sheet_name:
-
-            if self.sheet_name in workbook.sheetnames:
+            if self.sheet_name in sheets:
                 sheet_content = self.parse_sheet(workbook[self.sheet_name])
-                return sheet_content
             else:
-
-
-                # Parse each sheet individually and return as a dictionary
-                result = {}
-                for sheet_name in workbook.sheetnames:
-                    sheet_content = self.parse_sheet(workbook[sheet_name])
-                    result[sheet_name] = sheet_content
-                return result
+                sheet_content = [f"Sheet '{self.sheet_name}' does not exist in the workbook."]
+            return {self.sheet_name: sheet_content}
         else:
-            #
-
-            for sheet_name in workbook.sheetnames:
-                sheet_content = self.parse_sheet(workbook[sheet_name])
-                result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
-            return "\n\n".join(result)
+            # Dictionary comprehension for all sheets
+            return {name: self.parse_sheet(workbook[name]) for name in sheets}

     def _read_xls(self):
         """
         Reads .xls files using xlrd.
         """
         workbook = open_workbook(filename=self.file_name, file_contents=self.file_content)
-
+        sheets = workbook.sheet_names()
         if self.sheet_name:
-
-            if self.sheet_name in workbook.sheet_names():
+            if self.sheet_name in sheets:
                 sheet = workbook.sheet_by_name(self.sheet_name)
-
-                return sheet_content
+                return {self.sheet_name: self.parse_sheet_xls(sheet)}
             else:
-
-        elif self.excel_by_sheets:
-            # Parse each sheet individually and return as a dictionary
-            result = {}
-            for sheet_name in workbook.sheet_names():
-                sheet = workbook.sheet_by_name(sheet_name)
-                sheet_content = self.parse_sheet_xls(sheet)
-                result[sheet_name] = sheet_content
-            return result
+                return {self.sheet_name: [f"Sheet '{self.sheet_name}' does not exist in the workbook."]}
         else:
-            #
-
-            for sheet_name in workbook.sheet_names():
-                sheet = workbook.sheet_by_name(sheet_name)
-                sheet_content = self.parse_sheet_xls(sheet)
-                result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
-            return "\n\n".join(result)
+            # Dictionary comprehension for all sheets
+            return {name: self.parse_sheet_xls(workbook.sheet_by_name(name)) for name in sheets}

     def parse_sheet(self, sheet):
         """
@@ -170,34 +158,89 @@
         # Format the sheet content based on the return type
         return self._format_sheet_content(sheet_content)

-    def _format_sheet_content(self,
+    def _format_sheet_content(self, rows):
         """
-
+        Specification:
+        Formats a list of sheet rows into a list of string chunks according to the following rules:
+        1. If max_tokens < 1, returns a single chunk (list of one string) with all rows joined by a newline ('\n').
+           - If add_header_to_chunks is True and header_row_number is valid, the specified header row is prepended as the first line.
+        2. If max_tokens >= 1:
+           a. Each chunk is a string containing one or more rows, separated by newlines ('\n'), such that the total token count (as measured by tiktoken) does not exceed max_tokens.
+           b. If add_header_to_chunks is True and header_row_number is valid, the specified header row is prepended once at the top of each chunk (not before every row).
+           c. If a single row exceeds max_tokens, it is placed in its own chunk without splitting, with the header prepended if applicable.
+        3. Returns: List[str], where each string is a chunk ready for further processing.
         """
-
-
-
-
-
-
-
-
-
-
-
+        import tiktoken
+        encoding = tiktoken.get_encoding('cl100k_base')
+
+        # --- Inner functions ---
+        def count_tokens(text):
+            """Count tokens in text using tiktoken encoding."""
+            return len(encoding.encode(text))
+
+        def finalize_chunk(chunk_rows):
+            """Join rows for a chunk, prepending header if needed."""
+            if self.add_header_to_chunks and header:
+                return '\n'.join([header] + chunk_rows)
+            else:
+                return '\n'.join(chunk_rows)
+        # --- End inner functions ---
+
+        # If max_tokens < 1, return all rows as a single chunk
+        if self.max_tokens < 1:
+            return ['\n'.join(rows)]
+
+        # Extract header if needed
+        header = None
+        if self.add_header_to_chunks and rows:
+            header_idx = self.header_row_number - 1
+            header = rows.pop(header_idx)
+
+        chunks = []  # List to store final chunks
+        current_chunk = []  # Accumulate rows for the current chunk
+        current_tokens = 0  # Token count for the current chunk
+
+        for row in rows:
+            row_tokens = count_tokens(row)
+            # If row itself exceeds max_tokens, flush current chunk and add row as its own chunk (with header if needed)
+            if row_tokens > self.max_tokens:
+                if current_chunk:
+                    chunks.append(finalize_chunk(current_chunk))
+                    current_chunk = []
+                    current_tokens = 0
+                # Add the large row as its own chunk, with header if needed
+                if self.add_header_to_chunks and header:
+                    chunks.append(finalize_chunk([row]))
+                else:
+                    chunks.append(row)
+                continue
+            # If adding row would exceed max_tokens, flush current chunk and start new
+            if current_tokens + row_tokens > self.max_tokens:
+                if current_chunk:
+                    chunks.append(finalize_chunk(current_chunk))
+                current_chunk = [row]
+                current_tokens = row_tokens
+            else:
+                current_chunk.append(row)
+                current_tokens += row_tokens
+        # Add any remaining rows as the last chunk
+        if current_chunk:
+            chunks.append(finalize_chunk(current_chunk))
+        return chunks

     def load(self) -> list:
         docs = []
         content_per_sheet = self.get_content()
-
+        # content_per_sheet is a dict of sheet_name: list of chunk strings
+        for sheet_name, content_chunks in content_per_sheet.items():
             metadata = {
                 "source": f'{self.file_path}:{sheet_name}',
                 "sheet_name": sheet_name,
                 "file_type": "excel",
-                "excel_by_sheets": self.excel_by_sheets,
-                "return_type": self.return_type,
             }
-
+            # Each chunk is a separate Document
+            for chunk in content_chunks:
+                docs.append(Document(page_content=chunk, metadata=metadata))
         return docs

     def read(self, lazy: bool = False):
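For orientation, a minimal usage sketch of the reworked loader follows (an illustration, not part of the diff; the file path and sheet name are hypothetical):

# Hypothetical usage of the chunking options introduced above.
from alita_sdk.runtime.langchain.document_loaders.AlitaExcelLoader import AlitaExcelLoader

loader = AlitaExcelLoader(
    file_path="reports/q1.xlsx",   # hypothetical file
    sheet_name="Summary",          # omit to load every sheet
    max_tokens=512,                # group rows into chunks of at most 512 tokens (tiktoken cl100k_base)
    add_header_to_chunks=True,     # prepend the header row to every chunk
    header_row_number=1,           # 1-based; invalid values fall back to 1 and disable header prepending
)
docs = loader.load()               # one langchain Document per chunk, with sheet_name in the metadata

Note that the default kwargs registered for Excel files in document_loaders_map pass max_tokens=-1, which, per the _format_sheet_content specification above, returns each sheet as a single chunk.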
alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py (new file)

@@ -0,0 +1,77 @@
+from .AlitaJSONLoader import AlitaJSONLoader
+import json
+from io import StringIO
+from typing import List, Iterator
+
+from langchain_core.documents import Document
+from langchain_core.tools import ToolException
+
+
+class AlitaJSONLinesLoader(AlitaJSONLoader):
+    """Load local JSONL files (one JSON object per line) using AlitaJSONLoader behavior.
+
+    Behavior:
+    - Supports both `file_path` and `file_content` (bytes or file-like object), same as AlitaJSONLoader.
+    - Treats each non-empty line as an independent JSON object.
+    - Aggregates all parsed JSON objects into a list and feeds them through the same
+      RecursiveJsonSplitter-based chunking used by AlitaJSONLoader.lazy_load.
+    - Returns a list of Documents with chunked JSON content.
+    """
+
+    def __init__(self, **kwargs):
+        # Reuse AlitaJSONLoader initialization logic (file_path / file_content handling, encoding, etc.)
+        super().__init__(**kwargs)
+
+    def _iter_lines(self) -> Iterator[str]:
+        """Yield lines from file_path or file_content, mirroring AlitaJSONLoader sources."""
+        # Prefer file_path if available
+        if hasattr(self, "file_path") and self.file_path:
+            with open(self.file_path, "r", encoding=self.encoding) as f:
+                for line in f:
+                    yield line
+        # Fallback to file_content if available
+        elif hasattr(self, "file_content") and self.file_content:
+            # file_content may be bytes or a file-like object
+            if isinstance(self.file_content, (bytes, bytearray)):
+                text = self.file_content.decode(self.encoding)
+                for line in StringIO(text):
+                    yield line
+            else:
+                # Assume it's a text file-like object positioned at the beginning
+                self.file_content.seek(0)
+                for line in self.file_content:
+                    yield line
+        else:
+            raise ToolException("'file_path' or 'file_content' parameter should be provided.")
+
+    def load(self) -> List[Document]:  # type: ignore[override]
+        """Load JSONL content by delegating each non-empty line to AlitaJSONLoader.
+
+        For each non-empty line in the underlying source (file_path or file_content):
+        - Create a temporary AlitaJSONLoader instance with that line as file_content.
+        - Call lazy_load() on that instance to apply the same RecursiveJsonSplitter logic
+          as for a normal JSON file.
+        - Accumulate all Documents from all lines and return them as a single list.
+        """
+        docs: List[Document] = []
+
+        for raw_line in self._iter_lines():
+            line = raw_line.strip()
+            if not line:
+                continue
+            try:
+                # Instantiate a per-line AlitaJSONLoader using the same configuration
+                line_loader = AlitaJSONLoader(
+                    file_content=line,
+                    file_name=getattr(self, "file_name", str(getattr(self, "file_path", "no_name"))),
+                    encoding=self.encoding,
+                    autodetect_encoding=self.autodetect_encoding,
+                    max_tokens=self.max_tokens,
+                )
+
+                for doc in line_loader.lazy_load():
+                    docs.append(doc)
+            except Exception as e:
+                raise ToolException(f"Error processing JSONL line: {line[:100]}... Error: {e}") from e
+
+        return docs
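A minimal usage sketch for the new loader (illustrative only, not part of the diff; the path is hypothetical):

from alita_sdk.runtime.langchain.document_loaders.AlitaJSONLinesLoader import AlitaJSONLinesLoader

# Hypothetical JSONL file; file_content (bytes or a file-like object) is also supported.
loader = AlitaJSONLinesLoader(file_path="exports/events.jsonl")
docs = loader.load()  # each non-empty line is parsed independently and chunked like a regular JSON file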
alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py

@@ -32,6 +32,8 @@ class AlitaJSONLoader(BaseLoader):
         elif hasattr(self, 'file_content') and self.file_content:
             if isinstance(self.file_content, bytes):
                 return json.loads(self.file_content.decode(self.encoding))
+            elif isinstance(self.file_content, str):
+                return json.loads(self.file_content)
             else:
                 return json.load(self.file_content)
         else:
@@ -45,7 +47,6 @@ class AlitaJSONLoader(BaseLoader):
             try:
                 with open(self.file_path, encoding=encoding.encoding) as f:
                     return f.read()
-                break
             except UnicodeDecodeError:
                 continue
         elif hasattr(self, 'file_content') and self.file_content:
alita_sdk/runtime/langchain/document_loaders/constants.py

@@ -21,12 +21,14 @@ from .AlitaDocxMammothLoader import AlitaDocxMammothLoader
 from .AlitaExcelLoader import AlitaExcelLoader
 from .AlitaImageLoader import AlitaImageLoader
 from .AlitaJSONLoader import AlitaJSONLoader
+from .AlitaJSONLinesLoader import AlitaJSONLinesLoader
 from .AlitaPDFLoader import AlitaPDFLoader
 from .AlitaPowerPointLoader import AlitaPowerPointLoader
 from .AlitaTextLoader import AlitaTextLoader
 from .AlitaMarkdownLoader import AlitaMarkdownLoader
 from .AlitaPythonLoader import AlitaPythonLoader
 from enum import Enum
+from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT


 class LoaderProperties(Enum):
@@ -34,7 +36,7 @@ class LoaderProperties(Enum):
     PROMPT_DEFAULT = 'use_default_prompt'
     PROMPT = 'prompt'

-DEFAULT_ALLOWED_BASE = {'max_tokens':
+DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}

 DEFAULT_ALLOWED_WITH_LLM = {
     **DEFAULT_ALLOWED_BASE,
@@ -43,6 +45,8 @@ DEFAULT_ALLOWED_WITH_LLM = {
     LoaderProperties.PROMPT.value: "",
 }

+DEFAULT_ALLOWED_EXCEL = {**DEFAULT_ALLOWED_WITH_LLM, 'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''}
+
 # Image file loaders mapping - directly supported by LLM with image_url
 image_loaders_map = {
     '.png': {
@@ -162,11 +166,12 @@ document_loaders_map = {
             'spreadsheetml.sheet'),
         'is_multimodal_processing': False,
         'kwargs': {
-            '
-            '
-            '
+            'add_header_to_chunks': False,
+            'header_row_number': 1,
+            'max_tokens': -1,
+            'sheet_name': ''
         },
-        'allowed_to_override':
+        'allowed_to_override': DEFAULT_ALLOWED_EXCEL
     },
     '.xls': {
         'class': AlitaExcelLoader,
@@ -177,7 +182,7 @@
             'raw_content': True,
             'cleanse': False
         },
-        'allowed_to_override':
+        'allowed_to_override': DEFAULT_ALLOWED_EXCEL
     },
     '.pdf': {
         'class': AlitaPDFLoader,
@@ -204,7 +209,7 @@
         'allowed_to_override': DEFAULT_ALLOWED_BASE
     },
     '.jsonl': {
-        'class':
+        'class': AlitaJSONLinesLoader,
         'mime_type': 'application/jsonl',
         'is_multimodal_processing': False,
         'kwargs': {},
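One plausible way the updated map entries could be consumed is sketched below; the override-merging logic is an assumption made for illustration, and only document_loaders_map, its kwargs, and allowed_to_override come from the diff above:

# Illustrative dispatch sketch (assumed, not the SDK's actual code): pick a loader by
# extension and keep only caller overrides that the entry's allowed_to_override permits.
entry = document_loaders_map['.xlsx']
requested = {'max_tokens': 512, 'add_header_to_chunks': True, 'not_allowed_key': 'dropped'}
overrides = {k: v for k, v in requested.items() if k in entry['allowed_to_override']}
loader = entry['class'](file_path='data.xlsx', **{**entry['kwargs'], **overrides})  # hypothetical file
docs = loader.load()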
alita_sdk/runtime/langchain/langraph_agent.py

@@ -30,7 +30,7 @@ from ..tools.loop import LoopNode
 from ..tools.loop_output import LoopToolNode
 from ..tools.tool import ToolNode
 from ..utils.evaluate import EvaluateTemplate
-from ..utils.utils import clean_string
+from ..utils.utils import clean_string
 from ..tools.router import RouterNode

 logger = logging.getLogger(__name__)
@@ -191,7 +191,7 @@ Answer only with step name, no need to add descrip in case none of the steps are
             additional_info = """### Additoinal info: """
             additional_info += "{field}: {value}\n".format(field=field, value=state.get(field, ""))
         decision_input.append(HumanMessage(
-            self.prompt.format(steps=self.steps, description=self.description, additional_info=additional_info)))
+            self.prompt.format(steps=self.steps, description=safe_format(self.description, state), additional_info=additional_info)))
         completion = self.client.invoke(decision_input)
         result = clean_string(completion.content.strip())
         logger.info(f"Plan to transition to: {result}")
@@ -483,8 +483,7 @@ def create_graph(
         node_id = clean_string(node['id'])
         toolkit_name = node.get('toolkit_name')
         tool_name = clean_string(node.get('tool', node_id))
-
-        tool_name = f"{clean_string(toolkit_name)}{TOOLKIT_SPLITTER}{tool_name}"
+        # Tool names are now clean (no prefix needed)
         logger.info(f"Node: {node_id} : {node_type} - {tool_name}")
         if node_type in ['function', 'toolkit', 'mcp', 'tool', 'loop', 'loop_from_tool', 'indexer', 'subgraph', 'pipeline', 'agent']:
             if node_type == 'mcp' and tool_name not in [tool.name for tool in tools]:
@@ -550,8 +549,8 @@
                 loop_toolkit_name = node.get('loop_toolkit_name')
                 loop_tool_name = node.get('loop_tool')
                 if (loop_toolkit_name and loop_tool_name) or loop_tool_name:
-
-
+                    # Use clean tool name (no prefix)
+                    loop_tool_name = clean_string(loop_tool_name)
                     for t in tools:
                         if t.name == loop_tool_name:
                             logger.debug(f"Loop tool discovered: {t}")
@@ -609,10 +608,10 @@
             tool_names = []
             if isinstance(connected_tools, dict):
                 for toolkit, selected_tools in connected_tools.items():
-
-
+                    # Add tool names directly (no prefix)
+                    tool_names.extend(selected_tools)
             elif isinstance(connected_tools, list):
-                #
+                # Use provided tool names as-is
                 tool_names = connected_tools

             if tool_names:
@@ -635,6 +634,7 @@
                 output_variables=output_vars,
                 input_variables=node.get('input', ['messages']),
                 structured_output=node.get('structured_output', False),
+                tool_execution_timeout=node.get('tool_execution_timeout', 900),
                 available_tools=available_tools,
                 tool_names=tool_names,
                 steps_limit=kwargs.get('steps_limit', 25)
@@ -1010,7 +1010,7 @@ class LangGraphAgentRunnable(CompiledStateGraph):
             thread_id: str,
             current_recursion_limit: int,
     ) -> dict:
-        """Handle GraphRecursionError by returning a soft
+        """Handle GraphRecursionError by returning a soft-boundary response."""
         config_state = self.get_state(config)
         is_execution_finished = False

alita_sdk/runtime/langchain/utils.py

@@ -208,7 +208,12 @@ def safe_format(template, mapping):
 def create_pydantic_model(model_name: str, variables: dict[str, dict]):
     fields = {}
     for var_name, var_data in variables.items():
-
+        if 'default' in var_data:
+            # allow user to define if it is required or not
+            fields[var_name] = (parse_pydantic_type(var_data['type']),
+                                Field(description=var_data.get('description', None), default=var_data.get('default')))
+        else:
+            fields[var_name] = (parse_pydantic_type(var_data['type']), Field(description=var_data.get('description', None)))
     return create_model(model_name, **fields)

 def parse_pydantic_type(type_name: str):
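A small sketch of the new optional-field behavior (the variable spec is made up, and it assumes parse_pydantic_type recognizes 'str' and 'int'):

# Hypothetical variable spec demonstrating the new 'default' handling.
variables = {
    "query": {"type": "str", "description": "Search query"},                # required field
    "limit": {"type": "int", "description": "Max results", "default": 10},  # optional field
}
SearchArgs = create_pydantic_model("SearchArgs", variables)

SearchArgs(query="hello")            # valid: limit falls back to 10
SearchArgs(query="hello", limit=25)  # valid: explicit value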
alita_sdk/runtime/toolkits/artifact.py

@@ -1,6 +1,7 @@
 from typing import List, Any, Literal, Optional

-from alita_sdk.tools.utils import clean_string,
+from alita_sdk.tools.utils import clean_string, get_max_toolkit_length
+from alita_sdk.tools.elitea_base import filter_missconfigured_index_tools
 from langchain_community.agent_toolkits.base import BaseToolkit
 from langchain_core.tools import BaseTool
 from pydantic import create_model, BaseModel, ConfigDict, Field
@@ -40,26 +41,34 @@ class ArtifactToolkit(BaseToolkit):
         )

     @classmethod
+    @filter_missconfigured_index_tools
     def get_toolkit(cls, client: Any, bucket: str, toolkit_name: Optional[str] = None, selected_tools: list[str] = [], **kwargs):
         if selected_tools is None:
             selected_tools = []
+
         tools = []
         wrapper_payload = {
             **kwargs,
             **(kwargs.get('pgvector_configuration') or {}),
         }
         artifact_wrapper = ArtifactWrapper(alita=client, bucket=bucket, **wrapper_payload)
-
+        # Use clean toolkit name for context (max 1000 chars in description)
+        toolkit_context = f" [Toolkit: {clean_string(toolkit_name, 0)}]" if toolkit_name else ''
         available_tools = artifact_wrapper.get_available_tools()
         for tool in available_tools:
             if selected_tools:
                 if tool["name"] not in selected_tools:
                     continue
+            # Add toolkit context to description with character limit
+            description = tool["description"]
+            if toolkit_context and len(description + toolkit_context) <= 1000:
+                description = description + toolkit_context
             tools.append(BaseAction(
                 api_wrapper=artifact_wrapper,
-                name=
-                description=
-                args_schema=tool["args_schema"]
+                name=tool["name"],
+                description=description,
+                args_schema=tool["args_schema"],
+                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
             ))
         return cls(tools=tools)

alita_sdk/runtime/toolkits/datasource.py

@@ -3,7 +3,7 @@ from pydantic import create_model, BaseModel, Field
 from langchain_community.agent_toolkits.base import BaseToolkit
 from langchain_core.tools import BaseTool, ToolException
 from ..tools.datasource import DatasourcePredict, DatasourceSearch, datasourceToolSchema
-from alita_sdk.tools.utils import clean_string
+from alita_sdk.tools.utils import clean_string


 class DatasourcesToolkit(BaseToolkit):
@@ -21,21 +21,28 @@ class DatasourcesToolkit(BaseToolkit):
     @classmethod
     def get_toolkit(cls, client: Any, datasource_ids: list[int], toolkit_name: Optional[str] = None, selected_tools: list[str] = []):
         tools = []
-
+        # Use clean toolkit name for context (max 1000 chars in description)
+        toolkit_context = f" [Toolkit: {clean_string(toolkit_name)}]" if toolkit_name else ''
         for datasource_id in datasource_ids:
             datasource = client.datasource(datasource_id)
             ds_name = clean_string(datasource.name)
             if len(ds_name) == 0:
                 raise ToolException(f'Datasource with id {datasource_id} has incorrect name (i.e. special characters, etc.)')
             if len(selected_tools) == 0 or 'chat' in selected_tools:
-
-
+                description = f'Search and summarize. {datasource.description}'
+                if toolkit_context and len(description + toolkit_context) <= 1000:
+                    description = description + toolkit_context
+                tools.append(DatasourcePredict(name=f'chat',
+                                               description=description,
                                                datasource=datasource,
                                                args_schema=datasourceToolSchema,
                                                return_type='str'))
             if len(selected_tools) == 0 or 'search' in selected_tools:
-
-
+                description = f'Search return results. {datasource.description}'
+                if toolkit_context and len(description + toolkit_context) <= 1000:
+                    description = description + toolkit_context
+                tools.append(DatasourceSearch(name=f'search',
+                                              description=description,
                                               datasource=datasource,
                                               args_schema=datasourceToolSchema,
                                               return_type='str'))