alita-sdk 0.3.347__py3-none-any.whl → 0.3.348__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic. Click here for more details.
- alita_sdk/runtime/clients/artifact.py +11 -1
- alita_sdk/runtime/clients/client.py +2 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +117 -31
- alita_sdk/runtime/tools/vectorstore.py +1 -1
- alita_sdk/runtime/tools/vectorstore_base.py +1 -1
- {alita_sdk-0.3.347.dist-info → alita_sdk-0.3.348.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.347.dist-info → alita_sdk-0.3.348.dist-info}/RECORD +10 -10
- {alita_sdk-0.3.347.dist-info → alita_sdk-0.3.348.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.347.dist-info → alita_sdk-0.3.348.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.347.dist-info → alita_sdk-0.3.348.dist-info}/top_level.txt +0 -0
|
@@ -42,7 +42,17 @@ class Artifact:
|
|
|
42
42
|
return f"{data['error']}. {data['content'] if data['content'] else ''}"
|
|
43
43
|
detected = chardet.detect(data)
|
|
44
44
|
if detected['encoding'] is not None:
|
|
45
|
-
|
|
45
|
+
try:
|
|
46
|
+
return data.decode(detected['encoding'])
|
|
47
|
+
except Exception:
|
|
48
|
+
logger.error("Error while default encoding")
|
|
49
|
+
return parse_file_content(file_name=artifact_name,
|
|
50
|
+
file_content=data,
|
|
51
|
+
is_capture_image=is_capture_image,
|
|
52
|
+
page_number=page_number,
|
|
53
|
+
sheet_name=sheet_name,
|
|
54
|
+
excel_by_sheets=excel_by_sheets,
|
|
55
|
+
llm=llm)
|
|
46
56
|
else:
|
|
47
57
|
return parse_file_content(file_name=artifact_name,
|
|
48
58
|
file_content=data,
|
|
@@ -69,6 +69,7 @@ class AlitaClient:
|
|
|
69
69
|
self.configurations_url = f'{self.base_url}{self.api_path}/integrations/integrations/default/{self.project_id}?section=configurations&unsecret=true'
|
|
70
70
|
self.ai_section_url = f'{self.base_url}{self.api_path}/integrations/integrations/default/{self.project_id}?section=ai'
|
|
71
71
|
self.configurations: list = configurations or []
|
|
72
|
+
self.model_timeout = kwargs.get('model_timeout', 120)
|
|
72
73
|
|
|
73
74
|
def get_mcp_toolkits(self):
|
|
74
75
|
if user_id := self._get_real_user_id():
|
|
@@ -184,6 +185,7 @@ class AlitaClient:
|
|
|
184
185
|
model=embedding_model,
|
|
185
186
|
api_key=self.auth_token,
|
|
186
187
|
openai_organization=str(self.project_id),
|
|
188
|
+
request_timeout=self.model_timeout
|
|
187
189
|
)
|
|
188
190
|
|
|
189
191
|
def get_llm(self, model_name: str, model_config: dict) -> ChatOpenAI:
|
|
@@ -12,27 +12,32 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import io
|
|
15
|
+
import os
|
|
15
16
|
from typing import Iterator
|
|
16
17
|
import pandas as pd
|
|
17
18
|
from json import loads
|
|
18
19
|
|
|
19
20
|
from openpyxl import load_workbook
|
|
21
|
+
from xlrd import open_workbook
|
|
20
22
|
from langchain_core.documents import Document
|
|
21
23
|
from .AlitaTableLoader import AlitaTableLoader
|
|
22
|
-
|
|
23
|
-
cell_delimeter = " | "
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
cell_delimiter = " | "
|
|
26
26
|
|
|
27
|
+
class AlitaExcelLoader(AlitaTableLoader):
|
|
27
28
|
excel_by_sheets: bool = False
|
|
28
29
|
sheet_name: str = None
|
|
29
30
|
return_type: str = 'str'
|
|
31
|
+
file_name: str = None
|
|
30
32
|
|
|
31
33
|
def __init__(self, **kwargs):
|
|
32
34
|
if not kwargs.get('file_path'):
|
|
33
35
|
file_content = kwargs.get('file_content')
|
|
34
36
|
if file_content:
|
|
37
|
+
self.file_name = kwargs.get('file_name')
|
|
35
38
|
kwargs['file_path'] = io.BytesIO(file_content)
|
|
39
|
+
else:
|
|
40
|
+
self.file_name = kwargs.get('file_path')
|
|
36
41
|
super().__init__(**kwargs)
|
|
37
42
|
self.excel_by_sheets = kwargs.get('excel_by_sheets')
|
|
38
43
|
self.return_type = kwargs.get('return_type')
|
|
@@ -40,36 +45,82 @@ class AlitaExcelLoader(AlitaTableLoader):
|
|
|
40
45
|
|
|
41
46
|
def get_content(self):
|
|
42
47
|
try:
|
|
43
|
-
#
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
if
|
|
47
|
-
#
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
|
|
53
|
-
elif self.excel_by_sheets:
|
|
54
|
-
# Parse each sheet individually and return as a dictionary
|
|
55
|
-
result = {}
|
|
56
|
-
for sheet_name in workbook.sheetnames:
|
|
57
|
-
sheet_content = self.parse_sheet(workbook[sheet_name])
|
|
58
|
-
result[sheet_name] = sheet_content
|
|
59
|
-
return result
|
|
48
|
+
# Determine file extension
|
|
49
|
+
file_extension = os.path.splitext(self.file_name)[-1].lower()
|
|
50
|
+
|
|
51
|
+
if file_extension == '.xlsx':
|
|
52
|
+
# Use openpyxl for .xlsx files
|
|
53
|
+
return self._read_xlsx()
|
|
54
|
+
elif file_extension == '.xls':
|
|
55
|
+
# Use xlrd for .xls files
|
|
56
|
+
return self._read_xls()
|
|
60
57
|
else:
|
|
61
|
-
|
|
62
|
-
result = []
|
|
63
|
-
for sheet_name in workbook.sheetnames:
|
|
64
|
-
sheet_content = self.parse_sheet(workbook[sheet_name])
|
|
65
|
-
result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
|
|
66
|
-
return "\n\n".join(result)
|
|
58
|
+
raise ValueError(f"Unsupported file format: {file_extension}")
|
|
67
59
|
except Exception as e:
|
|
68
60
|
return f"Error reading Excel file: {e}"
|
|
69
61
|
|
|
62
|
+
def _read_xlsx(self):
|
|
63
|
+
"""
|
|
64
|
+
Reads .xlsx files using openpyxl.
|
|
65
|
+
"""
|
|
66
|
+
workbook = load_workbook(self.file_path, data_only=True) # `data_only=True` ensures we get cell values, not formulas
|
|
67
|
+
|
|
68
|
+
if self.sheet_name:
|
|
69
|
+
# If a specific sheet name is provided, parse only that sheet
|
|
70
|
+
if self.sheet_name in workbook.sheetnames:
|
|
71
|
+
sheet_content = self.parse_sheet(workbook[self.sheet_name])
|
|
72
|
+
return sheet_content
|
|
73
|
+
else:
|
|
74
|
+
raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
|
|
75
|
+
elif self.excel_by_sheets:
|
|
76
|
+
# Parse each sheet individually and return as a dictionary
|
|
77
|
+
result = {}
|
|
78
|
+
for sheet_name in workbook.sheetnames:
|
|
79
|
+
sheet_content = self.parse_sheet(workbook[sheet_name])
|
|
80
|
+
result[sheet_name] = sheet_content
|
|
81
|
+
return result
|
|
82
|
+
else:
|
|
83
|
+
# Combine all sheets into a single string result
|
|
84
|
+
result = []
|
|
85
|
+
for sheet_name in workbook.sheetnames:
|
|
86
|
+
sheet_content = self.parse_sheet(workbook[sheet_name])
|
|
87
|
+
result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
|
|
88
|
+
return "\n\n".join(result)
|
|
89
|
+
|
|
90
|
+
def _read_xls(self):
|
|
91
|
+
"""
|
|
92
|
+
Reads .xls files using xlrd.
|
|
93
|
+
"""
|
|
94
|
+
workbook = open_workbook(filename=self.file_name, file_contents=self.file_content)
|
|
95
|
+
|
|
96
|
+
if self.sheet_name:
|
|
97
|
+
# If a specific sheet name is provided, parse only that sheet
|
|
98
|
+
if self.sheet_name in workbook.sheet_names():
|
|
99
|
+
sheet = workbook.sheet_by_name(self.sheet_name)
|
|
100
|
+
sheet_content = self.parse_sheet_xls(sheet)
|
|
101
|
+
return sheet_content
|
|
102
|
+
else:
|
|
103
|
+
raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
|
|
104
|
+
elif self.excel_by_sheets:
|
|
105
|
+
# Parse each sheet individually and return as a dictionary
|
|
106
|
+
result = {}
|
|
107
|
+
for sheet_name in workbook.sheet_names():
|
|
108
|
+
sheet = workbook.sheet_by_name(sheet_name)
|
|
109
|
+
sheet_content = self.parse_sheet_xls(sheet)
|
|
110
|
+
result[sheet_name] = sheet_content
|
|
111
|
+
return result
|
|
112
|
+
else:
|
|
113
|
+
# Combine all sheets into a single string result
|
|
114
|
+
result = []
|
|
115
|
+
for sheet_name in workbook.sheet_names():
|
|
116
|
+
sheet = workbook.sheet_by_name(sheet_name)
|
|
117
|
+
sheet_content = self.parse_sheet_xls(sheet)
|
|
118
|
+
result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
|
|
119
|
+
return "\n\n".join(result)
|
|
120
|
+
|
|
70
121
|
def parse_sheet(self, sheet):
|
|
71
122
|
"""
|
|
72
|
-
Parses a single sheet, extracting text and hyperlinks, and formats them.
|
|
123
|
+
Parses a single .xlsx sheet, extracting text and hyperlinks, and formats them.
|
|
73
124
|
"""
|
|
74
125
|
sheet_content = []
|
|
75
126
|
|
|
@@ -85,17 +136,52 @@ class AlitaExcelLoader(AlitaTableLoader):
|
|
|
85
136
|
# If no hyperlink, use the cell value (computed value if formula)
|
|
86
137
|
row_content.append(str(cell.value) if cell.value is not None else "")
|
|
87
138
|
# Join the row content into a single line using `|` as the delimiter
|
|
88
|
-
sheet_content.append(
|
|
139
|
+
sheet_content.append(cell_delimiter.join(row_content))
|
|
140
|
+
|
|
141
|
+
# Format the sheet content based on the return type
|
|
142
|
+
return self._format_sheet_content(sheet_content)
|
|
143
|
+
|
|
144
|
+
def parse_sheet_xls(self, sheet):
|
|
145
|
+
"""
|
|
146
|
+
Parses a single .xls sheet using xlrd, extracting text and hyperlinks, and formats them.
|
|
147
|
+
"""
|
|
148
|
+
sheet_content = []
|
|
149
|
+
|
|
150
|
+
# Extract hyperlink map (if available)
|
|
151
|
+
hyperlink_map = getattr(sheet, 'hyperlink_map', {})
|
|
152
|
+
|
|
153
|
+
for row_idx in range(sheet.nrows):
|
|
154
|
+
row_content = []
|
|
155
|
+
for col_idx in range(sheet.ncols):
|
|
156
|
+
cell = sheet.cell(row_idx, col_idx)
|
|
157
|
+
cell_value = cell.value
|
|
158
|
+
|
|
159
|
+
# Check if the cell has a hyperlink
|
|
160
|
+
cell_address = (row_idx, col_idx)
|
|
161
|
+
if cell_address in hyperlink_map:
|
|
162
|
+
hyperlink = hyperlink_map[cell_address].url_or_path
|
|
163
|
+
if cell_value:
|
|
164
|
+
row_content.append(f"[{cell_value}]({hyperlink})")
|
|
165
|
+
else:
|
|
166
|
+
row_content.append(str(cell_value) if cell_value is not None else "")
|
|
167
|
+
# Join the row content into a single line using `|` as the delimiter
|
|
168
|
+
sheet_content.append(cell_delimiter.join(row_content))
|
|
89
169
|
|
|
90
170
|
# Format the sheet content based on the return type
|
|
171
|
+
return self._format_sheet_content(sheet_content)
|
|
172
|
+
|
|
173
|
+
def _format_sheet_content(self, sheet_content):
|
|
174
|
+
"""
|
|
175
|
+
Formats the sheet content based on the return type.
|
|
176
|
+
"""
|
|
91
177
|
if self.return_type == 'dict':
|
|
92
178
|
# Convert to a list of dictionaries (each row is a dictionary)
|
|
93
|
-
headers = sheet_content[0].split(
|
|
179
|
+
headers = sheet_content[0].split(cell_delimiter) if sheet_content else []
|
|
94
180
|
data_rows = sheet_content[1:] if len(sheet_content) > 1 else []
|
|
95
|
-
return [dict(zip(headers, row.split(
|
|
181
|
+
return [dict(zip(headers, row.split(cell_delimiter))) for row in data_rows]
|
|
96
182
|
elif self.return_type == 'csv':
|
|
97
183
|
# Return as CSV (newline-separated rows, comma-separated values)
|
|
98
|
-
return "\n".join([",".join(row.split(
|
|
184
|
+
return "\n".join([",".join(row.split(cell_delimiter)) for row in sheet_content])
|
|
99
185
|
else:
|
|
100
186
|
# Default: Return as plain text (newline-separated rows, pipe-separated values)
|
|
101
187
|
return "\n".join(sheet_content)
|
|
@@ -137,7 +137,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
137
137
|
embedding_model_params: dict
|
|
138
138
|
vectorstore_type: str
|
|
139
139
|
vectorstore_params: dict
|
|
140
|
-
max_docs_per_add: int =
|
|
140
|
+
max_docs_per_add: int = 20
|
|
141
141
|
dataset: str = None
|
|
142
142
|
embedding: Any = None
|
|
143
143
|
vectorstore: Any = None
|
|
@@ -135,7 +135,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
135
135
|
embedding_model: Optional[str] = None
|
|
136
136
|
vectorstore_type: Optional[str] = None
|
|
137
137
|
vectorstore_params: Optional[dict] = None
|
|
138
|
-
max_docs_per_add: int =
|
|
138
|
+
max_docs_per_add: int = 20
|
|
139
139
|
dataset: Optional[str] = None
|
|
140
140
|
vectorstore: Any = None
|
|
141
141
|
pg_helper: Any = None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: alita_sdk
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.348
|
|
4
4
|
Summary: SDK for building langchain agents using resources from Alita
|
|
5
5
|
Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -35,8 +35,8 @@ alita_sdk/configurations/zephyr_enterprise.py,sha256=UaBk3qWcT2-bCzko5HEPvgxArw1
|
|
|
35
35
|
alita_sdk/configurations/zephyr_essential.py,sha256=tUIrh-PRNvdrLBj6rJXqlF-h6oaMXUQI1wgit07kFBw,752
|
|
36
36
|
alita_sdk/runtime/__init__.py,sha256=4W0UF-nl3QF2bvET5lnah4o24CoTwSoKXhuN0YnwvEE,828
|
|
37
37
|
alita_sdk/runtime/clients/__init__.py,sha256=BdehU5GBztN1Qi1Wul0cqlU46FxUfMnI6Vq2Zd_oq1M,296
|
|
38
|
-
alita_sdk/runtime/clients/artifact.py,sha256=
|
|
39
|
-
alita_sdk/runtime/clients/client.py,sha256=
|
|
38
|
+
alita_sdk/runtime/clients/artifact.py,sha256=Tt3aWcxu20bVW6EX7s_iX5CTmcItKhUnkk8Q2gv2vw0,4036
|
|
39
|
+
alita_sdk/runtime/clients/client.py,sha256=T3hmVnT63iLWEGeuJb8k8Httw-sSWUpy6rsrumD0P0w,43699
|
|
40
40
|
alita_sdk/runtime/clients/datasource.py,sha256=HAZovoQN9jBg0_-lIlGBQzb4FJdczPhkHehAiVG3Wx0,1020
|
|
41
41
|
alita_sdk/runtime/clients/prompt.py,sha256=li1RG9eBwgNK_Qf0qUaZ8QNTmsncFrAL2pv3kbxZRZg,1447
|
|
42
42
|
alita_sdk/runtime/langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -56,7 +56,7 @@ alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py,sha256=3ne-a5qIkB
|
|
|
56
56
|
alita_sdk/runtime/langchain/document_loaders/AlitaConfluenceLoader.py,sha256=NzpoL4C7UzyzLouTSL_xTQw70MitNt-WZz3Eyl7QkTA,8294
|
|
57
57
|
alita_sdk/runtime/langchain/document_loaders/AlitaDirectoryLoader.py,sha256=fKezkgvIcLG7S2PVJp1a8sZd6C4XQKNZKAFC87DbQts,7003
|
|
58
58
|
alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py,sha256=9hi5eHgDIfa9wBWqTuwMM6D6W64czrDTfZl_htooe8Y,5943
|
|
59
|
-
alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py,sha256=
|
|
59
|
+
alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py,sha256=h8x1Xma_IBM4NdGXVVuvHHSlFQgY0S7Xjj8oGZhdFL8,9256
|
|
60
60
|
alita_sdk/runtime/langchain/document_loaders/AlitaGitRepoLoader.py,sha256=5WXGcyHraSVj3ANHj_U6X4EDikoekrIYtS0Q_QqNIng,2608
|
|
61
61
|
alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py,sha256=QwgBJE-BvOasjgT1hYHZc0MP0F_elirUjSzKixoM6fY,6610
|
|
62
62
|
alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py,sha256=Nav2cgCQKOHQi_ZgYYn_iFdP_Os56KVlVR5nHGXecBc,3445
|
|
@@ -121,8 +121,8 @@ alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9
|
|
|
121
121
|
alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
|
|
122
122
|
alita_sdk/runtime/tools/sandbox.py,sha256=WNz-aUMtkGCPg84dDy_0BPkyp-6YjoYB-xjIEFFrtKw,11601
|
|
123
123
|
alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
|
|
124
|
-
alita_sdk/runtime/tools/vectorstore.py,sha256=
|
|
125
|
-
alita_sdk/runtime/tools/vectorstore_base.py,sha256=
|
|
124
|
+
alita_sdk/runtime/tools/vectorstore.py,sha256=8vRhi1lGFEs3unvnflEi2p59U2MfV32lStpEizpDms0,34467
|
|
125
|
+
alita_sdk/runtime/tools/vectorstore_base.py,sha256=7ZkbegFG0XTQBYGsJjtrkK-zrqKwketfx8vSJzuPCug,27292
|
|
126
126
|
alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
|
|
127
127
|
alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
128
128
|
alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
|
|
@@ -350,8 +350,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
|
|
|
350
350
|
alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
|
|
351
351
|
alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
|
|
352
352
|
alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
|
|
353
|
-
alita_sdk-0.3.
|
|
354
|
-
alita_sdk-0.3.
|
|
355
|
-
alita_sdk-0.3.
|
|
356
|
-
alita_sdk-0.3.
|
|
357
|
-
alita_sdk-0.3.
|
|
353
|
+
alita_sdk-0.3.348.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
354
|
+
alita_sdk-0.3.348.dist-info/METADATA,sha256=_oiAJpxGjG23s01B-P41PkJqG1oUgdAgS7QUnlyS5gc,19015
|
|
355
|
+
alita_sdk-0.3.348.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
356
|
+
alita_sdk-0.3.348.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
|
|
357
|
+
alita_sdk-0.3.348.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|