alita-sdk 0.3.347__py3-none-any.whl → 0.3.348__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. Click here for more details.

@@ -42,7 +42,17 @@ class Artifact:
42
42
  return f"{data['error']}. {data['content'] if data['content'] else ''}"
43
43
  detected = chardet.detect(data)
44
44
  if detected['encoding'] is not None:
45
- return data.decode(detected['encoding'])
45
+ try:
46
+ return data.decode(detected['encoding'])
47
+ except Exception:
48
+ logger.error("Error while default encoding")
49
+ return parse_file_content(file_name=artifact_name,
50
+ file_content=data,
51
+ is_capture_image=is_capture_image,
52
+ page_number=page_number,
53
+ sheet_name=sheet_name,
54
+ excel_by_sheets=excel_by_sheets,
55
+ llm=llm)
46
56
  else:
47
57
  return parse_file_content(file_name=artifact_name,
48
58
  file_content=data,
@@ -69,6 +69,7 @@ class AlitaClient:
69
69
  self.configurations_url = f'{self.base_url}{self.api_path}/integrations/integrations/default/{self.project_id}?section=configurations&unsecret=true'
70
70
  self.ai_section_url = f'{self.base_url}{self.api_path}/integrations/integrations/default/{self.project_id}?section=ai'
71
71
  self.configurations: list = configurations or []
72
+ self.model_timeout = kwargs.get('model_timeout', 120)
72
73
 
73
74
  def get_mcp_toolkits(self):
74
75
  if user_id := self._get_real_user_id():
@@ -184,6 +185,7 @@ class AlitaClient:
184
185
  model=embedding_model,
185
186
  api_key=self.auth_token,
186
187
  openai_organization=str(self.project_id),
188
+ request_timeout=self.model_timeout
187
189
  )
188
190
 
189
191
  def get_llm(self, model_name: str, model_config: dict) -> ChatOpenAI:
@@ -12,27 +12,32 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import io
15
+ import os
15
16
  from typing import Iterator
16
17
  import pandas as pd
17
18
  from json import loads
18
19
 
19
20
  from openpyxl import load_workbook
21
+ from xlrd import open_workbook
20
22
  from langchain_core.documents import Document
21
23
  from .AlitaTableLoader import AlitaTableLoader
22
-
23
- cell_delimeter = " | "
24
24
 
25
- class AlitaExcelLoader(AlitaTableLoader):
25
+ cell_delimiter = " | "
26
26
 
27
+ class AlitaExcelLoader(AlitaTableLoader):
27
28
  excel_by_sheets: bool = False
28
29
  sheet_name: str = None
29
30
  return_type: str = 'str'
31
+ file_name: str = None
30
32
 
31
33
  def __init__(self, **kwargs):
32
34
  if not kwargs.get('file_path'):
33
35
  file_content = kwargs.get('file_content')
34
36
  if file_content:
37
+ self.file_name = kwargs.get('file_name')
35
38
  kwargs['file_path'] = io.BytesIO(file_content)
39
+ else:
40
+ self.file_name = kwargs.get('file_path')
36
41
  super().__init__(**kwargs)
37
42
  self.excel_by_sheets = kwargs.get('excel_by_sheets')
38
43
  self.return_type = kwargs.get('return_type')
@@ -40,36 +45,82 @@ class AlitaExcelLoader(AlitaTableLoader):
40
45
 
41
46
  def get_content(self):
42
47
  try:
43
- # Load the workbook
44
- workbook = load_workbook(self.file_path, data_only=True) # `data_only=True` ensures we get cell values, not formulas
45
-
46
- if self.sheet_name:
47
- # If a specific sheet name is provided, parse only that sheet
48
- if self.sheet_name in workbook.sheetnames:
49
- sheet_content = self.parse_sheet(workbook[self.sheet_name])
50
- return sheet_content
51
- else:
52
- raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
53
- elif self.excel_by_sheets:
54
- # Parse each sheet individually and return as a dictionary
55
- result = {}
56
- for sheet_name in workbook.sheetnames:
57
- sheet_content = self.parse_sheet(workbook[sheet_name])
58
- result[sheet_name] = sheet_content
59
- return result
48
+ # Determine file extension
49
+ file_extension = os.path.splitext(self.file_name)[-1].lower()
50
+
51
+ if file_extension == '.xlsx':
52
+ # Use openpyxl for .xlsx files
53
+ return self._read_xlsx()
54
+ elif file_extension == '.xls':
55
+ # Use xlrd for .xls files
56
+ return self._read_xls()
60
57
  else:
61
- # Combine all sheets into a single string result
62
- result = []
63
- for sheet_name in workbook.sheetnames:
64
- sheet_content = self.parse_sheet(workbook[sheet_name])
65
- result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
66
- return "\n\n".join(result)
58
+ raise ValueError(f"Unsupported file format: {file_extension}")
67
59
  except Exception as e:
68
60
  return f"Error reading Excel file: {e}"
69
61
 
62
+ def _read_xlsx(self):
63
+ """
64
+ Reads .xlsx files using openpyxl.
65
+ """
66
+ workbook = load_workbook(self.file_path, data_only=True) # `data_only=True` ensures we get cell values, not formulas
67
+
68
+ if self.sheet_name:
69
+ # If a specific sheet name is provided, parse only that sheet
70
+ if self.sheet_name in workbook.sheetnames:
71
+ sheet_content = self.parse_sheet(workbook[self.sheet_name])
72
+ return sheet_content
73
+ else:
74
+ raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
75
+ elif self.excel_by_sheets:
76
+ # Parse each sheet individually and return as a dictionary
77
+ result = {}
78
+ for sheet_name in workbook.sheetnames:
79
+ sheet_content = self.parse_sheet(workbook[sheet_name])
80
+ result[sheet_name] = sheet_content
81
+ return result
82
+ else:
83
+ # Combine all sheets into a single string result
84
+ result = []
85
+ for sheet_name in workbook.sheetnames:
86
+ sheet_content = self.parse_sheet(workbook[sheet_name])
87
+ result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
88
+ return "\n\n".join(result)
89
+
90
+ def _read_xls(self):
91
+ """
92
+ Reads .xls files using xlrd.
93
+ """
94
+ workbook = open_workbook(filename=self.file_name, file_contents=self.file_content)
95
+
96
+ if self.sheet_name:
97
+ # If a specific sheet name is provided, parse only that sheet
98
+ if self.sheet_name in workbook.sheet_names():
99
+ sheet = workbook.sheet_by_name(self.sheet_name)
100
+ sheet_content = self.parse_sheet_xls(sheet)
101
+ return sheet_content
102
+ else:
103
+ raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
104
+ elif self.excel_by_sheets:
105
+ # Parse each sheet individually and return as a dictionary
106
+ result = {}
107
+ for sheet_name in workbook.sheet_names():
108
+ sheet = workbook.sheet_by_name(sheet_name)
109
+ sheet_content = self.parse_sheet_xls(sheet)
110
+ result[sheet_name] = sheet_content
111
+ return result
112
+ else:
113
+ # Combine all sheets into a single string result
114
+ result = []
115
+ for sheet_name in workbook.sheet_names():
116
+ sheet = workbook.sheet_by_name(sheet_name)
117
+ sheet_content = self.parse_sheet_xls(sheet)
118
+ result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
119
+ return "\n\n".join(result)
120
+
70
121
  def parse_sheet(self, sheet):
71
122
  """
72
- Parses a single sheet, extracting text and hyperlinks, and formats them.
123
+ Parses a single .xlsx sheet, extracting text and hyperlinks, and formats them.
73
124
  """
74
125
  sheet_content = []
75
126
 
@@ -85,17 +136,52 @@ class AlitaExcelLoader(AlitaTableLoader):
85
136
  # If no hyperlink, use the cell value (computed value if formula)
86
137
  row_content.append(str(cell.value) if cell.value is not None else "")
87
138
  # Join the row content into a single line using `|` as the delimiter
88
- sheet_content.append(cell_delimeter.join(row_content))
139
+ sheet_content.append(cell_delimiter.join(row_content))
140
+
141
+ # Format the sheet content based on the return type
142
+ return self._format_sheet_content(sheet_content)
143
+
144
+ def parse_sheet_xls(self, sheet):
145
+ """
146
+ Parses a single .xls sheet using xlrd, extracting text and hyperlinks, and formats them.
147
+ """
148
+ sheet_content = []
149
+
150
+ # Extract hyperlink map (if available)
151
+ hyperlink_map = getattr(sheet, 'hyperlink_map', {})
152
+
153
+ for row_idx in range(sheet.nrows):
154
+ row_content = []
155
+ for col_idx in range(sheet.ncols):
156
+ cell = sheet.cell(row_idx, col_idx)
157
+ cell_value = cell.value
158
+
159
+ # Check if the cell has a hyperlink
160
+ cell_address = (row_idx, col_idx)
161
+ if cell_address in hyperlink_map:
162
+ hyperlink = hyperlink_map[cell_address].url_or_path
163
+ if cell_value:
164
+ row_content.append(f"[{cell_value}]({hyperlink})")
165
+ else:
166
+ row_content.append(str(cell_value) if cell_value is not None else "")
167
+ # Join the row content into a single line using `|` as the delimiter
168
+ sheet_content.append(cell_delimiter.join(row_content))
89
169
 
90
170
  # Format the sheet content based on the return type
171
+ return self._format_sheet_content(sheet_content)
172
+
173
+ def _format_sheet_content(self, sheet_content):
174
+ """
175
+ Formats the sheet content based on the return type.
176
+ """
91
177
  if self.return_type == 'dict':
92
178
  # Convert to a list of dictionaries (each row is a dictionary)
93
- headers = sheet_content[0].split(cell_delimeter) if sheet_content else []
179
+ headers = sheet_content[0].split(cell_delimiter) if sheet_content else []
94
180
  data_rows = sheet_content[1:] if len(sheet_content) > 1 else []
95
- return [dict(zip(headers, row.split(cell_delimeter))) for row in data_rows]
181
+ return [dict(zip(headers, row.split(cell_delimiter))) for row in data_rows]
96
182
  elif self.return_type == 'csv':
97
183
  # Return as CSV (newline-separated rows, comma-separated values)
98
- return "\n".join([",".join(row.split(cell_delimeter)) for row in sheet_content])
184
+ return "\n".join([",".join(row.split(cell_delimiter)) for row in sheet_content])
99
185
  else:
100
186
  # Default: Return as plain text (newline-separated rows, pipe-separated values)
101
187
  return "\n".join(sheet_content)
@@ -137,7 +137,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
137
137
  embedding_model_params: dict
138
138
  vectorstore_type: str
139
139
  vectorstore_params: dict
140
- max_docs_per_add: int = 100
140
+ max_docs_per_add: int = 20
141
141
  dataset: str = None
142
142
  embedding: Any = None
143
143
  vectorstore: Any = None
@@ -135,7 +135,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
135
135
  embedding_model: Optional[str] = None
136
136
  vectorstore_type: Optional[str] = None
137
137
  vectorstore_params: Optional[dict] = None
138
- max_docs_per_add: int = 100
138
+ max_docs_per_add: int = 20
139
139
  dataset: Optional[str] = None
140
140
  vectorstore: Any = None
141
141
  pg_helper: Any = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.347
3
+ Version: 0.3.348
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -35,8 +35,8 @@ alita_sdk/configurations/zephyr_enterprise.py,sha256=UaBk3qWcT2-bCzko5HEPvgxArw1
35
35
  alita_sdk/configurations/zephyr_essential.py,sha256=tUIrh-PRNvdrLBj6rJXqlF-h6oaMXUQI1wgit07kFBw,752
36
36
  alita_sdk/runtime/__init__.py,sha256=4W0UF-nl3QF2bvET5lnah4o24CoTwSoKXhuN0YnwvEE,828
37
37
  alita_sdk/runtime/clients/__init__.py,sha256=BdehU5GBztN1Qi1Wul0cqlU46FxUfMnI6Vq2Zd_oq1M,296
38
- alita_sdk/runtime/clients/artifact.py,sha256=TPvROw1qu4IyUEGuf7x40IKRpb5eFZpYGN3-8LfQE0M,3461
39
- alita_sdk/runtime/clients/client.py,sha256=ZOWsv-JJl54lzQ4JzYFBKslt4DI0ExNZ3zQ_U7zA3uE,43590
38
+ alita_sdk/runtime/clients/artifact.py,sha256=Tt3aWcxu20bVW6EX7s_iX5CTmcItKhUnkk8Q2gv2vw0,4036
39
+ alita_sdk/runtime/clients/client.py,sha256=T3hmVnT63iLWEGeuJb8k8Httw-sSWUpy6rsrumD0P0w,43699
40
40
  alita_sdk/runtime/clients/datasource.py,sha256=HAZovoQN9jBg0_-lIlGBQzb4FJdczPhkHehAiVG3Wx0,1020
41
41
  alita_sdk/runtime/clients/prompt.py,sha256=li1RG9eBwgNK_Qf0qUaZ8QNTmsncFrAL2pv3kbxZRZg,1447
42
42
  alita_sdk/runtime/langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -56,7 +56,7 @@ alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py,sha256=3ne-a5qIkB
56
56
  alita_sdk/runtime/langchain/document_loaders/AlitaConfluenceLoader.py,sha256=NzpoL4C7UzyzLouTSL_xTQw70MitNt-WZz3Eyl7QkTA,8294
57
57
  alita_sdk/runtime/langchain/document_loaders/AlitaDirectoryLoader.py,sha256=fKezkgvIcLG7S2PVJp1a8sZd6C4XQKNZKAFC87DbQts,7003
58
58
  alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py,sha256=9hi5eHgDIfa9wBWqTuwMM6D6W64czrDTfZl_htooe8Y,5943
59
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py,sha256=P17csHx94JkXiyo1a2V-CrfP2E5XCG4uZC31ulZ_Ab4,5817
59
+ alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py,sha256=h8x1Xma_IBM4NdGXVVuvHHSlFQgY0S7Xjj8oGZhdFL8,9256
60
60
  alita_sdk/runtime/langchain/document_loaders/AlitaGitRepoLoader.py,sha256=5WXGcyHraSVj3ANHj_U6X4EDikoekrIYtS0Q_QqNIng,2608
61
61
  alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py,sha256=QwgBJE-BvOasjgT1hYHZc0MP0F_elirUjSzKixoM6fY,6610
62
62
  alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py,sha256=Nav2cgCQKOHQi_ZgYYn_iFdP_Os56KVlVR5nHGXecBc,3445
@@ -121,8 +121,8 @@ alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9
121
121
  alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
122
122
  alita_sdk/runtime/tools/sandbox.py,sha256=WNz-aUMtkGCPg84dDy_0BPkyp-6YjoYB-xjIEFFrtKw,11601
123
123
  alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
124
- alita_sdk/runtime/tools/vectorstore.py,sha256=UFBAJ_N2F6uB0xxIy1VMx581tHco-xDl7v2Hl6u0Xzw,34468
125
- alita_sdk/runtime/tools/vectorstore_base.py,sha256=F2EFwq5LFwCpV6U9D5Jq1dxYrV3lxOErLfgWTXqEVRI,27293
124
+ alita_sdk/runtime/tools/vectorstore.py,sha256=8vRhi1lGFEs3unvnflEi2p59U2MfV32lStpEizpDms0,34467
125
+ alita_sdk/runtime/tools/vectorstore_base.py,sha256=7ZkbegFG0XTQBYGsJjtrkK-zrqKwketfx8vSJzuPCug,27292
126
126
  alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
127
127
  alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
128
128
  alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -350,8 +350,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
350
350
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
351
351
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
352
352
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
353
- alita_sdk-0.3.347.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
354
- alita_sdk-0.3.347.dist-info/METADATA,sha256=ZvJklicNWTOf3Q1MrNdtVdLhUEnd4oVDokj_y4J_Ecg,19015
355
- alita_sdk-0.3.347.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
356
- alita_sdk-0.3.347.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
357
- alita_sdk-0.3.347.dist-info/RECORD,,
353
+ alita_sdk-0.3.348.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
354
+ alita_sdk-0.3.348.dist-info/METADATA,sha256=_oiAJpxGjG23s01B-P41PkJqG1oUgdAgS7QUnlyS5gc,19015
355
+ alita_sdk-0.3.348.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
356
+ alita_sdk-0.3.348.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
357
+ alita_sdk-0.3.348.dist-info/RECORD,,