PyPI - alita-sdk - Versions diffs - 0.3.347__py3-none-any.whl → 0.3.348__py3-none-any.whl - Mend

alita-sdk 0.3.347__py3-none-any.whl → 0.3.348__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of alita-sdk might be problematic. Click here for more details.

Files changed (10) hide show

alita_sdk/runtime/clients/artifact.py CHANGED Viewed

@@ -42,7 +42,17 @@ class Artifact:
             return f"{data['error']}. {data['content'] if data['content'] else ''}"
         detected = chardet.detect(data)
         if detected['encoding'] is not None:
-            return data.decode(detected['encoding'])
+            try:
+                return data.decode(detected['encoding'])
+            except Exception:
+                logger.error("Error while default encoding")
+                return parse_file_content(file_name=artifact_name,
+                                          file_content=data,
+                                          is_capture_image=is_capture_image,
+                                          page_number=page_number,
+                                          sheet_name=sheet_name,
+                                          excel_by_sheets=excel_by_sheets,
+                                          llm=llm)
         else:
             return parse_file_content(file_name=artifact_name,
                                   file_content=data,

alita_sdk/runtime/clients/client.py CHANGED Viewed

@@ -69,6 +69,7 @@ class AlitaClient:
         self.configurations_url = f'{self.base_url}{self.api_path}/integrations/integrations/default/{self.project_id}?section=configurations&unsecret=true'
         self.ai_section_url = f'{self.base_url}{self.api_path}/integrations/integrations/default/{self.project_id}?section=ai'
         self.configurations: list = configurations or []
+        self.model_timeout = kwargs.get('model_timeout', 120)
     def get_mcp_toolkits(self):
         if user_id := self._get_real_user_id():
@@ -184,6 +185,7 @@ class AlitaClient:
             model=embedding_model,
             api_key=self.auth_token,
             openai_organization=str(self.project_id),
+            request_timeout=self.model_timeout
         )
     def get_llm(self, model_name: str, model_config: dict) -> ChatOpenAI:

alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py CHANGED Viewed

@@ -12,27 +12,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import io
+import os
 from typing import Iterator
 import pandas as pd
 from json import loads
 from openpyxl import load_workbook
+from xlrd import open_workbook
 from langchain_core.documents import Document
 from .AlitaTableLoader import AlitaTableLoader
-cell_delimeter = " | "
-class AlitaExcelLoader(AlitaTableLoader):
+cell_delimiter = " | "
+class AlitaExcelLoader(AlitaTableLoader):
     excel_by_sheets: bool = False
     sheet_name: str = None
     return_type: str = 'str'
+    file_name: str = None
     def __init__(self, **kwargs):
         if not kwargs.get('file_path'):
             file_content = kwargs.get('file_content')
             if file_content:
+                self.file_name = kwargs.get('file_name')
                 kwargs['file_path'] = io.BytesIO(file_content)
+        else:
+            self.file_name = kwargs.get('file_path')
         super().__init__(**kwargs)
         self.excel_by_sheets = kwargs.get('excel_by_sheets')
         self.return_type = kwargs.get('return_type')
@@ -40,36 +45,82 @@ class AlitaExcelLoader(AlitaTableLoader):
     def get_content(self):
         try:
-            # Load the workbook
-            workbook = load_workbook(self.file_path, data_only=True)  # `data_only=True` ensures we get cell values, not formulas
-            if self.sheet_name:
-                # If a specific sheet name is provided, parse only that sheet
-                if self.sheet_name in workbook.sheetnames:
-                    sheet_content = self.parse_sheet(workbook[self.sheet_name])
-                    return sheet_content
-                else:
-                    raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
-            elif self.excel_by_sheets:
-                # Parse each sheet individually and return as a dictionary
-                result = {}
-                for sheet_name in workbook.sheetnames:
-                    sheet_content = self.parse_sheet(workbook[sheet_name])
-                    result[sheet_name] = sheet_content
-                return result
+            # Determine file extension
+            file_extension = os.path.splitext(self.file_name)[-1].lower()
+            if file_extension == '.xlsx':
+                # Use openpyxl for .xlsx files
+                return self._read_xlsx()
+            elif file_extension == '.xls':
+                # Use xlrd for .xls files
+                return self._read_xls()
             else:
-                # Combine all sheets into a single string result
-                result = []
-                for sheet_name in workbook.sheetnames:
-                    sheet_content = self.parse_sheet(workbook[sheet_name])
-                    result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
-                return "\n\n".join(result)
+                raise ValueError(f"Unsupported file format: {file_extension}")
         except Exception as e:
             return f"Error reading Excel file: {e}"
+    def _read_xlsx(self):
+        """
+        Reads .xlsx files using openpyxl.
+        """
+        workbook = load_workbook(self.file_path, data_only=True)  # `data_only=True` ensures we get cell values, not formulas
+        if self.sheet_name:
+            # If a specific sheet name is provided, parse only that sheet
+            if self.sheet_name in workbook.sheetnames:
+                sheet_content = self.parse_sheet(workbook[self.sheet_name])
+                return sheet_content
+            else:
+                raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
+        elif self.excel_by_sheets:
+            # Parse each sheet individually and return as a dictionary
+            result = {}
+            for sheet_name in workbook.sheetnames:
+                sheet_content = self.parse_sheet(workbook[sheet_name])
+                result[sheet_name] = sheet_content
+            return result
+        else:
+            # Combine all sheets into a single string result
+            result = []
+            for sheet_name in workbook.sheetnames:
+                sheet_content = self.parse_sheet(workbook[sheet_name])
+                result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
+            return "\n\n".join(result)
+    def _read_xls(self):
+        """
+        Reads .xls files using xlrd.
+        """
+        workbook = open_workbook(filename=self.file_name, file_contents=self.file_content)
+        if self.sheet_name:
+            # If a specific sheet name is provided, parse only that sheet
+            if self.sheet_name in workbook.sheet_names():
+                sheet = workbook.sheet_by_name(self.sheet_name)
+                sheet_content = self.parse_sheet_xls(sheet)
+                return sheet_content
+            else:
+                raise ValueError(f"Sheet '{self.sheet_name}' does not exist in the workbook.")
+        elif self.excel_by_sheets:
+            # Parse each sheet individually and return as a dictionary
+            result = {}
+            for sheet_name in workbook.sheet_names():
+                sheet = workbook.sheet_by_name(sheet_name)
+                sheet_content = self.parse_sheet_xls(sheet)
+                result[sheet_name] = sheet_content
+            return result
+        else:
+            # Combine all sheets into a single string result
+            result = []
+            for sheet_name in workbook.sheet_names():
+                sheet = workbook.sheet_by_name(sheet_name)
+                sheet_content = self.parse_sheet_xls(sheet)
+                result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
+            return "\n\n".join(result)
     def parse_sheet(self, sheet):
         """
-        Parses a single sheet, extracting text and hyperlinks, and formats them.
+        Parses a single .xlsx sheet, extracting text and hyperlinks, and formats them.
         """
         sheet_content = []
@@ -85,17 +136,52 @@ class AlitaExcelLoader(AlitaTableLoader):
                     # If no hyperlink, use the cell value (computed value if formula)
                     row_content.append(str(cell.value) if cell.value is not None else "")
             # Join the row content into a single line using `|` as the delimiter
-            sheet_content.append(cell_delimeter.join(row_content))
+            sheet_content.append(cell_delimiter.join(row_content))
+        # Format the sheet content based on the return type
+        return self._format_sheet_content(sheet_content)
+    def parse_sheet_xls(self, sheet):
+        """
+        Parses a single .xls sheet using xlrd, extracting text and hyperlinks, and formats them.
+        """
+        sheet_content = []
+        # Extract hyperlink map (if available)
+        hyperlink_map = getattr(sheet, 'hyperlink_map', {})
+        for row_idx in range(sheet.nrows):
+            row_content = []
+            for col_idx in range(sheet.ncols):
+                cell = sheet.cell(row_idx, col_idx)
+                cell_value = cell.value
+                # Check if the cell has a hyperlink
+                cell_address = (row_idx, col_idx)
+                if cell_address in hyperlink_map:
+                    hyperlink = hyperlink_map[cell_address].url_or_path
+                    if cell_value:
+                        row_content.append(f"[{cell_value}]({hyperlink})")
+                else:
+                    row_content.append(str(cell_value) if cell_value is not None else "")
+            # Join the row content into a single line using `|` as the delimiter
+            sheet_content.append(cell_delimiter.join(row_content))
         # Format the sheet content based on the return type
+        return self._format_sheet_content(sheet_content)
+    def _format_sheet_content(self, sheet_content):
+        """
+        Formats the sheet content based on the return type.
+        """
         if self.return_type == 'dict':
             # Convert to a list of dictionaries (each row is a dictionary)
-            headers = sheet_content[0].split(cell_delimeter) if sheet_content else []
+            headers = sheet_content[0].split(cell_delimiter) if sheet_content else []
             data_rows = sheet_content[1:] if len(sheet_content) > 1 else []
-            return [dict(zip(headers, row.split(cell_delimeter))) for row in data_rows]
+            return [dict(zip(headers, row.split(cell_delimiter))) for row in data_rows]
         elif self.return_type == 'csv':
             # Return as CSV (newline-separated rows, comma-separated values)
-            return "\n".join([",".join(row.split(cell_delimeter)) for row in sheet_content])
+            return "\n".join([",".join(row.split(cell_delimiter)) for row in sheet_content])
         else:
             # Default: Return as plain text (newline-separated rows, pipe-separated values)
             return "\n".join(sheet_content)

alita_sdk/runtime/tools/vectorstore.py CHANGED Viewed

@@ -137,7 +137,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     embedding_model_params: dict
     vectorstore_type: str
     vectorstore_params: dict
-    max_docs_per_add: int = 100
+    max_docs_per_add: int = 20
     dataset: str = None
     embedding: Any = None
     vectorstore: Any = None

alita_sdk/runtime/tools/vectorstore_base.py CHANGED Viewed

@@ -135,7 +135,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
     embedding_model: Optional[str] = None
     vectorstore_type: Optional[str]  = None
     vectorstore_params: Optional[dict]  = None
-    max_docs_per_add: int = 100
+    max_docs_per_add: int = 20
     dataset: Optional[str] = None
     vectorstore: Any = None
     pg_helper: Any = None

{alita_sdk-0.3.347.dist-info → alita_sdk-0.3.348.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.347
+Version: 0.3.348
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0

{alita_sdk-0.3.347.dist-info → alita_sdk-0.3.348.dist-info}/RECORD RENAMED Viewed

@@ -35,8 +35,8 @@ alita_sdk/configurations/zephyr_enterprise.py,sha256=UaBk3qWcT2-bCzko5HEPvgxArw1
 alita_sdk/configurations/zephyr_essential.py,sha256=tUIrh-PRNvdrLBj6rJXqlF-h6oaMXUQI1wgit07kFBw,752
 alita_sdk/runtime/__init__.py,sha256=4W0UF-nl3QF2bvET5lnah4o24CoTwSoKXhuN0YnwvEE,828
 alita_sdk/runtime/clients/__init__.py,sha256=BdehU5GBztN1Qi1Wul0cqlU46FxUfMnI6Vq2Zd_oq1M,296
-alita_sdk/runtime/clients/artifact.py,sha256=TPvROw1qu4IyUEGuf7x40IKRpb5eFZpYGN3-8LfQE0M,3461
-alita_sdk/runtime/clients/client.py,sha256=ZOWsv-JJl54lzQ4JzYFBKslt4DI0ExNZ3zQ_U7zA3uE,43590
+alita_sdk/runtime/clients/artifact.py,sha256=Tt3aWcxu20bVW6EX7s_iX5CTmcItKhUnkk8Q2gv2vw0,4036
+alita_sdk/runtime/clients/client.py,sha256=T3hmVnT63iLWEGeuJb8k8Httw-sSWUpy6rsrumD0P0w,43699
 alita_sdk/runtime/clients/datasource.py,sha256=HAZovoQN9jBg0_-lIlGBQzb4FJdczPhkHehAiVG3Wx0,1020
 alita_sdk/runtime/clients/prompt.py,sha256=li1RG9eBwgNK_Qf0qUaZ8QNTmsncFrAL2pv3kbxZRZg,1447
 alita_sdk/runtime/langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -56,7 +56,7 @@ alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py,sha256=3ne-a5qIkB
 alita_sdk/runtime/langchain/document_loaders/AlitaConfluenceLoader.py,sha256=NzpoL4C7UzyzLouTSL_xTQw70MitNt-WZz3Eyl7QkTA,8294
 alita_sdk/runtime/langchain/document_loaders/AlitaDirectoryLoader.py,sha256=fKezkgvIcLG7S2PVJp1a8sZd6C4XQKNZKAFC87DbQts,7003
 alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py,sha256=9hi5eHgDIfa9wBWqTuwMM6D6W64czrDTfZl_htooe8Y,5943
-alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py,sha256=P17csHx94JkXiyo1a2V-CrfP2E5XCG4uZC31ulZ_Ab4,5817
+alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py,sha256=h8x1Xma_IBM4NdGXVVuvHHSlFQgY0S7Xjj8oGZhdFL8,9256
 alita_sdk/runtime/langchain/document_loaders/AlitaGitRepoLoader.py,sha256=5WXGcyHraSVj3ANHj_U6X4EDikoekrIYtS0Q_QqNIng,2608
 alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py,sha256=QwgBJE-BvOasjgT1hYHZc0MP0F_elirUjSzKixoM6fY,6610
 alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py,sha256=Nav2cgCQKOHQi_ZgYYn_iFdP_Os56KVlVR5nHGXecBc,3445
@@ -121,8 +121,8 @@ alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9
 alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
 alita_sdk/runtime/tools/sandbox.py,sha256=WNz-aUMtkGCPg84dDy_0BPkyp-6YjoYB-xjIEFFrtKw,11601
 alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
-alita_sdk/runtime/tools/vectorstore.py,sha256=UFBAJ_N2F6uB0xxIy1VMx581tHco-xDl7v2Hl6u0Xzw,34468
-alita_sdk/runtime/tools/vectorstore_base.py,sha256=F2EFwq5LFwCpV6U9D5Jq1dxYrV3lxOErLfgWTXqEVRI,27293
+alita_sdk/runtime/tools/vectorstore.py,sha256=8vRhi1lGFEs3unvnflEi2p59U2MfV32lStpEizpDms0,34467
+alita_sdk/runtime/tools/vectorstore_base.py,sha256=7ZkbegFG0XTQBYGsJjtrkK-zrqKwketfx8vSJzuPCug,27292
 alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
 alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -350,8 +350,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.347.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-alita_sdk-0.3.347.dist-info/METADATA,sha256=ZvJklicNWTOf3Q1MrNdtVdLhUEnd4oVDokj_y4J_Ecg,19015
-alita_sdk-0.3.347.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-alita_sdk-0.3.347.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
-alita_sdk-0.3.347.dist-info/RECORD,,
+alita_sdk-0.3.348.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.348.dist-info/METADATA,sha256=_oiAJpxGjG23s01B-P41PkJqG1oUgdAgS7QUnlyS5gc,19015
+alita_sdk-0.3.348.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.348.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.348.dist-info/RECORD,,

{alita_sdk-0.3.347.dist-info → alita_sdk-0.3.348.dist-info}/WHEEL RENAMED Viewed

File without changes

{alita_sdk-0.3.347.dist-info → alita_sdk-0.3.348.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{alita_sdk-0.3.347.dist-info → alita_sdk-0.3.348.dist-info}/top_level.txt RENAMED Viewed

File without changes

alita-sdk 0.3.347__py3-none-any.whl → 0.3.348__py3-none-any.whl

Potentially problematic release.

alita-sdk 0.3.347__py3-none-any.whl → 0.3.348__py3-none-any.whl