PyPI - alita-sdk - Versions diffs - 0.3.211__py3-none-any.whl → 0.3.212__py3-none-any.whl - Mend

alita-sdk 0.3.211__py3-none-any.whl → 0.3.212__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

alita_sdk/runtime/clients/client.py CHANGED Viewed

@@ -249,9 +249,9 @@ class AlitaClient:
         elif app_type == "llama":
             app_type = "react"
         elif app_type == "dial":
-            app_type = "openai"
+            app_type = "react"
         elif app_type == 'autogen':
-            app_type = "openai"
+            app_type = "react"
         if runtime == 'nonrunnable':
             return LangChainAssistant(self, data, llm, chat_history, app_type,
                                       tools=tools, memory=memory, store=store)

alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py CHANGED Viewed

@@ -1,16 +1,15 @@
-import base64
 import re
+from io import BytesIO
 import mammoth.images
 import pytesseract
 from PIL import Image
 from langchain_core.document_loaders import BaseLoader
 from langchain_core.documents import Document
-from langchain_core.messages import HumanMessage
 from mammoth import convert_to_html
 from markdownify import markdownify
-from ..constants import DEFAULT_MULTIMODAL_PROMPT
+from .utils import perform_llm_prediction_for_image_bytes
 class AlitaDocxMammothLoader(BaseLoader):
@@ -18,7 +17,7 @@ class AlitaDocxMammothLoader(BaseLoader):
     Loader for Docx files using Mammoth to convert to HTML, with image handling,
     and then Markdownify to convert HTML to markdown.
     """
-    def __init__(self, file_path: str, **kwargs):
+    def __init__(self, **kwargs):
         """
         Initializes AlitaDocxMammothLoader.
@@ -30,7 +29,10 @@ class AlitaDocxMammothLoader(BaseLoader):
         Raises:
             ValueError: If the 'path' parameter is not provided.
         """
-        self.path = file_path
+        self.path =  kwargs.get('file_path')
+        self.file_content = kwargs.get('file_content')
+        self.file_name = kwargs.get('file_name')
+        self.extract_images = kwargs.get('extract_images')
         self.llm = kwargs.get("llm")
         self.prompt = kwargs.get("prompt")
@@ -52,20 +54,7 @@ class AlitaDocxMammothLoader(BaseLoader):
             if self.llm:
                 # Use LLM for image understanding
                 with image.open() as image_bytes:
-                    base64_string = base64.b64encode(image_bytes.read()).decode()
-                url_path = f"data:image/{image.content_type};base64,{base64_string}"
-                result = self.llm.invoke([
-                    HumanMessage(
-                        content=[
-                            {"type": "text",
-                             "text": self.prompt if self.prompt is not None else DEFAULT_MULTIMODAL_PROMPT},
-                            {
-                                "type": "image_url",
-                                "image_url": {"url": url_path},
-                            },
-                        ]
-                    )
-                ]).content
+                    result = perform_llm_prediction_for_image_bytes(image_bytes, self.llm, self.prompt)
                 output['src'] = result  # LLM image transcript in src
                 return output
             else:
@@ -114,9 +103,44 @@ class AlitaDocxMammothLoader(BaseLoader):
             List[Document]: A list containing a single Document with the markdown content
                           and metadata including the source file path.
         """
-        with open(self.path, 'rb') as docx_file:
-            result = convert_to_html(docx_file, convert_image=mammoth.images.img_element(self.__handle_image))
-            content = markdownify(result.value, heading_style="ATX")
-            result_content = self.__postprocess_original_md(content)
-            return [Document(page_content=result_content, metadata={'source': str(self.path)})]
+        result_content = self.get_content()
+        return [Document(page_content=result_content, metadata={'source': str(self.path)})]
+    def get_content(self):
+        """
+        Extracts and converts the content of the Docx file to markdown format.
+        Reads from the file system when a file path was given, otherwise from
+        the in-memory bytes passed as 'file_content'.
+        Returns:
+            str: The markdown content extracted from the Docx file.
+        Raises:
+            ValueError: If neither 'file_path' nor 'file_content' was provided.
+        """
+        if self.path:
+            # A path wins over in-memory content when both are supplied
+            with open(self.path, 'rb') as docx_file:
+                return self._convert_docx_to_markdown(docx_file)
+        if self.file_content:
+            # file_name is not used by the conversion, so it is not required here
+            return self._convert_docx_to_markdown(BytesIO(self.file_content))
+        # The constructor reads the 'file_path' kwarg, so name it correctly in the error
+        raise ValueError("Either 'file_path' or 'file_content' must be provided.")
+    def _convert_docx_to_markdown(self, docx_file):
+        """
+        Turns an open Docx stream into post-processed markdown.
+        Args:
+            docx_file (BinaryIO): Binary file object holding the Docx data.
+        Returns:
+            str: Markdown produced from the document.
+        """
+        if not self.extract_images:
+            # Images are dropped: every image converts to an empty string
+            html = convert_to_html(docx_file, convert_image=lambda image: "")
+        else:
+            # Images are routed through this loader's own image handler
+            html = convert_to_html(docx_file, convert_image=mammoth.images.img_element(self.__handle_image))
+        markdown = markdownify(html.value, heading_style="ATX")
+        return self.__postprocess_original_md(markdown)

alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py CHANGED Viewed

@@ -11,14 +11,60 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import io
 from typing import Iterator
 import pandas as pd
 from json import loads
+from langchain_core.tools import ToolException
 from .AlitaTableLoader import AlitaTableLoader
 class AlitaExcelLoader(AlitaTableLoader):
+    excel_by_sheets: bool = False
+    sheet_name: str = None
+    return_type: str = 'str'
+    def __init__(self, **kwargs):
+        """
+        Initializes the loader from a file path or from in-memory bytes.
+        When no 'file_path' is given but 'file_content' is, the bytes are
+        wrapped in a BytesIO and handed to the parent class as 'file_path'.
+        Args:
+            **kwargs: Forwarded to the parent loader. This class additionally
+                reads 'excel_by_sheets', 'return_type' and 'sheet_name'.
+        """
+        if not kwargs.get('file_path'):
+            file_content = kwargs.get('file_content')
+            if file_content:
+                kwargs['file_path'] = io.BytesIO(file_content)
+        super().__init__(**kwargs)
+        # Fall back to the class-level defaults instead of overwriting them with None
+        self.excel_by_sheets = bool(kwargs.get('excel_by_sheets'))
+        self.return_type = kwargs.get('return_type') or 'str'
+        self.sheet_name = kwargs.get('sheet_name')
+    def get_content(self):
+        """
+        Reads the workbook and returns its parsed content.
+        Returns:
+            dict | str: When 'excel_by_sheets' is set, a dict mapping each sheet
+                name to its parsed content. Otherwise one string with every
+                sheet rendered under a "Sheet name" banner.
+        Raises:
+            ToolException: If the workbook cannot be read or parsed.
+        """
+        try:
+            sheets = pd.read_excel(self.file_path, sheet_name=self.sheet_name)
+            if isinstance(sheets, pd.DataFrame):
+                # A single sheet_name yields one DataFrame rather than a dict of
+                # sheets; normalize so the code below always iterates sheets
+                sheets = {self.sheet_name: sheets}
+            if self.excel_by_sheets:
+                # parse_sheet already fills NaN cells, so no extra fillna here
+                return {name: self.parse_sheet(df) for name, df in sheets.items()}
+            return "\n\n".join(
+                f"====== Sheet name: {name} ======\n{self.parse_sheet(df)}"
+                for name, df in sheets.items()
+            )
+        except Exception as e:
+            # Raise rather than return, so callers never mistake the error for content
+            raise ToolException(f"Error reading Excel file: {e}") from e
+    def parse_sheet(self, df):
+        """
+        Renders one sheet in the format selected by 'return_type'.
+        NaN cells are replaced with empty strings in place before rendering.
+        Args:
+            df (pandas.DataFrame): The sheet data; modified in place by fillna.
+        Returns:
+            list | str: A list of row dicts for 'dict', CSV text for 'csv',
+                otherwise the plain-text table without the index column.
+        """
+        df.fillna('', inplace=True)
+        output_format = self.return_type
+        if output_format == 'csv':
+            return df.to_csv()
+        if output_format == 'dict':
+            return df.to_dict(orient='records')
+        return df.to_string(index=False)
     def read(self):
         df = pd.read_excel(self.file_path, sheet_name=None)
         docs = []

alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py CHANGED Viewed

@@ -6,10 +6,10 @@ import pytesseract
 from PIL import Image
 from langchain_core.document_loaders import BaseLoader
 from langchain_core.documents import Document
-from langchain_core.messages import HumanMessage
 from reportlab.graphics import renderPM
 from svglib.svglib import svg2rlg
+from .utils import perform_llm_prediction_for_image_bytes
 from ..constants import DEFAULT_MULTIMODAL_PROMPT
 from ..tools.utils import image_to_byte_array, bytes_to_base64
@@ -25,6 +25,9 @@ class AlitaImageLoader(BaseLoader):
             self.file_path = file_path
         elif kwargs.get('path'):
             self.file_path = kwargs['path']
+        elif kwargs.get('file_content'):
+            self.file_content = kwargs['file_content']
+            self.file_name = kwargs['file_name']
         else:
             raise ValueError(
                 "Path parameter is required (either as 'file_path' positional argument or 'path' keyword argument)")
@@ -33,22 +36,107 @@ class AlitaImageLoader(BaseLoader):
         self.prompt = kwargs.get('prompt') if kwargs.get(
             'prompt') is not None else DEFAULT_MULTIMODAL_PROMPT  # Use provided prompt or default
+    def get_content(self):
+        """
+        Retrieves the text content from the file or in-memory content.
+        SVG sources (by file suffix) go to _process_svg, everything else to
+        _process_raster_image.
+        Returns:
+            str: Extracted text content from the file.
+        Raises:
+            FileNotFoundError: If 'file_path' is set but the file does not exist.
+            ImportError: If an import needed for processing fails.
+            ValueError: On OCR errors, on missing input, or on any other
+                processing failure.
+        """
+        try:
+            if hasattr(self, 'file_path'):
+                file_path = Path(self.file_path)
+                if not file_path.exists():
+                    raise FileNotFoundError(f"File not found: {self.file_path}")
+                if file_path.suffix.lower() == '.svg':
+                    return self._process_svg(self.file_path)
+                return self._process_raster_image(self.file_path)
+            if hasattr(self, 'file_content') and hasattr(self, 'file_name'):
+                # In-memory content: the file name only decides SVG vs raster
+                content = BytesIO(self.file_content)
+                if Path(self.file_name).suffix.lower() == '.svg':
+                    return self._process_svg(content)
+                return self._process_raster_image(content)
+            raise ValueError("Either 'file_path' or 'file_content' and 'file_name' must be provided.")
+        except FileNotFoundError:
+            # Re-raise unchanged; without this clause the generic handler below
+            # would convert the missing-file error into a ValueError
+            raise
+        except pytesseract.TesseractError as e:
+            raise ValueError(f"Error during OCR: {e}")
+        except ImportError as e:
+            raise ImportError(
+                f"Error: SVG processing dependencies not installed. Please install svglib and reportlab: {e}")
+        except Exception as e:
+            raise ValueError(f"Error opening image or processing SVG: {e}")
+    def _process_svg(self, svg_source):
+        """
+        Extracts text from an SVG given as a path or as an in-memory stream.
+        With an LLM configured the raw SVG bytes are sent to it; without one the
+        SVG is rendered to PNG in memory and run through OCR.
+        Args:
+            svg_source (str, Path, or BytesIO): The SVG file path or in-memory content.
+        Returns:
+            str: Extracted text content from the SVG.
+        """
+        from_path = isinstance(svg_source, (str, Path))
+        if not self.llm:
+            # OCR route: paths are passed to svglib as strings, streams as-is
+            drawing = svg2rlg(str(svg_source) if from_path else svg_source)
+            png_buffer = BytesIO()
+            renderPM.drawToFile(drawing, png_buffer, fmt="PNG")
+            png_buffer.seek(0)
+            rendered = Image.open(png_buffer)
+            return pytesseract.image_to_string(rendered, lang=self.ocr_language)
+        # LLM route: obtain the raw SVG bytes from disk or from the stream
+        if from_path:
+            with open(svg_source, 'rb') as svg_file:
+                svg_bytes = svg_file.read()
+        else:
+            svg_bytes = svg_source.read()
+        return self.__process_svg_with_llm(svg_bytes, self.llm, self.prompt)
+    def _process_raster_image(self, image_source):
+        """
+        Extracts text from a raster image (e.g., PNG, JPG).
+        The LLM is tried first when one is configured; if it raises, a warning
+        is printed and OCR is used instead. Without an LLM, OCR is used directly.
+        Args:
+            image_source (str, Path, or BytesIO): The image file path or in-memory content.
+        Returns:
+            str: Extracted text content from the raster image.
+        """
+        image = Image.open(image_source)
+        if self.llm:
+            try:
+                return self.__perform_llm_prediction_for_image(image, self.llm, self.prompt)
+            except Exception as e:
+                print(f"Warning: Error during LLM processing of image: {e}. Falling back to OCR.")
+        # Reached when no LLM is configured or when the LLM attempt failed
+        return pytesseract.image_to_string(image, lang=self.ocr_language)
     def __perform_llm_prediction_for_image(self, image: Image, llm, prompt: str) -> str:
         """Performs LLM prediction for image content."""
         byte_array = image_to_byte_array(image)
-        base64_string = bytes_to_base64(byte_array)
-        result = llm.invoke([
-            HumanMessage(
-                content=[
-                    {"type": "text", "text": prompt},
-                    {
-                        "type": "image_url",
-                        "image_url": {"url": f"data:image/png;base64,{base64_string}"},
-                    },
-                ]
-            )
-        ])
-        return result.content
+        return perform_llm_prediction_for_image_bytes(byte_array, llm, prompt)
     def __process_svg_with_llm(self, svg_content: bytes, llm, prompt: str) -> str:
         """Processes SVG content using LLM."""
@@ -61,41 +149,7 @@ class AlitaImageLoader(BaseLoader):
     def load(self) -> List[Document]:
         """Load text from image using OCR or LLM if llm is provided, supports SVG."""
-        file_path = Path(self.file_path)
-        try:
-            if file_path.suffix.lower() == '.svg':
-                if self.llm:
-                    with open(self.file_path, 'rb') as f:
-                        svg_content = f.read()
-                    text_content = self.__process_svg_with_llm(svg_content, self.llm, self.prompt)
-                else:
-                    # For OCR on SVG, we first convert SVG to PNG then use OCR
-                    drawing = svg2rlg(str(self.file_path))  # svglib requires path as string
-                    img_data = BytesIO()
-                    renderPM.drawToFile(drawing, img_data, fmt="PNG")
-                    img_data.seek(0)
-                    image = Image.open(img_data)
-                    text_content = pytesseract.image_to_string(image, lang=self.ocr_language)
-            else:  # For raster images (png, jpg, etc.)
-                image = Image.open(self.file_path)
-                if self.llm:
-                    try:
-                        text_content = self.__perform_llm_prediction_for_image(image, self.llm, self.prompt)
-                    except Exception as e:
-                        print(f"Warning: Error during LLM processing of image: {e}. Falling back to OCR.")
-                        text_content = pytesseract.image_to_string(image,
-                                                                   lang=self.ocr_language)  # Fallback to OCR if LLM fails
-                else:
-                    text_content = pytesseract.image_to_string(image, lang=self.ocr_language)
-        except FileNotFoundError:
-            raise FileNotFoundError(f"File not found: {self.file_path}")
-        except pytesseract.TesseractError as e:
-            raise ValueError(f"Error during OCR: {e}")
-        except ImportError as e:  # svglib or reportlab missing
-            raise ImportError(
-                f"Error: SVG processing dependencies not installed. Please install svglib and reportlab: {e}")
-        except Exception as e:
-            raise ValueError(f"Error opening image or processing SVG: {e}")
+        text_content = self.get_content()
         metadata = {"source": str(self.file_path)}  # Ensure source is always a string for metadata
         return [Document(page_content=text_content, metadata=metadata)]

alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py ADDED Viewed

@@ -0,0 +1,63 @@
+import pymupdf
+from langchain_community.document_loaders import PyPDFLoader
+from .utils import perform_llm_prediction_for_image_bytes, create_temp_file
+from langchain_core.tools import ToolException
+class AlitaPDFLoader:
+    """
+    PDF loader that works from a file path or from in-memory bytes.
+    get_content() extracts page text (and optional LLM image transcripts) with
+    pymupdf; load() delegates to PyPDFLoader.
+    """
+    def __init__(self, **kwargs):
+        """
+        Args:
+            **kwargs: Requires 'file_path' or 'file_content'. Optional keys:
+                'password', 'page_number' (1-based), 'extract_images', 'llm',
+                'prompt', 'headers', 'extraction_mode', 'extraction_kwargs'.
+        Raises:
+            ToolException: If neither 'file_path' nor 'file_content' is given.
+        """
+        if kwargs.get('file_path'):
+            self.file_path = kwargs.get('file_path')
+        elif kwargs.get('file_content'):
+            self.file_content = kwargs.get('file_content')
+        else:
+            raise ToolException("'file_path' or 'file_content' parameter should be provided.")
+        self.password = kwargs.get('password', None)
+        self.page_number = kwargs.get('page_number', None)
+        self.extract_images = kwargs.get('extract_images', False)
+        self.llm = kwargs.get('llm', None)
+        self.prompt = kwargs.get('prompt', "Describe image")
+        self.headers = kwargs.get('headers', None)
+        self.extraction_mode = kwargs.get('extraction_mode', "plain")
+        self.extraction_kwargs = kwargs.get('extraction_kwargs', None)
+    def get_content(self):
+        """
+        Opens the PDF from the path or the in-memory bytes and returns its text.
+        Returns:
+            str: Text of the selected page, or of all pages concatenated.
+        """
+        if hasattr(self, 'file_path'):
+            source = {'filename': self.file_path}
+        else:
+            source = {'stream': self.file_content}
+        with pymupdf.open(filetype="pdf", **source) as report:
+            return self.parse_report(report)
+    def parse_report(self, report):
+        """
+        Collects text from one page (when 'page_number' is set) or from all pages.
+        Args:
+            report: The opened pymupdf document.
+        Returns:
+            str: The collected text.
+        Raises:
+            ToolException: If 'page_number' is outside 1..page count.
+        """
+        if self.page_number is None:
+            return ''.join(
+                self.read_pdf_page(report, page, index)
+                for index, page in enumerate(report, start=1)
+            )
+        # Reject out-of-range values up front; page_number 0 would otherwise
+        # become index -1 rather than an error
+        if not 1 <= self.page_number <= report.page_count:
+            raise ToolException(
+                f"'page_number' must be between 1 and {report.page_count}, got {self.page_number}.")
+        page = report.load_page(self.page_number - 1)
+        return self.read_pdf_page(report, page, self.page_number)
+    def read_pdf_page(self, report, page, index):
+        """
+        Returns the text of one page, prefixed with its page number, followed by
+        an LLM transcript for each embedded image when 'extract_images' is set.
+        Args:
+            report: The opened pymupdf document (used to extract image bytes).
+            page: The page to read.
+            index (int): 1-based page number used in the header line.
+        Returns:
+            str: The page text plus any image transcripts.
+        """
+        parts = [f'Page: {index}\n', page.get_text()]
+        if self.extract_images:
+            for img in page.get_images(full=True):
+                xref = img[0]
+                img_bytes = report.extract_image(xref)["image"]
+                transcript = perform_llm_prediction_for_image_bytes(img_bytes, self.llm, self.prompt)
+                parts.append("\n**Image Transcript:**\n" + transcript + "\n--------------------\n")
+        return ''.join(parts)
+    def load(self):
+        """
+        Loads the PDF through PyPDFLoader.
+        In-memory content is first passed to create_temp_file, and the value it
+        returns is stored as 'file_path' for this and later calls.
+        Returns:
+            The result of PyPDFLoader(...).load().
+        """
+        if not hasattr(self, 'file_path'):
+            self.file_path = create_temp_file(self.file_content)
+        return PyPDFLoader(file_path=self.file_path,
+                           password=self.password,
+                           headers=self.headers,
+                           extract_images=self.extract_images,
+                           extraction_mode=self.extraction_mode,
+                           extraction_kwargs=self.extraction_kwargs).load()

alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py ADDED Viewed

@@ -0,0 +1,54 @@
+import io
+from langchain_community.document_loaders import UnstructuredPowerPointLoader
+from langchain_core.tools import ToolException
+from pptx import Presentation
+from .utils import perform_llm_prediction_for_image_bytes, create_temp_file
+from pptx.enum.shapes import MSO_SHAPE_TYPE
+class AlitaPowerPointLoader:
+    """
+    PowerPoint loader that works from a file path or from in-memory bytes.
+    get_content() reads slide text (and optional LLM image transcripts) with
+    python-pptx; load() delegates to UnstructuredPowerPointLoader.
+    """
+    def __init__(self, file_path=None, file_content=None, mode=None, **unstructured_kwargs):
+        """
+        Args:
+            file_path: Path to the presentation; takes precedence over file_content.
+            file_content: Raw bytes of the presentation.
+            mode: Passed through to UnstructuredPowerPointLoader in load().
+            **unstructured_kwargs: Passed through to UnstructuredPowerPointLoader.
+                This class also reads 'page_number' (1-based), 'extract_images',
+                'llm' and 'prompt' from it.
+        Raises:
+            ToolException: If neither file_path nor file_content is given.
+        """
+        if not file_path and not file_content:
+            raise ToolException("'file_path' or 'file_content' parameter should be provided.")
+        # Always define both attributes so get_content() and load() can test
+        # them without raising AttributeError
+        self.file_path = file_path or None
+        self.file_content = file_content or None
+        self.mode=mode
+        self.unstructured_kwargs = unstructured_kwargs
+        self.page_number = unstructured_kwargs.get('page_number', None)
+        self.extract_images = unstructured_kwargs.get('extract_images', False)
+        self.llm = unstructured_kwargs.get('llm', None)
+        self.prompt = unstructured_kwargs.get('prompt', "Describe image")
+    def get_content(self):
+        """
+        Returns the text of one slide (when 'page_number' is set) or of all slides.
+        Returns:
+            str: The collected slide text.
+        Raises:
+            ToolException: If 'page_number' is outside 1..slide count.
+        """
+        # Open from the path when one was given, otherwise from the in-memory bytes
+        source = self.file_path if self.file_path else io.BytesIO(self.file_content)
+        prs = Presentation(source)
+        if self.page_number is None:
+            return ''.join(
+                self.read_pptx_slide(slide, index)
+                for index, slide in enumerate(prs.slides, start=1)
+            )
+        slide_count = len(prs.slides)
+        # Reject out-of-range values up front; page_number 0 would otherwise
+        # become index -1 rather than an error
+        if not 1 <= self.page_number <= slide_count:
+            raise ToolException(
+                f"'page_number' must be between 1 and {slide_count}, got {self.page_number}.")
+        return self.read_pptx_slide(prs.slides[self.page_number - 1], self.page_number)
+    def read_pptx_slide(self, slide, index):
+        """
+        Returns the text of every shape on a slide, prefixed with the slide
+        number, plus an LLM transcript per picture when 'extract_images' is set.
+        Args:
+            slide: The python-pptx slide to read.
+            index (int): 1-based slide number used in the header line.
+        Returns:
+            str: The slide text plus any image transcripts.
+        """
+        parts = [f'Slide: {index}\n']
+        for shape in slide.shapes:
+            if hasattr(shape, "text"):
+                parts.append(shape.text + "\n")
+            elif self.extract_images and shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
+                try:
+                    # Pass the configured prompt, as the other loaders do
+                    caption = perform_llm_prediction_for_image_bytes(shape.image.blob, self.llm, self.prompt)
+                except Exception:
+                    # Best effort: a failed transcript must not abort the whole slide
+                    caption = "unknown"
+                parts.append("\n**Image Transcript:**\n" + caption + "\n--------------------\n")
+        return ''.join(parts)
+    def load(self):
+        """
+        Loads the presentation through UnstructuredPowerPointLoader.
+        In-memory content is first passed to create_temp_file, and the value it
+        returns is stored as 'file_path' for this and later calls.
+        Returns:
+            The result of UnstructuredPowerPointLoader(...).load().
+        """
+        if not self.file_path:
+            self.file_path = create_temp_file(self.file_content)
+        return UnstructuredPowerPointLoader(file_path=self.file_path,
+                           mode=self.mode,
+                           **self.unstructured_kwargs).load()

alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py ADDED Viewed

@@ -0,0 +1,66 @@
+from typing import Iterator
+from langchain_core.documents import Document
+from langchain_community.document_loaders.base import BaseLoader
+from langchain_community.document_loaders.helpers import detect_file_encodings
+from langchain_core.tools import ToolException
+class AlitaTextLoader(BaseLoader):
+    """
+    Text loader that works from a file path or from in-memory bytes, with
+    optional encoding auto-detection when the configured encoding fails.
+    """
+    def __init__(self, **kwargs):
+        """
+        Initialize with a file path or in-memory content.
+        Args:
+            **kwargs: Requires 'file_path' or 'file_content'. Optional keys:
+                'file_name' (used as the document source for in-memory content),
+                'encoding' (default 'utf-8'), 'autodetect_encoding' (default False).
+        Raises:
+            ToolException: If neither 'file_path' nor 'file_content' is given.
+        """
+        if kwargs.get('file_path'):
+            self.file_path = kwargs['file_path']
+        elif  kwargs.get('file_content'):
+            self.file_content = kwargs['file_content']
+            # file_name only labels the document source, so a missing one
+            # must not fail construction with a KeyError
+            self.file_name = kwargs.get('file_name')
+        else:
+            raise ToolException("'file_path' or 'file_content' parameter should be provided.")
+        self.encoding = kwargs.get('encoding', 'utf-8')
+        self.autodetect_encoding = kwargs.get('autodetect_encoding', False)
+    def get_content(self):
+        """
+        Returns the decoded text of the file or of the in-memory content.
+        When decoding with the configured encoding fails and
+        'autodetect_encoding' is set, each detected encoding is tried in turn.
+        Returns:
+            str: The decoded text, or an empty string when auto-detection is
+                enabled and no detected encoding can decode the content.
+        Raises:
+            RuntimeError: If decoding fails and auto-detection is disabled, or
+                on any other error while reading.
+        """
+        try:
+            return self._read(self.encoding)
+        except UnicodeDecodeError as e:
+            if not self.autodetect_encoding:
+                raise RuntimeError(f"Error loading content with encoding {self.encoding}.") from e
+            for candidate in self._detect_encodings():
+                try:
+                    return self._read(candidate.encoding)
+                except UnicodeDecodeError:
+                    continue
+            return ""
+        except Exception as e:
+            raise RuntimeError(f"Error loading content.") from e
+    def _read(self, encoding):
+        """Read the file or decode the in-memory bytes using the given encoding."""
+        if getattr(self, 'file_path', None):
+            with open(self.file_path, encoding=encoding) as f:
+                return f.read()
+        if getattr(self, 'file_content', None):
+            return self.file_content.decode(encoding)
+        raise ValueError("Neither file_path nor file_content is provided.")
+    def _detect_encodings(self):
+        """Return the candidate encodings reported by detect_file_encodings."""
+        if getattr(self, 'file_path', None):
+            return detect_file_encodings(self.file_path)
+        # detect_file_encodings is given a path everywhere else, so write the
+        # in-memory bytes to a temporary file instead of passing them directly
+        import os
+        import tempfile
+        with tempfile.NamedTemporaryFile(delete=False) as tmp:
+            tmp.write(self.file_content)
+        try:
+            return detect_file_encodings(tmp.name)
+        finally:
+            os.unlink(tmp.name)
+    def lazy_load(self) -> Iterator[Document]:
+        """Yield a single Document holding the text and its source."""
+        text = self.get_content()
+        metadata = {"source": str(self.file_path) if hasattr(self, 'file_path') else self.file_name}
+        yield Document(page_content=text, metadata=metadata)

alita_sdk/runtime/langchain/document_loaders/constants.py CHANGED Viewed

@@ -12,24 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from langchain_community.document_loaders import (TextLoader,
+from langchain_community.document_loaders import (
         UnstructuredMarkdownLoader,
-        PyPDFLoader,
-        UnstructuredPDFLoader,UnstructuredWordDocumentLoader,
-        JSONLoader, AirbyteJSONLoader, UnstructuredHTMLLoader,
-        UnstructuredPowerPointLoader, PythonLoader)
-from langchain_community.document_loaders import (TextLoader,
-        UnstructuredMarkdownLoader,
-        PyPDFLoader,
-        UnstructuredPDFLoader,UnstructuredWordDocumentLoader,
-        JSONLoader, AirbyteJSONLoader, UnstructuredHTMLLoader,
+        AirbyteJSONLoader, UnstructuredHTMLLoader,
         UnstructuredPowerPointLoader, PythonLoader)
 from .AlitaCSVLoader import AlitaCSVLoader
 from .AlitaDocxMammothLoader import AlitaDocxMammothLoader
 from .AlitaExcelLoader import AlitaExcelLoader
 from .AlitaImageLoader import AlitaImageLoader
+from .AlitaPDFLoader import AlitaPDFLoader
+from .AlitaTextLoader import AlitaTextLoader
+from .AlitaPowerPointLoader import AlitaPowerPointLoader
 loaders_map = {
     '.png': {
@@ -63,28 +57,28 @@ loaders_map = {
         'kwargs': {}
     },
     '.txt': {
-        'class': TextLoader,
+        'class': AlitaTextLoader,
         'is_multimodal_processing': False,
         'kwargs': {
             'autodetect_encoding': True
         }
     },
     '.yml': {
-        'class': TextLoader,
+        'class': AlitaTextLoader,
         'is_multimodal_processing': False,
         'kwargs': {
             'autodetect_encoding': True
         }
     },
     '.yaml': {
-        'class': TextLoader,
+        'class': AlitaTextLoader,
         'is_multimodal_processing': False,
         'kwargs': {
             'autodetect_encoding': True
         }
     },
     '.groovy': {
-        'class': TextLoader,
+        'class': AlitaTextLoader,
         'is_multimodal_processing': False,
         'kwargs': {
             'autodetect_encoding': True
@@ -121,7 +115,7 @@ loaders_map = {
         }
     },
     '.pdf': {
-        'class': PyPDFLoader,
+        'class': AlitaPDFLoader,
         'is_multimodal_processing': False,
         'kwargs': {}
     },
@@ -131,7 +125,7 @@ loaders_map = {
         'kwargs': {}
     },
     '.json': {
-        'class': TextLoader,
+        'class': AlitaTextLoader,
         'is_multimodal_processing': False,
         'kwargs': {
             'autodetect_encoding': True
@@ -153,12 +147,12 @@ loaders_map = {
         'kwargs': {}
     },
     '.ppt': {
-        'class': UnstructuredPowerPointLoader,
+        'class': AlitaPowerPointLoader,
         'is_multimodal_processing': False,
         'kwargs': {}
     },
     '.pptx': {
-        'class': UnstructuredPowerPointLoader,
+        'class': AlitaPowerPointLoader,
         'is_multimodal_processing': False,
         'kwargs': {}
     },

alita-sdk 0.3.211__py3-none-any.whl → 0.3.212__py3-none-any.whl

alita-sdk 0.3.211__py3-none-any.whl → 0.3.212__py3-none-any.whl