PyPI - alita-sdk - Versions diffs - 0.3.345__py3-none-any.whl → 0.3.346__py3-none-any.whl - Mend

alita-sdk 0.3.345__py3-none-any.whl → 0.3.346__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of alita-sdk might be problematic. Click here for more details.

Files changed (9)

alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import pymupdf
 import fitz
-from langchain_community.document_loaders import PyPDFLoader
+from langchain_community.document_loaders import PyPDFium2Loader
 from .ImageParser import ImageParser
 from .utils import perform_llm_prediction_for_image_bytes, create_temp_file
@@ -23,6 +23,7 @@ class AlitaPDFLoader:
         self.headers = kwargs.get('headers', None)
         self.extraction_mode = kwargs.get('extraction_mode', "plain")
         self.extraction_kwargs = kwargs.get('extraction_kwargs', None)
+        self.images_parser=ImageParser(llm=self.llm, prompt=self.prompt)
     def get_content(self):
         if hasattr(self, 'file_path'):
@@ -119,13 +120,13 @@ class AlitaPDFLoader:
             return self._load_docs()
     def _load_docs(self):
-        docs = PyPDFLoader(file_path=self.file_path,
-                        password=self.password,
-                        headers=self.headers,
-                        extract_images=self.extract_images,
-                        extraction_mode=self.extraction_mode,
-                        images_parser=ImageParser(llm=self.llm, prompt=self.prompt),
-                        extraction_kwargs=self.extraction_kwargs).load()
+        docs = PyPDFium2Loader(
+                file_path = self.file_path,
+                password=self.password,
+                headers=self.headers,
+                extract_images = self.extract_images,
+                images_parser = ImageParser(llm=self.llm, prompt=self.prompt),
+            ).load()
         for doc in docs:
             doc.metadata['chunk_id'] = doc.metadata['page']
         return docs

alita_sdk/runtime/langchain/document_loaders/ImageParser.py CHANGED Viewed

@@ -1,4 +1,8 @@
+from typing import Iterator
 from langchain_community.document_loaders.parsers.images import BaseImageBlobParser
+from langchain_core.documents import Document
+from langchain_core.documents.base import Blob
 from alita_sdk.runtime.langchain.document_loaders.AlitaImageLoader import AlitaImageLoader
@@ -8,10 +12,19 @@ class ImageParser(BaseImageBlobParser):
         self.llm = kwargs.get('llm')
         self.prompt = kwargs.get('prompt')
+    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
+        try:
+            yield from super().lazy_parse(blob)
+        except Exception:
+            yield Document(page_content="[Image: Unknown]")
     def _analyze_image(self, img) -> str:
         from io import BytesIO
         byte_stream = BytesIO()
         img.save(byte_stream, format='PNG')
         image_bytes = byte_stream.getvalue()
-        return AlitaImageLoader(file_content=image_bytes, file_name="image.png", prompt=self.prompt, llm=self.llm).get_content()
+        try:
+            return AlitaImageLoader(file_content=image_bytes, file_name="image.png", prompt=self.prompt, llm=self.llm).get_content()
+        except Exception:
+            return "Image: unknown"

alita_sdk/runtime/tools/artifact.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import hashlib
-import json
 import logging
-from typing import Any, Optional, Generator
+import re
+from typing import Any, Optional, Generator, List
 from langchain_core.documents import Document
 from langchain_core.tools import ToolException
@@ -59,18 +59,53 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
     def create_new_bucket(self, bucket_name: str, expiration_measure = "weeks", expiration_value = 1):
         return self.artifact.client.create_bucket(bucket_name, expiration_measure, expiration_value)
+    def _index_tool_params(self):
+        return {
+            'include_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to include when processing: i.e. ['*.png', '*.jpg']. "
+                            "If empty, all files will be processed (except skip_extensions).",
+                default=[])),
+            'skip_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to skip when processing: i.e. ['*.png', '*.jpg']",
+                default=[])),
+        }
     def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
         try:
-            all_files = self.list_files(self.bucket, False)
+            all_files = self.list_files(self.bucket, False)['rows']
         except Exception as e:
             raise ToolException(f"Unable to extract files: {e}")
-        for file in all_files['rows']:
+        include_extensions = kwargs.get('include_extensions', [])
+        skip_extensions = kwargs.get('skip_extensions', [])
+        self._log_tool_event(message=f"Files filtering started. Include extensions: {include_extensions}. "
+                                     f"Skip extensions: {skip_extensions}", tool_name="loader")
+        # show the progress of filtering
+        total_files = len(all_files) if isinstance(all_files, list) else 0
+        filtered_files_count = 0
+        for file in all_files:
+            filtered_files_count += 1
+            if filtered_files_count % 10 == 0 or filtered_files_count == total_files:
+                self._log_tool_event(message=f"Files filtering progress: {filtered_files_count}/{total_files}",
+                                     tool_name="loader")
+            file_name = file['name']
+            # Check if file should be skipped based on skip_extensions
+            if any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
+                   for pattern in skip_extensions):
+                continue
+            # Check if file should be included based on include_extensions
+            # If include_extensions is empty, process all files (that weren't skipped)
+            if include_extensions and not (any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
+                                               for pattern in include_extensions)):
+                continue
             metadata = {
                 ("updated_on" if k == "modified" else k): str(v)
                 for k, v in file.items()
             }
-            metadata['id'] = self.get_hash_from_bucket_and_file_name(self.bucket, file['name'])
+            metadata['id'] = self.get_hash_from_bucket_and_file_name(self.bucket, file_name)
             yield Document(page_content="", metadata=metadata)
     def get_hash_from_bucket_and_file_name(self, bucket, file_name):

alita_sdk/tools/github/github_client.py CHANGED Viewed

@@ -105,7 +105,7 @@ class GitHubClient(BaseModel):
                     self._github_repo_instance = None
             except Exception as e:
                 # Only raise when accessed, not during initialization
-                return ToolException(e)
+                raise ToolException(e)
         return self._github_repo_instance
     @model_validator(mode='before')

{alita_sdk-0.3.345.dist-info → alita_sdk-0.3.346.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.345
+Version: 0.3.346
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0

{alita_sdk-0.3.345.dist-info → alita_sdk-0.3.346.dist-info}/RECORD RENAMED Viewed

@@ -62,13 +62,13 @@ alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py,sha256=QwgBJE-B
 alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py,sha256=Nav2cgCQKOHQi_ZgYYn_iFdP_Os56KVlVR5nHGXecBc,3445
 alita_sdk/runtime/langchain/document_loaders/AlitaJiraLoader.py,sha256=M2q3YThkps0yAZOjfoLcyE7qycVTYKcXEGtpmp0N6C8,10950
 alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py,sha256=RGHDfleYTn7AAc3H-yFZrjm06L0Ux14ZtEJpFlVBNCA,2474
-alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py,sha256=usSrPnYQ3dDOJDdg6gBDTnBJnHiqjLxd_kvOBfRyVxY,5946
+alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py,sha256=olVThKX9Mmv4muTW0cAQBkgeNqU4IcdLVhqpBuzwly4,5904
 alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py,sha256=CHIaUnP2Alu7D1NHxlL5N98iY7Gqm4tA5wHjBYUsQLc,2833
 alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py,sha256=m_7aq-aCFVb4vXZsJNinfN1hAuyy_S0ylRknv_ahxDc,340
 alita_sdk/runtime/langchain/document_loaders/AlitaQtestLoader.py,sha256=CUVVnisxm7b5yZWV6rn0Q3MEEaO1GWNcfnz5yWz8T0k,13283
 alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py,sha256=nI8lyndVZxVAxbjX3yiqyuFQKFE8MjLPyYSyqRWxHqQ,4077
 alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py,sha256=EiCIAF_OxSrbuwgOFk2IpxRMvFbctITt2jAI0g_atpk,3586
-alita_sdk/runtime/langchain/document_loaders/ImageParser.py,sha256=gao5yCCKdDai_Gx7YdEx5U6oMyJYzn69eYmEvWLh-fc,656
+alita_sdk/runtime/langchain/document_loaders/ImageParser.py,sha256=RQ4zGdSw42ec8c6Eb48uFadayWuiT4FbwhGVwhSw60s,1065
 alita_sdk/runtime/langchain/document_loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/langchain/document_loaders/constants.py,sha256=xlOXq2iooepcM41SUehbH4ZUFsdz1gWli_7C9Lt5saI,7528
 alita_sdk/runtime/langchain/document_loaders/utils.py,sha256=9xghESf3axBbwxATyVuS0Yu-TWe8zWZnXgCD1ZVyNW0,2414
@@ -106,7 +106,7 @@ alita_sdk/runtime/toolkits/vectorstore.py,sha256=BGppQADa1ZiLO17fC0uCACTTEvPHlod
 alita_sdk/runtime/tools/__init__.py,sha256=7OA8YPKlEOfXu3-gJA08cyR-VymjSPL-OmbXI-B2xVA,355
 alita_sdk/runtime/tools/agent.py,sha256=m98QxOHwnCRTT9j18Olbb5UPS8-ZGeQaGiUyZJSyFck,3162
 alita_sdk/runtime/tools/application.py,sha256=z3vLZODs-_xEEnZFmGF0fKz1j3VtNJxqsAmg5ovExpQ,3129
-alita_sdk/runtime/tools/artifact.py,sha256=2Jjrhuj7Q-Sc5AKkAG7Pk8cJnGPqnqgtOmE3eDOVX0M,8694
+alita_sdk/runtime/tools/artifact.py,sha256=9kNZENeGDK4wW3cG0tixmJb0FDJhO-VqujuuuxN8kDo,10682
 alita_sdk/runtime/tools/datasource.py,sha256=pvbaSfI-ThQQnjHG-QhYNSTYRnZB0rYtZFpjCfpzxYI,2443
 alita_sdk/runtime/tools/echo.py,sha256=spw9eCweXzixJqHnZofHE1yWiSUa04L4VKycf3KCEaM,486
 alita_sdk/runtime/tools/function.py,sha256=0iZJ-UxaPbtcXAVX9G5Vsn7vmD7lrz3cBG1qylto1gs,2844
@@ -239,7 +239,7 @@ alita_sdk/tools/figma/__init__.py,sha256=W6vIMMkZI2Lmpg6_CRRV3oadaIbVI-qTLmKUh6e
 alita_sdk/tools/figma/api_wrapper.py,sha256=KbKet1Xvjq1Vynz_jEE1MtEAVtLYNlSCg67u4dfhe90,33681
 alita_sdk/tools/github/__init__.py,sha256=2rHu0zZyZGnLC5CkHgDIhe14N9yCyaEfrrt7ydH8478,5191
 alita_sdk/tools/github/api_wrapper.py,sha256=uDwYckdnpYRJtb0uZnDkaz2udvdDLVxuCh1tSwspsiU,8411
-alita_sdk/tools/github/github_client.py,sha256=IhTYcqByJ_wnYg2GFkLkYaiG2j8kFkL8p8CTIVZwmqY,86598
+alita_sdk/tools/github/github_client.py,sha256=0YkpD6Zm4X46jMNN57ZIypo2YObtgxCGQokJAF-laFs,86597
 alita_sdk/tools/github/graphql_client_wrapper.py,sha256=d3AGjzLGH_hdQV2V8HeAX92dJ4dlnE5OXqUlCO_PBr0,71539
 alita_sdk/tools/github/schemas.py,sha256=TxEWR3SjDKVwzo9i2tLnss_uPAv85Mh7oWjvQvYLDQE,14000
 alita_sdk/tools/github/tool.py,sha256=Jnnv5lenV5ds8AAdyo2m8hSzyJ117HZBjzHC6T1ck-M,1037
@@ -350,8 +350,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.345.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-alita_sdk-0.3.345.dist-info/METADATA,sha256=xKGJO9ArLAkIHbt6Ow6scbFIqtp0cqbqca2NPHVk6ao,19015
-alita_sdk-0.3.345.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-alita_sdk-0.3.345.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
-alita_sdk-0.3.345.dist-info/RECORD,,
+alita_sdk-0.3.346.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.346.dist-info/METADATA,sha256=OoUbeD3TLR5rkU_L-5H3DOb2tB7yJ9JWqmwAjDpYq_E,19015
+alita_sdk-0.3.346.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.346.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.346.dist-info/RECORD,,

{alita_sdk-0.3.345.dist-info → alita_sdk-0.3.346.dist-info}/WHEEL RENAMED Viewed

File without changes

{alita_sdk-0.3.345.dist-info → alita_sdk-0.3.346.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{alita_sdk-0.3.345.dist-info → alita_sdk-0.3.346.dist-info}/top_level.txt RENAMED Viewed

File without changes

alita-sdk 0.3.345__py3-none-any.whl → 0.3.346__py3-none-any.whl

Potentially problematic release.

alita-sdk 0.3.345__py3-none-any.whl → 0.3.346__py3-none-any.whl