alita-sdk 0.3.345__py3-none-any.whl → 0.3.346__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  import pymupdf
2
2
  import fitz
3
- from langchain_community.document_loaders import PyPDFLoader
3
+ from langchain_community.document_loaders import PyPDFium2Loader
4
4
 
5
5
  from .ImageParser import ImageParser
6
6
  from .utils import perform_llm_prediction_for_image_bytes, create_temp_file
@@ -23,6 +23,7 @@ class AlitaPDFLoader:
23
23
  self.headers = kwargs.get('headers', None)
24
24
  self.extraction_mode = kwargs.get('extraction_mode', "plain")
25
25
  self.extraction_kwargs = kwargs.get('extraction_kwargs', None)
26
+ self.images_parser=ImageParser(llm=self.llm, prompt=self.prompt)
26
27
 
27
28
  def get_content(self):
28
29
  if hasattr(self, 'file_path'):
@@ -119,13 +120,13 @@ class AlitaPDFLoader:
119
120
  return self._load_docs()
120
121
 
121
122
  def _load_docs(self):
122
- docs = PyPDFLoader(file_path=self.file_path,
123
- password=self.password,
124
- headers=self.headers,
125
- extract_images=self.extract_images,
126
- extraction_mode=self.extraction_mode,
127
- images_parser=ImageParser(llm=self.llm, prompt=self.prompt),
128
- extraction_kwargs=self.extraction_kwargs).load()
123
+ docs = PyPDFium2Loader(
124
+ file_path = self.file_path,
125
+ password=self.password,
126
+ headers=self.headers,
127
+ extract_images = self.extract_images,
128
+ images_parser = ImageParser(llm=self.llm, prompt=self.prompt),
129
+ ).load()
129
130
  for doc in docs:
130
131
  doc.metadata['chunk_id'] = doc.metadata['page']
131
132
  return docs
@@ -1,4 +1,8 @@
1
+ from typing import Iterator
2
+
1
3
  from langchain_community.document_loaders.parsers.images import BaseImageBlobParser
4
+ from langchain_core.documents import Document
5
+ from langchain_core.documents.base import Blob
2
6
 
3
7
  from alita_sdk.runtime.langchain.document_loaders.AlitaImageLoader import AlitaImageLoader
4
8
 
@@ -8,10 +12,19 @@ class ImageParser(BaseImageBlobParser):
8
12
  self.llm = kwargs.get('llm')
9
13
  self.prompt = kwargs.get('prompt')
10
14
 
15
+ def lazy_parse(self, blob: Blob) -> Iterator[Document]:
16
+ try:
17
+ yield from super().lazy_parse(blob)
18
+ except Exception:
19
+ yield Document(page_content="[Image: Unknown]")
20
+
11
21
  def _analyze_image(self, img) -> str:
12
22
  from io import BytesIO
13
23
 
14
24
  byte_stream = BytesIO()
15
25
  img.save(byte_stream, format='PNG')
16
26
  image_bytes = byte_stream.getvalue()
17
- return AlitaImageLoader(file_content=image_bytes, file_name="image.png", prompt=self.prompt, llm=self.llm).get_content()
27
+ try:
28
+ return AlitaImageLoader(file_content=image_bytes, file_name="image.png", prompt=self.prompt, llm=self.llm).get_content()
29
+ except Exception:
30
+ return "Image: unknown"
@@ -1,7 +1,7 @@
1
1
  import hashlib
2
- import json
3
2
  import logging
4
- from typing import Any, Optional, Generator
3
+ import re
4
+ from typing import Any, Optional, Generator, List
5
5
 
6
6
  from langchain_core.documents import Document
7
7
  from langchain_core.tools import ToolException
@@ -59,18 +59,53 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
59
59
  def create_new_bucket(self, bucket_name: str, expiration_measure = "weeks", expiration_value = 1):
60
60
  return self.artifact.client.create_bucket(bucket_name, expiration_measure, expiration_value)
61
61
 
62
+ def _index_tool_params(self):
63
+ return {
64
+ 'include_extensions': (Optional[List[str]], Field(
65
+ description="List of file extensions to include when processing: i.e. ['*.png', '*.jpg']. "
66
+ "If empty, all files will be processed (except skip_extensions).",
67
+ default=[])),
68
+ 'skip_extensions': (Optional[List[str]], Field(
69
+ description="List of file extensions to skip when processing: i.e. ['*.png', '*.jpg']",
70
+ default=[])),
71
+ }
72
+
62
73
  def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
63
74
  try:
64
- all_files = self.list_files(self.bucket, False)
75
+ all_files = self.list_files(self.bucket, False)['rows']
65
76
  except Exception as e:
66
77
  raise ToolException(f"Unable to extract files: {e}")
67
78
 
68
- for file in all_files['rows']:
79
+ include_extensions = kwargs.get('include_extensions', [])
80
+ skip_extensions = kwargs.get('skip_extensions', [])
81
+ self._log_tool_event(message=f"Files filtering started. Include extensions: {include_extensions}. "
82
+ f"Skip extensions: {skip_extensions}", tool_name="loader")
83
+ # show the progress of filtering
84
+ total_files = len(all_files) if isinstance(all_files, list) else 0
85
+ filtered_files_count = 0
86
+ for file in all_files:
87
+ filtered_files_count += 1
88
+ if filtered_files_count % 10 == 0 or filtered_files_count == total_files:
89
+ self._log_tool_event(message=f"Files filtering progress: {filtered_files_count}/{total_files}",
90
+ tool_name="loader")
91
+ file_name = file['name']
92
+
93
+ # Check if file should be skipped based on skip_extensions
94
+ if any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
95
+ for pattern in skip_extensions):
96
+ continue
97
+
98
+ # Check if file should be included based on include_extensions
99
+ # If include_extensions is empty, process all files (that weren't skipped)
100
+ if include_extensions and not (any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
101
+ for pattern in include_extensions)):
102
+ continue
103
+
69
104
  metadata = {
70
105
  ("updated_on" if k == "modified" else k): str(v)
71
106
  for k, v in file.items()
72
107
  }
73
- metadata['id'] = self.get_hash_from_bucket_and_file_name(self.bucket, file['name'])
108
+ metadata['id'] = self.get_hash_from_bucket_and_file_name(self.bucket, file_name)
74
109
  yield Document(page_content="", metadata=metadata)
75
110
 
76
111
  def get_hash_from_bucket_and_file_name(self, bucket, file_name):
@@ -105,7 +105,7 @@ class GitHubClient(BaseModel):
105
105
  self._github_repo_instance = None
106
106
  except Exception as e:
107
107
  # Only raise when accessed, not during initialization
108
- return ToolException(e)
108
+ raise ToolException(e)
109
109
  return self._github_repo_instance
110
110
 
111
111
  @model_validator(mode='before')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.345
3
+ Version: 0.3.346
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -62,13 +62,13 @@ alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py,sha256=QwgBJE-B
62
62
  alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py,sha256=Nav2cgCQKOHQi_ZgYYn_iFdP_Os56KVlVR5nHGXecBc,3445
63
63
  alita_sdk/runtime/langchain/document_loaders/AlitaJiraLoader.py,sha256=M2q3YThkps0yAZOjfoLcyE7qycVTYKcXEGtpmp0N6C8,10950
64
64
  alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py,sha256=RGHDfleYTn7AAc3H-yFZrjm06L0Ux14ZtEJpFlVBNCA,2474
65
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py,sha256=usSrPnYQ3dDOJDdg6gBDTnBJnHiqjLxd_kvOBfRyVxY,5946
65
+ alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py,sha256=olVThKX9Mmv4muTW0cAQBkgeNqU4IcdLVhqpBuzwly4,5904
66
66
  alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py,sha256=CHIaUnP2Alu7D1NHxlL5N98iY7Gqm4tA5wHjBYUsQLc,2833
67
67
  alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py,sha256=m_7aq-aCFVb4vXZsJNinfN1hAuyy_S0ylRknv_ahxDc,340
68
68
  alita_sdk/runtime/langchain/document_loaders/AlitaQtestLoader.py,sha256=CUVVnisxm7b5yZWV6rn0Q3MEEaO1GWNcfnz5yWz8T0k,13283
69
69
  alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py,sha256=nI8lyndVZxVAxbjX3yiqyuFQKFE8MjLPyYSyqRWxHqQ,4077
70
70
  alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py,sha256=EiCIAF_OxSrbuwgOFk2IpxRMvFbctITt2jAI0g_atpk,3586
71
- alita_sdk/runtime/langchain/document_loaders/ImageParser.py,sha256=gao5yCCKdDai_Gx7YdEx5U6oMyJYzn69eYmEvWLh-fc,656
71
+ alita_sdk/runtime/langchain/document_loaders/ImageParser.py,sha256=RQ4zGdSw42ec8c6Eb48uFadayWuiT4FbwhGVwhSw60s,1065
72
72
  alita_sdk/runtime/langchain/document_loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
73
  alita_sdk/runtime/langchain/document_loaders/constants.py,sha256=xlOXq2iooepcM41SUehbH4ZUFsdz1gWli_7C9Lt5saI,7528
74
74
  alita_sdk/runtime/langchain/document_loaders/utils.py,sha256=9xghESf3axBbwxATyVuS0Yu-TWe8zWZnXgCD1ZVyNW0,2414
@@ -106,7 +106,7 @@ alita_sdk/runtime/toolkits/vectorstore.py,sha256=BGppQADa1ZiLO17fC0uCACTTEvPHlod
106
106
  alita_sdk/runtime/tools/__init__.py,sha256=7OA8YPKlEOfXu3-gJA08cyR-VymjSPL-OmbXI-B2xVA,355
107
107
  alita_sdk/runtime/tools/agent.py,sha256=m98QxOHwnCRTT9j18Olbb5UPS8-ZGeQaGiUyZJSyFck,3162
108
108
  alita_sdk/runtime/tools/application.py,sha256=z3vLZODs-_xEEnZFmGF0fKz1j3VtNJxqsAmg5ovExpQ,3129
109
- alita_sdk/runtime/tools/artifact.py,sha256=2Jjrhuj7Q-Sc5AKkAG7Pk8cJnGPqnqgtOmE3eDOVX0M,8694
109
+ alita_sdk/runtime/tools/artifact.py,sha256=9kNZENeGDK4wW3cG0tixmJb0FDJhO-VqujuuuxN8kDo,10682
110
110
  alita_sdk/runtime/tools/datasource.py,sha256=pvbaSfI-ThQQnjHG-QhYNSTYRnZB0rYtZFpjCfpzxYI,2443
111
111
  alita_sdk/runtime/tools/echo.py,sha256=spw9eCweXzixJqHnZofHE1yWiSUa04L4VKycf3KCEaM,486
112
112
  alita_sdk/runtime/tools/function.py,sha256=0iZJ-UxaPbtcXAVX9G5Vsn7vmD7lrz3cBG1qylto1gs,2844
@@ -239,7 +239,7 @@ alita_sdk/tools/figma/__init__.py,sha256=W6vIMMkZI2Lmpg6_CRRV3oadaIbVI-qTLmKUh6e
239
239
  alita_sdk/tools/figma/api_wrapper.py,sha256=KbKet1Xvjq1Vynz_jEE1MtEAVtLYNlSCg67u4dfhe90,33681
240
240
  alita_sdk/tools/github/__init__.py,sha256=2rHu0zZyZGnLC5CkHgDIhe14N9yCyaEfrrt7ydH8478,5191
241
241
  alita_sdk/tools/github/api_wrapper.py,sha256=uDwYckdnpYRJtb0uZnDkaz2udvdDLVxuCh1tSwspsiU,8411
242
- alita_sdk/tools/github/github_client.py,sha256=IhTYcqByJ_wnYg2GFkLkYaiG2j8kFkL8p8CTIVZwmqY,86598
242
+ alita_sdk/tools/github/github_client.py,sha256=0YkpD6Zm4X46jMNN57ZIypo2YObtgxCGQokJAF-laFs,86597
243
243
  alita_sdk/tools/github/graphql_client_wrapper.py,sha256=d3AGjzLGH_hdQV2V8HeAX92dJ4dlnE5OXqUlCO_PBr0,71539
244
244
  alita_sdk/tools/github/schemas.py,sha256=TxEWR3SjDKVwzo9i2tLnss_uPAv85Mh7oWjvQvYLDQE,14000
245
245
  alita_sdk/tools/github/tool.py,sha256=Jnnv5lenV5ds8AAdyo2m8hSzyJ117HZBjzHC6T1ck-M,1037
@@ -350,8 +350,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
350
350
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
351
351
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
352
352
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
353
- alita_sdk-0.3.345.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
354
- alita_sdk-0.3.345.dist-info/METADATA,sha256=xKGJO9ArLAkIHbt6Ow6scbFIqtp0cqbqca2NPHVk6ao,19015
355
- alita_sdk-0.3.345.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
356
- alita_sdk-0.3.345.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
357
- alita_sdk-0.3.345.dist-info/RECORD,,
353
+ alita_sdk-0.3.346.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
354
+ alita_sdk-0.3.346.dist-info/METADATA,sha256=OoUbeD3TLR5rkU_L-5H3DOb2tB7yJ9JWqmwAjDpYq_E,19015
355
+ alita_sdk-0.3.346.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
356
+ alita_sdk-0.3.346.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
357
+ alita_sdk-0.3.346.dist-info/RECORD,,