alita-sdk 0.3.329__py3-none-any.whl → 0.3.330__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. (The original page linked to more details here.)

@@ -1,4 +1,4 @@
1
- from typing import Iterator
1
+ from typing import Iterator, Generator
2
2
 
3
3
  from langchain_core.documents import Document
4
4
 
@@ -6,6 +6,9 @@ from langchain_community.document_loaders.base import BaseLoader
6
6
  from langchain_community.document_loaders.helpers import detect_file_encodings
7
7
  from langchain_core.tools import ToolException
8
8
 
9
+ from alita_sdk.tools.chunkers import markdown_chunker
10
+
11
+
9
12
  class AlitaTextLoader(BaseLoader):
10
13
 
11
14
  def __init__(self, **kwargs):
@@ -19,6 +22,8 @@ class AlitaTextLoader(BaseLoader):
19
22
  raise ToolException("'file_path' or 'file_content' parameter should be provided.")
20
23
  self.encoding = kwargs.get('encoding', 'utf-8')
21
24
  self.autodetect_encoding = kwargs.get('autodetect_encoding', False)
25
+ self.max_tokens=kwargs.get('max_tokens', 1024)
26
+ self.token_overlap = kwargs.get('token_overlap', 10)
22
27
 
23
28
  def get_content(self):
24
29
  text = ""
@@ -59,8 +64,16 @@ class AlitaTextLoader(BaseLoader):
59
64
 
60
65
  return text
61
66
 
67
+ def generate_document(self, text, metadata) -> Generator[Document, None, None]:
68
+ yield Document(page_content=text, metadata=metadata)
69
+
62
70
  def lazy_load(self) -> Iterator[Document]:
63
71
  """Load from file path."""
64
72
  text = self.get_content()
65
73
  metadata = {"source": str(self.file_path) if hasattr(self, 'file_path') else self.file_name}
66
- yield Document(page_content=text, metadata=metadata)
74
+ chunks = markdown_chunker(file_content_generator=self.generate_document(text, metadata),
75
+ config={
76
+ "max_tokens": self.max_tokens,
77
+ "token_overlap": self.token_overlap
78
+ })
79
+ yield from chunks
@@ -1,4 +1,5 @@
1
1
  import os
2
+ import re
2
3
  import tempfile
3
4
  from logging import getLogger
4
5
  from pathlib import Path
@@ -8,6 +9,7 @@ from langchain_core.documents import Document
8
9
  from langchain_core.tools import ToolException
9
10
 
10
11
  from alita_sdk.runtime.langchain.document_loaders.constants import loaders_map, LoaderProperties
12
+ from ...runtime.langchain.document_loaders.AlitaTextLoader import AlitaTextLoader
11
13
  from ...runtime.utils.utils import IndexerKeywords
12
14
 
13
15
  logger = getLogger(__name__)
@@ -231,7 +233,8 @@ def process_content_by_type(content, filename: str, llm=None, chunking_config=No
231
233
  """Process the content of a file based on its type using a configured loader."""
232
234
  temp_file_path = None
233
235
  try:
234
- extension = "." + filename.split('.')[-1].lower()
236
+ match = re.search(r'\.([^.]+)$', filename)
237
+ extension = f".{match.group(1).lower()}" if match else ".txt"
235
238
 
236
239
  with tempfile.NamedTemporaryFile(mode='w+b', suffix=extension, delete=False) as temp_file:
237
240
  temp_file_path = temp_file.name
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.329
3
+ Version: 0.3.330
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -67,7 +67,7 @@ alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py,sha256=SKA
67
67
  alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py,sha256=m_7aq-aCFVb4vXZsJNinfN1hAuyy_S0ylRknv_ahxDc,340
68
68
  alita_sdk/runtime/langchain/document_loaders/AlitaQtestLoader.py,sha256=CUVVnisxm7b5yZWV6rn0Q3MEEaO1GWNcfnz5yWz8T0k,13283
69
69
  alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py,sha256=nI8lyndVZxVAxbjX3yiqyuFQKFE8MjLPyYSyqRWxHqQ,4077
70
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py,sha256=uNcV0En49_0u0RYB1sP1XfNspT2Xc5CacuJr9Jqv79Q,2972
70
+ alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py,sha256=EiCIAF_OxSrbuwgOFk2IpxRMvFbctITt2jAI0g_atpk,3586
71
71
  alita_sdk/runtime/langchain/document_loaders/ImageParser.py,sha256=gao5yCCKdDai_Gx7YdEx5U6oMyJYzn69eYmEvWLh-fc,656
72
72
  alita_sdk/runtime/langchain/document_loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
73
  alita_sdk/runtime/langchain/document_loaders/constants.py,sha256=H5oKHDHZw1L0x9sGiYGLmkGM6dH460bvuj-EycmpR6E,7235
@@ -327,7 +327,7 @@ alita_sdk/tools/testrail/__init__.py,sha256=Xg4nVjULL_D8JpIXLYXppnwUfGF4-lguFwKH
327
327
  alita_sdk/tools/testrail/api_wrapper.py,sha256=PKhtf04C6PFDexGCAJm-hjA9Gpu4crx6EXKT5K-b_Pk,32985
328
328
  alita_sdk/tools/utils/__init__.py,sha256=W9rCCUPtHCP5nGAbWp0n5jaNA84572aiRoqKneBnaS4,3330
329
329
  alita_sdk/tools/utils/available_tools_decorator.py,sha256=IbrdfeQkswxUFgvvN7-dyLMZMyXLiwvX7kgi3phciCk,273
330
- alita_sdk/tools/utils/content_parser.py,sha256=SLwRNNb2oahxM5DW9MJ570NgEFDKXl53bbMZ9021Ee0,14238
330
+ alita_sdk/tools/utils/content_parser.py,sha256=ZgjzopMY3OyEcypa8NEwB4263HZ08V0fKQZRJXPmlj4,14393
331
331
  alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=ypBEAkFRGHv5edW0N9rdo1yKurNGQ4pRVEWtrN_7SeA,17656
332
332
  alita_sdk/tools/vector_adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
333
333
  alita_sdk/tools/xray/__init__.py,sha256=eOMWP8VamFbbJgt1xrGpGPqB9ByOTA0Cd3LCaETzGk4,4376
@@ -349,8 +349,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
349
349
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
350
350
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
351
351
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
352
- alita_sdk-0.3.329.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
353
- alita_sdk-0.3.329.dist-info/METADATA,sha256=BRvAazGNKqm2sPsodjbdSv7A6pfFCEzX_l1jtj7c4DM,18835
354
- alita_sdk-0.3.329.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
355
- alita_sdk-0.3.329.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
356
- alita_sdk-0.3.329.dist-info/RECORD,,
352
+ alita_sdk-0.3.330.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
353
+ alita_sdk-0.3.330.dist-info/METADATA,sha256=tTJb4beJGhlKTftR_US7mWBMNQ4PaJbyxkp2bJ2AxOg,18835
354
+ alita_sdk-0.3.330.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
355
+ alita_sdk-0.3.330.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
356
+ alita_sdk-0.3.330.dist-info/RECORD,,