alita-sdk 0.3.248__py3-none-any.whl → 0.3.250__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,16 @@
1
1
  import os
2
2
  import tempfile
3
+ from copy import deepcopy as copy
3
4
  from logging import getLogger
4
5
  from pathlib import Path
6
+ from typing import Generator
5
7
 
8
+ from langchain_core.documents import Document
6
9
  from langchain_core.tools import ToolException
10
+ from langchain_text_splitters import TokenTextSplitter
7
11
 
8
12
  from alita_sdk.runtime.langchain.document_loaders.constants import loaders_map
13
+ from alita_sdk.tools.chunkers.utils import tiktoken_length
9
14
 
10
15
  logger = getLogger(__name__)
11
16
 
@@ -161,6 +166,61 @@ def load_content_from_bytes(file_content: bytes, extension: str = None, loader_e
161
166
  # Now the file is closed and can be read
162
167
  result = load_content(temp_file_path, extension, loader_extra_config, llm)
163
168
  return result
169
+ finally:
170
+ if temp_file_path and os.path.exists(temp_file_path):
171
+ os.remove(temp_file_path)
172
+
173
def process_content_by_type(document: Document, extension_source: str, llm=None, chunking_config=None) -> Generator[Document, None, None]:
    """Load the raw content attached to *document* and yield it as token-bounded chunks.

    The payload stored under ``document.metadata['loader_content']`` is removed
    from the metadata, written to a temporary file whose suffix is derived from
    *extension_source*, and parsed with the loader registered for that
    extension in ``loaders_map``. Every loaded chunk that exceeds the token
    limit is further split with ``TokenTextSplitter``.

    Args:
        document: Source document. Its metadata must contain ``loader_content``
            (written to a file opened in binary mode, so presumably ``bytes`` --
            confirm against callers). The key is popped, i.e. *document* is
            mutated.
        extension_source: File name or path; the text after the last ``.`` is
            lower-cased and used as the extension for the loader lookup.
        llm: Accepted for interface compatibility; not used by this function.
        chunking_config: Optional mapping with ``max_tokens`` (default 512) and
            ``tokens_overlapping`` (default 10). ``None`` means "use defaults".

    Yields:
        ``Document`` objects whose metadata is a deep copy of the source
        metadata extended with ``headers`` (the loader chunk's metadata values
        joined with ``"; "``), a 1-based running ``chunk_id`` and
        ``chunk_type == "document"``.

    Raises:
        KeyError: If ``loader_content`` is missing from the document metadata.
    """
    # A None default avoids sharing one mutable dict between calls.
    config = chunking_config or {}
    temp_file_path = None
    try:
        extension = "." + extension_source.split('.')[-1].lower()

        # delete=False: the file has to outlive the ``with`` block so the loader
        # can reopen it by path; it is removed in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(mode='w+b', suffix=extension, delete=False) as temp_file:
            temp_file_path = temp_file.name
            # pop() keeps the raw payload out of the metadata copied into every chunk.
            content = document.metadata.pop('loader_content')
            temp_file.write(content)
            temp_file.flush()

        loader_config = loaders_map.get(extension)
        if not loader_config:
            logger.warning(f"No loader found for file extension: {extension}. File: {temp_file_path}")
            return

        loader_cls = loader_config['class']
        loader_kwargs = loader_config['kwargs']
        loader = loader_cls(file_path=temp_file_path, **loader_kwargs)

        max_tokens = config.get('max_tokens', 512)
        tokens_overlapping = config.get('tokens_overlapping', 10)
        # Built on first use and then reused: its arguments are loop-invariant.
        splitter = None
        chunk_id = 0
        for chunk in loader.load():
            text = chunk.page_content
            if tiktoken_length(text) > max_tokens:
                if splitter is None:
                    splitter = TokenTextSplitter(
                        encoding_name="cl100k_base",
                        chunk_size=max_tokens,
                        chunk_overlap=tokens_overlapping,
                    )
                pieces = splitter.split_text(text)
            else:
                pieces = [text]

            # Join the individual metadata values. Joining str(list) would
            # interleave "; " between every character of the list's repr.
            headers = "; ".join(str(value) for value in chunk.metadata.values())
            for piece in pieces:
                chunk_id += 1
                # Deep copy so yielded documents never share mutable metadata.
                docmeta = copy(document.metadata)
                docmeta['headers'] = headers
                docmeta['chunk_id'] = chunk_id
                docmeta['chunk_type'] = "document"
                yield Document(page_content=piece, metadata=docmeta)
    finally:
        # Runs when the generator is exhausted, closed, or fails part-way.
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.248
3
+ Version: 0.3.250
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -1,7 +1,7 @@
1
1
  alita_sdk/__init__.py,sha256=fxeNiqiVpIFAJls31Oomifyrtd5gT9iPUTdkWjDOB2Y,656
2
2
  alita_sdk/community/__init__.py,sha256=8N7wWwPhoyOq3p8wlV3-pb3l3nJCR8TUrtV9iIPLU88,2523
3
3
  alita_sdk/community/utils.py,sha256=lvuCJaNqVPHOORJV6kIPcXJcdprVW_TJvERtYAEgpjM,249
4
- alita_sdk/configurations/__init__.py,sha256=kayVc1lkXzrogAztw0te72f-MQBkDRwXDl8l4aWysgQ,3094
4
+ alita_sdk/configurations/__init__.py,sha256=684m4eHUoe4uyhSuLZsYKGVBW6zW0rpyqkShCHssqQU,3196
5
5
  alita_sdk/configurations/ado.py,sha256=sP6eDLhEqr_u6CXm8Scx45rcn1wf-J_Y2fjkp5n582k,1189
6
6
  alita_sdk/configurations/azure_search.py,sha256=PV2wMeNZI9XTN1nbrT0Li3xDAV7x8S9SJBoEKJqn_KY,809
7
7
  alita_sdk/configurations/bigquery.py,sha256=-hG5HnNKhxeQKRy85V6cunTmQNUobbACNOg4Z1KPc-g,920
@@ -105,7 +105,8 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
105
105
  alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
106
106
  alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
107
107
  alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
108
- alita_sdk/runtime/tools/vectorstore.py,sha256=qNIDID1VjSdIfH6KTdOBL3_lnyVyfsR-fcRj4XN1jRM,33758
108
+ alita_sdk/runtime/tools/vectorstore.py,sha256=l5wfovwMNvS_RgW-ZHXCh8Cm8gauunRzP0NPkzmshcQ,33852
109
+ alita_sdk/runtime/tools/vectorstore_base.py,sha256=OdJIJkjTmQ0BC-AzAOMP2phAcNATJ8gI5JoBWSSdpNU,27892
109
110
  alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
110
111
  alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
111
112
  alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -117,7 +118,9 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
117
118
  alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
118
119
  alita_sdk/runtime/utils/utils.py,sha256=CpEl3LCeLbhzQySz08lkKPm7Auac6IiLF7WB8wmArMI,589
119
120
  alita_sdk/tools/__init__.py,sha256=ko5TToGYZFmBrho26DRAVvrkHWxQ2sfs8gVAASinYp8,10611
120
- alita_sdk/tools/elitea_base.py,sha256=yRjU1FQwsGdDpUDSmgLb7p-ZYWLLJTgqSzIVC2jcACQ,30268
121
+ alita_sdk/tools/base_indexer_toolkit.py,sha256=qQfMHzsQ2BfusKMV_DNiHOtZVheiQ4gBfy5JXjYi0UY,20231
122
+ alita_sdk/tools/elitea_base.py,sha256=kmfVA965-IkcDBnGQzfv3E14sD6RXLDzJFcj7CJ3fDc,30350
123
+ alita_sdk/tools/non_code_indexer_toolkit.py,sha256=v9uq1POE1fQKCd152mbqDtF-HSe0qoDj83k4E5LAkMI,1080
121
124
  alita_sdk/tools/ado/__init__.py,sha256=bArTObt5cqG1SkijKevWGbsIILHBA3aCStg8Q1jd69k,1243
122
125
  alita_sdk/tools/ado/utils.py,sha256=PTCludvaQmPLakF2EbCGy66Mro4-rjDtavVP-xcB2Wc,1252
123
126
  alita_sdk/tools/ado/repos/__init__.py,sha256=_vjU3yHRXmLg6BDNmJsLiM9qDYRE_JmX5kXI_irMmQQ,5789
@@ -127,7 +130,7 @@ alita_sdk/tools/ado/test_plan/test_plan_wrapper.py,sha256=jQt8kFmdAzsopjByLTMiSn
127
130
  alita_sdk/tools/ado/wiki/__init__.py,sha256=uBKo_Meu2ZxMxcxGsMmvCXyplRE2um1_PIRvdYd37rM,5171
128
131
  alita_sdk/tools/ado/wiki/ado_wrapper.py,sha256=zg6wMRar1DTp-ZRlYaQifBEnpYmTrHXskTNPdrLdy8s,14759
129
132
  alita_sdk/tools/ado/work_item/__init__.py,sha256=coDedNL0pSPLjZ6VVK1UcqWo00zxe2T4XfVXt8bMho8,5383
130
- alita_sdk/tools/ado/work_item/ado_wrapper.py,sha256=ubeF2m8J6CGZF_gnkTEbmW_eh6YWsk7bD2clu9FmZpY,28313
133
+ alita_sdk/tools/ado/work_item/ado_wrapper.py,sha256=gEywCL_kS0k1jWcDhsmYUybpIP08tH8go6CixLJGwT4,28409
131
134
  alita_sdk/tools/advanced_jira_mining/__init__.py,sha256=pUTzECqGvYaR5qWY3JPUhrImrZgc7pCXuqSe5eWIE80,4604
132
135
  alita_sdk/tools/advanced_jira_mining/data_mining_wrapper.py,sha256=nZPtuwVWp8VeHw1B8q9kdwf-6ZvHnlXTOGdcIMDkKpw,44211
133
136
  alita_sdk/tools/aws/__init__.py,sha256=tB6GCOg4XGSpR6qgbgAF4MUQ5-YmQCbWurWgrVKEKQ8,181
@@ -308,7 +311,7 @@ alita_sdk/tools/testio/api_wrapper.py,sha256=BvmL5h634BzG6p7ajnQLmj-uoAw1gjWnd4F
308
311
  alita_sdk/tools/testrail/__init__.py,sha256=0kETjWKLU7R6mugBWsjwEUsh10pipbAeNSGJAO0FBh0,4634
309
312
  alita_sdk/tools/testrail/api_wrapper.py,sha256=K-Gc42RH2z-fK4cXi8zQq3s9A4v_pCJkRB3XKLAhypc,32056
310
313
  alita_sdk/tools/utils/__init__.py,sha256=155xepXPr4OEzs2Mz5YnjXcBpxSv1X2eznRUVoPtyK0,3268
311
- alita_sdk/tools/utils/content_parser.py,sha256=uqJoaJzl9w4Nf9yFy40sm1-qm88bvj7Y1S-1OxHkTks,7410
314
+ alita_sdk/tools/utils/content_parser.py,sha256=0HKQqGTdXHKlcz72GHEwXqLXJsRYXm35F-P1KZz0sNc,10351
312
315
  alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=a6FAsiix_EvATIKUf5YT6vHh5LDyJ5uSP3LJqoxFo04,17367
313
316
  alita_sdk/tools/vector_adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
314
317
  alita_sdk/tools/xray/__init__.py,sha256=GGpbiBdDQ9kMFqJEHYi7XwKpkuMMHi-ZF-IM8yFIgUM,4380
@@ -330,8 +333,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=JAeWf-RXohsxheUpT0iMDClc_izj-
330
333
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
331
334
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
332
335
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
333
- alita_sdk-0.3.248.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
334
- alita_sdk-0.3.248.dist-info/METADATA,sha256=ZG4u4BaGjnEIAXlfom95U0Xjk8uIe0N_dDT_d5MtNdg,18897
335
- alita_sdk-0.3.248.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
336
- alita_sdk-0.3.248.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
337
- alita_sdk-0.3.248.dist-info/RECORD,,
336
+ alita_sdk-0.3.250.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
337
+ alita_sdk-0.3.250.dist-info/METADATA,sha256=vKpZbYSYxt7ruy33ugylQXj66_5YWm3Cuw3Wz-psGSA,18897
338
+ alita_sdk-0.3.250.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
339
+ alita_sdk-0.3.250.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
340
+ alita_sdk-0.3.250.dist-info/RECORD,,