alita-sdk 0.3.248__py3-none-any.whl → 0.3.250__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/configurations/__init__.py +1 -0
- alita_sdk/runtime/tools/vectorstore.py +2 -1
- alita_sdk/runtime/tools/vectorstore_base.py +624 -0
- alita_sdk/tools/ado/work_item/ado_wrapper.py +9 -9
- alita_sdk/tools/base_indexer_toolkit.py +426 -0
- alita_sdk/tools/elitea_base.py +29 -22
- alita_sdk/tools/non_code_indexer_toolkit.py +23 -0
- alita_sdk/tools/utils/content_parser.py +60 -0
- {alita_sdk-0.3.248.dist-info → alita_sdk-0.3.250.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.248.dist-info → alita_sdk-0.3.250.dist-info}/RECORD +13 -10
- {alita_sdk-0.3.248.dist-info → alita_sdk-0.3.250.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.248.dist-info → alita_sdk-0.3.250.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.248.dist-info → alita_sdk-0.3.250.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,16 @@
|
|
1
1
|
import os
|
2
2
|
import tempfile
|
3
|
+
from copy import deepcopy as copy
|
3
4
|
from logging import getLogger
|
4
5
|
from pathlib import Path
|
6
|
+
from typing import Generator
|
5
7
|
|
8
|
+
from langchain_core.documents import Document
|
6
9
|
from langchain_core.tools import ToolException
|
10
|
+
from langchain_text_splitters import TokenTextSplitter
|
7
11
|
|
8
12
|
from alita_sdk.runtime.langchain.document_loaders.constants import loaders_map
|
13
|
+
from alita_sdk.tools.chunkers.utils import tiktoken_length
|
9
14
|
|
10
15
|
logger = getLogger(__name__)
|
11
16
|
|
@@ -161,6 +166,61 @@ def load_content_from_bytes(file_content: bytes, extension: str = None, loader_e
|
|
161
166
|
# Now the file is closed and can be read
|
162
167
|
result = load_content(temp_file_path, extension, loader_extra_config, llm)
|
163
168
|
return result
|
169
|
+
finally:
|
170
|
+
if temp_file_path and os.path.exists(temp_file_path):
|
171
|
+
os.remove(temp_file_path)
|
172
|
+
|
173
|
+
def process_content_by_type(document: Document, extension_source: str, llm=None, chunking_config=None) -> Generator[Document, None, None]:
    """Load a document's raw payload with the loader for its extension and yield token-bounded chunks.

    The payload is taken from ``document.metadata['loader_content']`` (the key
    is removed from the metadata as a side effect), written to a temporary
    file, and parsed by the loader registered in ``loaders_map`` for the file
    extension. Any loaded part longer than ``max_tokens`` tokens is split
    further with ``TokenTextSplitter``.

    Args:
        document: Source document; its metadata must contain ``loader_content``
            holding the bytes to parse. The remaining metadata is deep-copied
            into every yielded chunk.
        extension_source: File name or path; the text after the last ``.``
            (lower-cased) selects the loader.
        llm: Currently not used by this function.
        chunking_config: Optional mapping. ``max_tokens`` (default 512) is the
            chunk size limit and ``tokens_overlapping`` (default 10) is the
            overlap used when an oversized part is split. ``None`` means
            "use the defaults".

    Yields:
        ``Document`` objects whose metadata is a copy of ``document.metadata``
        extended with ``headers`` (the loader-provided metadata values joined
        with ``"; "``), ``chunk_id`` (1-based, running across all chunks) and
        ``chunk_type`` (always ``"document"``).

    Raises:
        KeyError: If ``document.metadata`` has no ``loader_content`` entry.
    """
    # A fresh dict per call: never share a mutable default between invocations.
    config = chunking_config or {}
    temp_file_path = None
    try:
        extension = "." + extension_source.split('.')[-1].lower()

        with tempfile.NamedTemporaryFile(mode='w+b', suffix=extension, delete=False) as temp_file:
            temp_file_path = temp_file.name
            content = document.metadata.pop('loader_content')
            temp_file.write(content)
            temp_file.flush()

        # The file is closed here so the loader can reopen it by name on every platform.
        loader_config = loaders_map.get(extension)
        if not loader_config:
            logger.warning(f"No loader found for file extension: {extension}. File: {temp_file_path}")
            return

        loader_cls = loader_config['class']
        loader_kwargs = loader_config['kwargs']

        loader = loader_cls(file_path=temp_file_path, **loader_kwargs)
        max_tokens = config.get('max_tokens', 512)
        tokens_overlapping = config.get('tokens_overlapping', 10)

        splitter = None  # built lazily: only needed when a part exceeds max_tokens
        chunk_id = 0
        for chunk in loader.load():
            if tiktoken_length(chunk.page_content) > max_tokens:
                if splitter is None:
                    splitter = TokenTextSplitter(encoding_name="cl100k_base",
                                                 chunk_size=max_tokens,
                                                 chunk_overlap=tokens_overlapping)
                pieces = splitter.split_text(chunk.page_content)
            else:
                pieces = [chunk.page_content]

            # Join the individual metadata values; joining str(list) would
            # interleave "; " between every character of the list's repr.
            headers = "; ".join(str(value) for value in chunk.metadata.values())

            for piece in pieces:
                chunk_id += 1
                docmeta = copy(document.metadata)
                docmeta.update({"headers": headers})
                docmeta['chunk_id'] = chunk_id
                docmeta['chunk_type'] = "document"
                yield Document(
                    page_content=piece,
                    metadata=docmeta
                )
    finally:
        # Runs when the generator is exhausted, closed, or fails.
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: alita_sdk
|
3
|
-
Version: 0.3.248
|
3
|
+
Version: 0.3.250
|
4
4
|
Summary: SDK for building langchain agents using resources from Alita
|
5
5
|
Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
|
6
6
|
License-Expression: Apache-2.0
|
@@ -1,7 +1,7 @@
|
|
1
1
|
alita_sdk/__init__.py,sha256=fxeNiqiVpIFAJls31Oomifyrtd5gT9iPUTdkWjDOB2Y,656
|
2
2
|
alita_sdk/community/__init__.py,sha256=8N7wWwPhoyOq3p8wlV3-pb3l3nJCR8TUrtV9iIPLU88,2523
|
3
3
|
alita_sdk/community/utils.py,sha256=lvuCJaNqVPHOORJV6kIPcXJcdprVW_TJvERtYAEgpjM,249
|
4
|
-
alita_sdk/configurations/__init__.py,sha256=
|
4
|
+
alita_sdk/configurations/__init__.py,sha256=684m4eHUoe4uyhSuLZsYKGVBW6zW0rpyqkShCHssqQU,3196
|
5
5
|
alita_sdk/configurations/ado.py,sha256=sP6eDLhEqr_u6CXm8Scx45rcn1wf-J_Y2fjkp5n582k,1189
|
6
6
|
alita_sdk/configurations/azure_search.py,sha256=PV2wMeNZI9XTN1nbrT0Li3xDAV7x8S9SJBoEKJqn_KY,809
|
7
7
|
alita_sdk/configurations/bigquery.py,sha256=-hG5HnNKhxeQKRy85V6cunTmQNUobbACNOg4Z1KPc-g,920
|
@@ -105,7 +105,8 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
|
|
105
105
|
alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
|
106
106
|
alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
|
107
107
|
alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
|
108
|
-
alita_sdk/runtime/tools/vectorstore.py,sha256=
|
108
|
+
alita_sdk/runtime/tools/vectorstore.py,sha256=l5wfovwMNvS_RgW-ZHXCh8Cm8gauunRzP0NPkzmshcQ,33852
|
109
|
+
alita_sdk/runtime/tools/vectorstore_base.py,sha256=OdJIJkjTmQ0BC-AzAOMP2phAcNATJ8gI5JoBWSSdpNU,27892
|
109
110
|
alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
|
110
111
|
alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
111
112
|
alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
|
@@ -117,7 +118,9 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
|
|
117
118
|
alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
|
118
119
|
alita_sdk/runtime/utils/utils.py,sha256=CpEl3LCeLbhzQySz08lkKPm7Auac6IiLF7WB8wmArMI,589
|
119
120
|
alita_sdk/tools/__init__.py,sha256=ko5TToGYZFmBrho26DRAVvrkHWxQ2sfs8gVAASinYp8,10611
|
120
|
-
alita_sdk/tools/
|
121
|
+
alita_sdk/tools/base_indexer_toolkit.py,sha256=qQfMHzsQ2BfusKMV_DNiHOtZVheiQ4gBfy5JXjYi0UY,20231
|
122
|
+
alita_sdk/tools/elitea_base.py,sha256=kmfVA965-IkcDBnGQzfv3E14sD6RXLDzJFcj7CJ3fDc,30350
|
123
|
+
alita_sdk/tools/non_code_indexer_toolkit.py,sha256=v9uq1POE1fQKCd152mbqDtF-HSe0qoDj83k4E5LAkMI,1080
|
121
124
|
alita_sdk/tools/ado/__init__.py,sha256=bArTObt5cqG1SkijKevWGbsIILHBA3aCStg8Q1jd69k,1243
|
122
125
|
alita_sdk/tools/ado/utils.py,sha256=PTCludvaQmPLakF2EbCGy66Mro4-rjDtavVP-xcB2Wc,1252
|
123
126
|
alita_sdk/tools/ado/repos/__init__.py,sha256=_vjU3yHRXmLg6BDNmJsLiM9qDYRE_JmX5kXI_irMmQQ,5789
|
@@ -127,7 +130,7 @@ alita_sdk/tools/ado/test_plan/test_plan_wrapper.py,sha256=jQt8kFmdAzsopjByLTMiSn
|
|
127
130
|
alita_sdk/tools/ado/wiki/__init__.py,sha256=uBKo_Meu2ZxMxcxGsMmvCXyplRE2um1_PIRvdYd37rM,5171
|
128
131
|
alita_sdk/tools/ado/wiki/ado_wrapper.py,sha256=zg6wMRar1DTp-ZRlYaQifBEnpYmTrHXskTNPdrLdy8s,14759
|
129
132
|
alita_sdk/tools/ado/work_item/__init__.py,sha256=coDedNL0pSPLjZ6VVK1UcqWo00zxe2T4XfVXt8bMho8,5383
|
130
|
-
alita_sdk/tools/ado/work_item/ado_wrapper.py,sha256=
|
133
|
+
alita_sdk/tools/ado/work_item/ado_wrapper.py,sha256=gEywCL_kS0k1jWcDhsmYUybpIP08tH8go6CixLJGwT4,28409
|
131
134
|
alita_sdk/tools/advanced_jira_mining/__init__.py,sha256=pUTzECqGvYaR5qWY3JPUhrImrZgc7pCXuqSe5eWIE80,4604
|
132
135
|
alita_sdk/tools/advanced_jira_mining/data_mining_wrapper.py,sha256=nZPtuwVWp8VeHw1B8q9kdwf-6ZvHnlXTOGdcIMDkKpw,44211
|
133
136
|
alita_sdk/tools/aws/__init__.py,sha256=tB6GCOg4XGSpR6qgbgAF4MUQ5-YmQCbWurWgrVKEKQ8,181
|
@@ -308,7 +311,7 @@ alita_sdk/tools/testio/api_wrapper.py,sha256=BvmL5h634BzG6p7ajnQLmj-uoAw1gjWnd4F
|
|
308
311
|
alita_sdk/tools/testrail/__init__.py,sha256=0kETjWKLU7R6mugBWsjwEUsh10pipbAeNSGJAO0FBh0,4634
|
309
312
|
alita_sdk/tools/testrail/api_wrapper.py,sha256=K-Gc42RH2z-fK4cXi8zQq3s9A4v_pCJkRB3XKLAhypc,32056
|
310
313
|
alita_sdk/tools/utils/__init__.py,sha256=155xepXPr4OEzs2Mz5YnjXcBpxSv1X2eznRUVoPtyK0,3268
|
311
|
-
alita_sdk/tools/utils/content_parser.py,sha256=
|
314
|
+
alita_sdk/tools/utils/content_parser.py,sha256=0HKQqGTdXHKlcz72GHEwXqLXJsRYXm35F-P1KZz0sNc,10351
|
312
315
|
alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=a6FAsiix_EvATIKUf5YT6vHh5LDyJ5uSP3LJqoxFo04,17367
|
313
316
|
alita_sdk/tools/vector_adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
314
317
|
alita_sdk/tools/xray/__init__.py,sha256=GGpbiBdDQ9kMFqJEHYi7XwKpkuMMHi-ZF-IM8yFIgUM,4380
|
@@ -330,8 +333,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=JAeWf-RXohsxheUpT0iMDClc_izj-
|
|
330
333
|
alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
|
331
334
|
alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
|
332
335
|
alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
|
333
|
-
alita_sdk-0.3.
|
334
|
-
alita_sdk-0.3.
|
335
|
-
alita_sdk-0.3.
|
336
|
-
alita_sdk-0.3.
|
337
|
-
alita_sdk-0.3.
|
336
|
+
alita_sdk-0.3.250.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
337
|
+
alita_sdk-0.3.250.dist-info/METADATA,sha256=vKpZbYSYxt7ruy33ugylQXj66_5YWm3Cuw3Wz-psGSA,18897
|
338
|
+
alita_sdk-0.3.250.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
339
|
+
alita_sdk-0.3.250.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
|
340
|
+
alita_sdk-0.3.250.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|