alita-sdk 0.3.363__py3-none-any.whl → 0.3.365__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic. Click here for more details.
- alita_sdk/runtime/clients/client.py +0 -1
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +2 -2
- alita_sdk/runtime/utils/utils.py +3 -0
- alita_sdk/tools/base_indexer_toolkit.py +59 -4
- {alita_sdk-0.3.363.dist-info → alita_sdk-0.3.365.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.363.dist-info → alita_sdk-0.3.365.dist-info}/RECORD +9 -9
- {alita_sdk-0.3.363.dist-info → alita_sdk-0.3.365.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.363.dist-info → alita_sdk-0.3.365.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.363.dist-info → alita_sdk-0.3.365.dist-info}/top_level.txt +0 -0
|
alita_sdk/runtime/clients/client.py
CHANGED
|
@@ -213,7 +213,6 @@ class AlitaClient:
|
|
|
213
213
|
streaming=model_config.get("streaming", True),
|
|
214
214
|
stream_usage=model_config.get("stream_usage", True),
|
|
215
215
|
max_tokens=model_config.get("max_tokens", None),
|
|
216
|
-
top_p=model_config.get("top_p"),
|
|
217
216
|
temperature=model_config.get("temperature"),
|
|
218
217
|
max_retries=model_config.get("max_retries", 3),
|
|
219
218
|
seed=model_config.get("seed", None),
|
|
alita_sdk/runtime/langchain/interfaces/llm_processor.py
CHANGED
|
@@ -173,7 +173,7 @@ def get_vectorstore(vectorstore_type, vectorstore_params, embedding_func=None):
|
|
|
173
173
|
#
|
|
174
174
|
raise RuntimeError(f"Unknown VectorStore type: {vectorstore_type}")
|
|
175
175
|
|
|
176
|
-
def add_documents(vectorstore, documents):
|
|
176
|
+
def add_documents(vectorstore, documents, ids = None) -> list[str]:
|
|
177
177
|
""" Add documents to vectorstore """
|
|
178
178
|
if vectorstore is None:
|
|
179
179
|
return None
|
|
@@ -189,7 +189,7 @@ def add_documents(vectorstore, documents):
|
|
|
189
189
|
if isinstance(document.metadata[key], dict):
|
|
190
190
|
document.metadata[key] = dumps(document.metadata[key])
|
|
191
191
|
metadata.append(document.metadata)
|
|
192
|
-
vectorstore.add_texts(texts, metadatas=metadata)
|
|
192
|
+
return vectorstore.add_texts(texts, metadatas=metadata, ids=ids)
|
|
193
193
|
|
|
194
194
|
|
|
195
195
|
def generateResponse(
|
alita_sdk/runtime/utils/utils.py
CHANGED
|
@@ -11,6 +11,9 @@ class IndexerKeywords(Enum):
|
|
|
11
11
|
UPDATED_ON = 'updated_on'
|
|
12
12
|
CONTENT_IN_BYTES = 'loader_content'
|
|
13
13
|
CONTENT_FILE_NAME = 'loader_content_type'
|
|
14
|
+
INDEX_META_TYPE = 'index_meta'
|
|
15
|
+
INDEX_META_IN_PROGRESS = 'in_progress'
|
|
16
|
+
INDEX_META_COMPLETED = 'completed'
|
|
14
17
|
|
|
15
18
|
# This pattern matches characters that are NOT alphanumeric, underscores, or hyphens
|
|
16
19
|
clean_string_pattern = re.compile(r'[^a-zA-Z0-9_.-]')
|
|
alita_sdk/tools/base_indexer_toolkit.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any, Optional, List, Dict, Generator
|
|
4
5
|
|
|
5
6
|
from langchain_core.documents import Document
|
|
6
7
|
from pydantic import create_model, Field, SecretStr
|
|
@@ -147,6 +148,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
147
148
|
yield from ()
|
|
148
149
|
|
|
149
150
|
def index_data(self, **kwargs):
|
|
151
|
+
from ..runtime.langchain.interfaces.llm_processor import add_documents
|
|
150
152
|
collection_suffix = kwargs.get("collection_suffix")
|
|
151
153
|
progress_step = kwargs.get("progress_step")
|
|
152
154
|
clean_index = kwargs.get("clean_index")
|
|
@@ -156,6 +158,18 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
156
158
|
if clean_index:
|
|
157
159
|
self._clean_index(collection_suffix)
|
|
158
160
|
#
|
|
161
|
+
# create and add initial index meta document
|
|
162
|
+
index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata={
|
|
163
|
+
"collection": collection_suffix,
|
|
164
|
+
"type": IndexerKeywords.INDEX_META_TYPE.value,
|
|
165
|
+
"indexed": 0,
|
|
166
|
+
"state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
|
|
167
|
+
"index_configuration": kwargs,
|
|
168
|
+
"created_on": time.time(),
|
|
169
|
+
"updated_on": time.time(),
|
|
170
|
+
})
|
|
171
|
+
index_meta_ids = add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
|
|
172
|
+
#
|
|
159
173
|
self._log_tool_event(f"Indexing data into collection with suffix '{collection_suffix}'. It can take some time...")
|
|
160
174
|
self._log_tool_event(f"Loading the documents to index...{kwargs}")
|
|
161
175
|
documents = self._base_loader(**kwargs)
|
|
@@ -164,10 +178,18 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
164
178
|
documents = (doc for doc in documents)
|
|
165
179
|
self._log_tool_event(f"Base documents were pre-loaded. "
|
|
166
180
|
f"Search for possible document duplicates and remove them from the indexing list...")
|
|
167
|
-
|
|
181
|
+
documents = self._reduce_duplicates(documents, collection_suffix)
|
|
168
182
|
self._log_tool_event(f"Duplicates were removed. "
|
|
169
183
|
f"Processing documents to collect dependencies and prepare them for indexing...")
|
|
170
|
-
|
|
184
|
+
result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
|
|
185
|
+
#
|
|
186
|
+
# update index meta document
|
|
187
|
+
index_meta_doc.metadata["indexed"] = result
|
|
188
|
+
index_meta_doc.metadata["state"] = IndexerKeywords.INDEX_META_COMPLETED.value
|
|
189
|
+
index_meta_doc.metadata["updated_on"] = time.time()
|
|
190
|
+
add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
|
|
191
|
+
#
|
|
192
|
+
return {"status": "ok", "message": f"successfully indexed {result} documents"}
|
|
171
193
|
|
|
172
194
|
def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, collection_suffix: Optional[str] = None, progress_step: int = 20):
|
|
173
195
|
self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
|
|
@@ -225,7 +247,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
225
247
|
total_counter += dependent_docs_counter
|
|
226
248
|
if pg_vector_add_docs_chunk:
|
|
227
249
|
add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
|
|
228
|
-
return
|
|
250
|
+
return total_counter
|
|
229
251
|
|
|
230
252
|
def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
|
|
231
253
|
from ..tools.chunkers import __all__ as chunkers
|
|
@@ -344,8 +366,41 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
344
366
|
filter.update({"collection": {
|
|
345
367
|
"$eq": collection_suffix.strip()
|
|
346
368
|
}})
|
|
369
|
+
filter = {
|
|
370
|
+
"$and": [
|
|
371
|
+
filter,
|
|
372
|
+
{"$or": [
|
|
373
|
+
{"type": {"$exists": False}},
|
|
374
|
+
{"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
|
|
375
|
+
]},
|
|
376
|
+
]
|
|
377
|
+
}
|
|
347
378
|
return filter
|
|
348
379
|
|
|
380
|
+
def index_meta_read(self):
|
|
381
|
+
from sqlalchemy import func
|
|
382
|
+
from sqlalchemy.orm import Session
|
|
383
|
+
|
|
384
|
+
store = self.vectorstore
|
|
385
|
+
try:
|
|
386
|
+
with Session(store.session_maker.bind) as session:
|
|
387
|
+
meta = session.query(
|
|
388
|
+
store.EmbeddingStore.id,
|
|
389
|
+
store.EmbeddingStore.cmetadata
|
|
390
|
+
).filter(
|
|
391
|
+
func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') == IndexerKeywords.INDEX_META_TYPE.value
|
|
392
|
+
).all()
|
|
393
|
+
return [
|
|
394
|
+
{"id": id_, "metadata": cmetadata}
|
|
395
|
+
for id_, cmetadata in meta
|
|
396
|
+
]
|
|
397
|
+
except Exception as e:
|
|
398
|
+
logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
|
|
399
|
+
return []
|
|
400
|
+
|
|
401
|
+
def index_meta_delete(self, index_meta_ids: list[str]):
|
|
402
|
+
self.vectorstore.delete(ids=index_meta_ids)
|
|
403
|
+
|
|
349
404
|
def search_index(self,
|
|
350
405
|
query: str,
|
|
351
406
|
collection_suffix: str = "",
|
|
{alita_sdk-0.3.363.dist-info → alita_sdk-0.3.365.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: alita_sdk
|
|
3
|
-
Version: 0.3.363
|
|
3
|
+
Version: 0.3.365
|
|
4
4
|
Summary: SDK for building langchain agents using resources from Alita
|
|
5
5
|
Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
{alita_sdk-0.3.363.dist-info → alita_sdk-0.3.365.dist-info}/RECORD
CHANGED
|
@@ -36,7 +36,7 @@ alita_sdk/configurations/zephyr_essential.py,sha256=tUIrh-PRNvdrLBj6rJXqlF-h6oaM
|
|
|
36
36
|
alita_sdk/runtime/__init__.py,sha256=4W0UF-nl3QF2bvET5lnah4o24CoTwSoKXhuN0YnwvEE,828
|
|
37
37
|
alita_sdk/runtime/clients/__init__.py,sha256=BdehU5GBztN1Qi1Wul0cqlU46FxUfMnI6Vq2Zd_oq1M,296
|
|
38
38
|
alita_sdk/runtime/clients/artifact.py,sha256=Tt3aWcxu20bVW6EX7s_iX5CTmcItKhUnkk8Q2gv2vw0,4036
|
|
39
|
-
alita_sdk/runtime/clients/client.py,sha256=
|
|
39
|
+
alita_sdk/runtime/clients/client.py,sha256=BIF6QSnhlTfsTQ_dQs-QZjeBJHZsOtSuv_q7_ABUUQg,45737
|
|
40
40
|
alita_sdk/runtime/clients/datasource.py,sha256=HAZovoQN9jBg0_-lIlGBQzb4FJdczPhkHehAiVG3Wx0,1020
|
|
41
41
|
alita_sdk/runtime/clients/prompt.py,sha256=li1RG9eBwgNK_Qf0qUaZ8QNTmsncFrAL2pv3kbxZRZg,1447
|
|
42
42
|
alita_sdk/runtime/langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -74,7 +74,7 @@ alita_sdk/runtime/langchain/document_loaders/constants.py,sha256=XUNC63S7U2HjE_1
|
|
|
74
74
|
alita_sdk/runtime/langchain/document_loaders/utils.py,sha256=9xghESf3axBbwxATyVuS0Yu-TWe8zWZnXgCD1ZVyNW0,2414
|
|
75
75
|
alita_sdk/runtime/langchain/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
76
76
|
alita_sdk/runtime/langchain/interfaces/kwextractor.py,sha256=kSJA9L8g8UArmHu7Bd9dIO0Rrq86JPUb8RYNlnN68FQ,3072
|
|
77
|
-
alita_sdk/runtime/langchain/interfaces/llm_processor.py,sha256=
|
|
77
|
+
alita_sdk/runtime/langchain/interfaces/llm_processor.py,sha256=o4YwgTpR2v6v-rJM3iNq9sfzmAYnRZqZYyf2Si-74Ew,8805
|
|
78
78
|
alita_sdk/runtime/langchain/interfaces/loaders.py,sha256=li-O2dubiDNYn-qfVcDsuD4LqP_IZ61cV2vHUZAqeXc,3337
|
|
79
79
|
alita_sdk/runtime/langchain/interfaces/splitters.py,sha256=tW65-Ejj9VYyxXFZNgPts_CKILQ18bWp_1bZ-24FKGc,3630
|
|
80
80
|
alita_sdk/runtime/langchain/retrievers/AlitaRetriever.py,sha256=osChtJxUlfpsFESpJSE5mnJAkxTXnzgFZnC6l5mUlbo,6148
|
|
@@ -133,9 +133,9 @@ alita_sdk/runtime/utils/save_dataframe.py,sha256=i-E1wp-t4wb17Zq3nA3xYwgSILjoXNi
|
|
|
133
133
|
alita_sdk/runtime/utils/streamlit.py,sha256=GQ69CsjfRMcGXcCrslL0Uoj24Cl07Jeji0rZxELaKTQ,104930
|
|
134
134
|
alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7rUxp2MRR4tmYR8,5136
|
|
135
135
|
alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
|
|
136
|
-
alita_sdk/runtime/utils/utils.py,sha256=
|
|
136
|
+
alita_sdk/runtime/utils/utils.py,sha256=BVEVLkYiiotcUD0XsHyx-wACpHfALsQg7PLZpObqvK8,1008
|
|
137
137
|
alita_sdk/tools/__init__.py,sha256=jUj1ztC2FbkIUB-YYmiqaz_rqW7Il5kWzDPn1mJmj5w,10545
|
|
138
|
-
alita_sdk/tools/base_indexer_toolkit.py,sha256=
|
|
138
|
+
alita_sdk/tools/base_indexer_toolkit.py,sha256=dOdl-n_TUCryYCVuCNNyGYN3fwTQuLjNTMTU5axwzW8,26101
|
|
139
139
|
alita_sdk/tools/code_indexer_toolkit.py,sha256=6QvI1by0OFdnKTx5TfNoDJjnMrvnTi9T56xaDxzeleU,7306
|
|
140
140
|
alita_sdk/tools/elitea_base.py,sha256=up3HshASSDfjlHV_HPrs1aD4JIwwX0Ug26WGTzgIYvY,34724
|
|
141
141
|
alita_sdk/tools/non_code_indexer_toolkit.py,sha256=B3QvhpT1F9QidkCcsOi3J_QrTOaNlTxqWFwe90VivQQ,1329
|
|
@@ -352,8 +352,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
|
|
|
352
352
|
alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
|
|
353
353
|
alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
|
|
354
354
|
alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
|
|
355
|
-
alita_sdk-0.3.363.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
356
|
-
alita_sdk-0.3.363.dist-info/METADATA,sha256=
|
|
357
|
-
alita_sdk-0.3.363.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
358
|
-
alita_sdk-0.3.363.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
|
|
359
|
-
alita_sdk-0.3.363.dist-info/RECORD,,
|
|
355
|
+
alita_sdk-0.3.365.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
356
|
+
alita_sdk-0.3.365.dist-info/METADATA,sha256=MEwGE5tSEKyqZhKFzGM0fQcYLTOzHnrLermRmpJItwU,19071
|
|
357
|
+
alita_sdk-0.3.365.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
358
|
+
alita_sdk-0.3.365.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
|
|
359
|
+
alita_sdk-0.3.365.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|