alita-sdk 0.3.263__py3-none-any.whl → 0.3.265__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,11 +127,6 @@ loaders_map = {
127
127
  'extract_images': True
128
128
  }
129
129
  },
130
- '.doc': {
131
- 'class': AlitaTextLoader,
132
- 'is_multimodal_processing': True,
133
- 'kwargs': {}
134
- },
135
130
  '.json': {
136
131
  'class': AlitaJSONLoader,
137
132
  'is_multimodal_processing': False,
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import math
3
+ from collections import OrderedDict
3
4
  from logging import getLogger
4
5
  from typing import Any, Optional, List, Dict, Generator
5
6
 
@@ -398,11 +399,18 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
398
399
 
399
400
  # Initialize document map for tracking by ID
400
401
  doc_map = {
401
- f"{doc.metadata.get('id', f'idx_{i}')}_{doc.metadata['chunk_id']}"
402
- if 'chunk_id' in doc.metadata
403
- else doc.metadata.get('id', f"idx_{i}"): (doc, score)
402
+ (
403
+ f"{doc.metadata.get('id', f'idx_{i}')}_{doc.metadata['chunk_id']}"
404
+ if 'chunk_id' in doc.metadata
405
+ else doc.metadata.get('id', f"idx_{i}")
406
+ ): (doc, 1 - score)
404
407
  for i, (doc, score) in enumerate(vector_items)
405
408
  }
409
+
410
+ # Sort the items by the new score in descending order
411
+ doc_map = OrderedDict(
412
+ sorted(doc_map.items(), key=lambda x: x[1][1], reverse=True)
413
+ )
406
414
 
407
415
  # Process full-text search if configured
408
416
  if full_text_search and full_text_search.get('enabled') and full_text_search.get('fields'):
@@ -452,7 +460,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
452
460
 
453
461
  # Apply cutoff filter
454
462
  if cut_off:
455
- combined_items = [item for item in combined_items if abs(item[1]) <= cut_off]
463
+ combined_items = [item for item in combined_items if abs(item[1]) >= cut_off]
456
464
 
457
465
  # Sort by score and limit results
458
466
  # DISABLED: for chroma we want ascending order (lower score is better), for others descending
@@ -34,7 +34,7 @@ BaseSearchParams = create_model(
34
34
  default={},
35
35
  examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
36
36
  )),
37
- cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5)),
37
+ cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
38
38
  search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
39
39
  full_text_search=(Optional[Dict[str, Any]], Field(
40
40
  description="Full text search parameters. Can be a dictionary with search options.",
@@ -64,7 +64,7 @@ BaseStepbackSearchParams = create_model(
64
64
  default={},
65
65
  examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
66
66
  )),
67
- cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5)),
67
+ cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
68
68
  search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
69
69
  reranker=(Optional[dict], Field(
70
70
  description="Reranker configuration. Can be a dictionary with reranking parameters.",
@@ -100,11 +100,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
100
100
 
101
101
  doctype: str = "document"
102
102
 
103
- llm: Any = None
104
103
  connection_string: Optional[SecretStr] = None
105
104
  collection_name: Optional[str] = None
106
- embedding_model: Optional[str] = "HuggingFaceEmbeddings"
107
- vectorstore_type: Optional[str] = "PGVector"
108
105
  _embedding: Optional[Any] = None
109
106
  alita: Any = None # Elitea client, if available
110
107
 
@@ -1,5 +1,6 @@
1
1
  import logging
2
- from typing import Optional, Generator
2
+ import re
3
+ from typing import Optional, Generator, List
3
4
 
4
5
  from langchain_core.documents import Document
5
6
  from langchain_core.tools import ToolException
@@ -23,7 +24,11 @@ ReadList = create_model(
23
24
  GetFiles = create_model(
24
25
  "GetFiles",
25
26
  folder_name=(Optional[str], Field(description="Folder name to get list of the files.", default=None)),
26
- limit_files=(Optional[int], Field(description="Limit (maximum number) of files to be returned. Can be called with synonyms, such as First, Top, etc., or can be reflected just by a number for example 'Top 10 files'. Use default value if not specified in a query WITH NO EXTRA CONFIRMATION FROM A USER", default=100)),
27
+ limit_files=(Optional[int], Field(description="Limit (maximum number) of files to be returned."
28
+ "Can be called with synonyms, such as First, Top, etc., "
29
+ "or can be reflected just by a number for example 'Top 10 files'. "
30
+ "Use default value if not specified in a query WITH NO EXTRA "
31
+ "CONFIRMATION FROM A USER", default=100)),
27
32
  )
28
33
 
29
34
  ReadDocument = create_model(
@@ -100,7 +105,8 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
100
105
  """ If folder name is specified, lists all files in this folder under Shared Documents path. If folder name is empty, lists all files under root catalog (Shared Documents). Number of files is limited by limit_files (default is 100)."""
101
106
  try:
102
107
  result = []
103
-
108
+ if not limit_files:
109
+ limit_files = 100
104
110
  target_folder_url = f"Shared Documents/{folder_name}" if folder_name else "Shared Documents"
105
111
  files = (self._client.web.get_folder_by_server_relative_path(target_folder_url)
106
112
  .get_files(True)
@@ -146,13 +152,45 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
146
152
  excel_by_sheets=excel_by_sheets,
147
153
  llm=self.llm)
148
154
 
155
+ def _index_tool_params(self):
156
+ return {
157
+ 'limit_files': (Optional[int], Field(
158
+ description="Limit (maximum number) of files to be returned. Can be called with synonyms, "
159
+ "such as First, Top, etc., or can be reflected just by a number for example 'Top 10 files'. "
160
+ "Use default value if not specified in a query WITH NO EXTRA CONFIRMATION FROM A USER",
161
+ default=1000, ge=0)),
162
+ 'include_extensions': (Optional[List[str]], Field(
163
+ description="List of file extensions to include when processing: i.e. ['*.png', '*.jpg']. "
164
+ "If empty, all files will be processed (except skip_extensions).",
165
+ default=[])),
166
+ 'skip_extensions': (Optional[List[str]], Field(
167
+ description="List of file extensions to skip when processing: i.e. ['*.png', '*.jpg']",
168
+ default=[])),
169
+ }
170
+
149
171
  def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
150
172
  try:
151
- all_files = self.get_files_list()
173
+ all_files = self.get_files_list(limit_files=kwargs.get('limit_files', 10000))
152
174
  except Exception as e:
153
175
  raise ToolException(f"Unable to extract files: {e}")
154
176
 
177
+ include_extensions = kwargs.get('include_extensions', [])
178
+ skip_extensions = kwargs.get('skip_extensions', [])
179
+
155
180
  for file in all_files:
181
+ file_name = file.get('Name', '')
182
+
183
+ # Check if file should be skipped based on skip_extensions
184
+ if any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
185
+ for pattern in skip_extensions):
186
+ continue
187
+
188
+ # Check if file should be included based on include_extensions
189
+ # If include_extensions is empty, process all files (that weren't skipped)
190
+ if include_extensions and not (any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
191
+ for pattern in include_extensions)):
192
+ continue
193
+
156
194
  metadata = {
157
195
  ("updated_on" if k == "Modified" else k): str(v)
158
196
  for k, v in file.items()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.263
3
+ Version: 0.3.265
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -57,7 +57,7 @@ alita_sdk/runtime/langchain/document_loaders/AlitaQtestLoader.py,sha256=CUVVnisx
57
57
  alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py,sha256=o0SRFPZ-VskltgThVRX80rT19qtB4gPzxED9SENTNWo,4145
58
58
  alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py,sha256=uNcV0En49_0u0RYB1sP1XfNspT2Xc5CacuJr9Jqv79Q,2972
59
59
  alita_sdk/runtime/langchain/document_loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
- alita_sdk/runtime/langchain/document_loaders/constants.py,sha256=EL20rusYbnPk2zwOh8-gxSdaEuqThZJcqiyINXphxFw,4607
60
+ alita_sdk/runtime/langchain/document_loaders/constants.py,sha256=TInNFvUQ_Eq05_PP_zDk1ZIMh52xJ_cCvf56dub6nZQ,4489
61
61
  alita_sdk/runtime/langchain/document_loaders/utils.py,sha256=9xghESf3axBbwxATyVuS0Yu-TWe8zWZnXgCD1ZVyNW0,2414
62
62
  alita_sdk/runtime/langchain/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
63
  alita_sdk/runtime/langchain/interfaces/kwextractor.py,sha256=kSJA9L8g8UArmHu7Bd9dIO0Rrq86JPUb8RYNlnN68FQ,3072
@@ -107,7 +107,7 @@ alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9
107
107
  alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
108
108
  alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
109
109
  alita_sdk/runtime/tools/vectorstore.py,sha256=yl6FKJGVQDevftSkxWTkMbqjIskIFz69vXELdEGp9u4,34780
110
- alita_sdk/runtime/tools/vectorstore_base.py,sha256=OM9nMUzQ7SgfQD8QYlzGYLXzKuGMZ1onpqSrBx9vMKk,27381
110
+ alita_sdk/runtime/tools/vectorstore_base.py,sha256=Hxd74XNiuxsc6Fe9CufTrLATWUPnm5278t0a-1YswR8,27638
111
111
  alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
112
112
  alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
113
113
  alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -119,7 +119,7 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
119
119
  alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
120
120
  alita_sdk/runtime/utils/utils.py,sha256=CpEl3LCeLbhzQySz08lkKPm7Auac6IiLF7WB8wmArMI,589
121
121
  alita_sdk/tools/__init__.py,sha256=ko5TToGYZFmBrho26DRAVvrkHWxQ2sfs8gVAASinYp8,10611
122
- alita_sdk/tools/base_indexer_toolkit.py,sha256=UVaTzYkWEvH9LLTaxOEOUtU98CAhcXko9uvFZjhRYd0,17957
122
+ alita_sdk/tools/base_indexer_toolkit.py,sha256=UkCjxQkBudIEjKFwUB2313mx6qQCxtF_rIiDYOAgbIw,17851
123
123
  alita_sdk/tools/elitea_base.py,sha256=PfelIUb5YFTjDN_1jNYT9tJbjfYr11PAUrPQHyW2d5I,32830
124
124
  alita_sdk/tools/non_code_indexer_toolkit.py,sha256=v9uq1POE1fQKCd152mbqDtF-HSe0qoDj83k4E5LAkMI,1080
125
125
  alita_sdk/tools/ado/__init__.py,sha256=u2tdDgufGuDb-7lIgKKQlqgStL9Wd1gzNmRNYems2c0,1267
@@ -299,7 +299,7 @@ alita_sdk/tools/servicenow/__init__.py,sha256=hReiTp8yv07eR0O_1KJThzUO2xhWhIWcjU
299
299
  alita_sdk/tools/servicenow/api_wrapper.py,sha256=WpH-bBLGFdhehs4g-K-WAkNuaD1CSrwsDpdgB3RG53s,6120
300
300
  alita_sdk/tools/servicenow/servicenow_client.py,sha256=Rdqfu-ll-qbnclMzChLZBsfXRDzgoX_FdeI2WLApWxc,3269
301
301
  alita_sdk/tools/sharepoint/__init__.py,sha256=Mofg_N-7zFf5mKm3_0D0dhC_H0MX-bk3YQ5Sl3oXokg,4114
302
- alita_sdk/tools/sharepoint/api_wrapper.py,sha256=Hcd9YypWMr3upDVJHRxUyPdN4k8joqRQOc_uce2ek1A,9250
302
+ alita_sdk/tools/sharepoint/api_wrapper.py,sha256=-k2CPhS-mUjtAXVw6DHhP9c71oDcBjuxAljpK8bUGb0,11347
303
303
  alita_sdk/tools/sharepoint/authorization_helper.py,sha256=n-nL5dlBoLMK70nHu7P2RYCb8C6c9HMA_gEaw8LxuhE,2007
304
304
  alita_sdk/tools/sharepoint/utils.py,sha256=fZ1YzAu5CTjKSZeslowpOPH974902S8vCp1Wu7L44LM,446
305
305
  alita_sdk/tools/slack/__init__.py,sha256=o8BnDMWGC5qA8pVIyIiflM6T__dZ6qAE1UdtJcvmaxk,3901
@@ -335,8 +335,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=HOt9ShtJI_1tVPcwd3Rwk-VS0SMLq
335
335
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
336
336
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
337
337
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
338
- alita_sdk-0.3.263.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
339
- alita_sdk-0.3.263.dist-info/METADATA,sha256=K6tPuxKTufoHXf9VyS4qq7Ag6qvABJVu3mgzVU3baFc,18897
340
- alita_sdk-0.3.263.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
341
- alita_sdk-0.3.263.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
342
- alita_sdk-0.3.263.dist-info/RECORD,,
338
+ alita_sdk-0.3.265.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
339
+ alita_sdk-0.3.265.dist-info/METADATA,sha256=X3vxAP0rhjG5eSPdKzPW5vKsWm7viCAJ0s1AGBbHnmI,18897
340
+ alita_sdk-0.3.265.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
341
+ alita_sdk-0.3.265.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
342
+ alita_sdk-0.3.265.dist-info/RECORD,,