alita-sdk 0.3.253__py3-none-any.whl → 0.3.255__py3-none-any.whl

This diff shows the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that public registry.
@@ -27,6 +27,6 @@ class SlackConfiguration(BaseModel):
  }
  }
  )
- name: Optional[SecretStr] = Field(description="Slack Bot Token")
+ name: Optional[str] = Field(description="Slack name")
  slack_token: Optional[SecretStr] = Field(description="Slack Token like XOXB-*****-*****-*****-*****")
  channel_id:Optional[str] = Field(description="Channel ID")
@@ -602,16 +602,22 @@ class AlitaClient:
  import logging
  logger = logging.getLogger(__name__)
  toolkit_config_parsed_json = None
+ events_dispatched = []
+
  try:
  toolkit_config_type = toolkit_config.get('type')
- toolkit_class = get_available_toolkit_models().get(toolkit_config_type)['toolkit_class']
- toolkit_config_model_class = toolkit_class.toolkit_config_schema()
- toolkit_config_validated_settings = toolkit_config_model_class(
- **toolkit_config.get('settings', {})
- ).model_dump(mode='json')
-
+ available_toolkit_models = get_available_toolkit_models().get(toolkit_config_type)
  toolkit_config_parsed_json = deepcopy(toolkit_config)
- toolkit_config_parsed_json['settings'] = toolkit_config_validated_settings
+ if available_toolkit_models:
+ toolkit_class = available_toolkit_models['toolkit_class']
+ toolkit_config_model_class = toolkit_class.toolkit_config_schema()
+ toolkit_config_validated_settings = toolkit_config_model_class(
+ **toolkit_config.get('settings', {})
+ ).model_dump(mode='json')
+ toolkit_config_parsed_json['settings'] = toolkit_config_validated_settings
+ else:
+ logger.warning(f"Toolkit type '{toolkit_config_type}' is skipping model validation")
+ toolkit_config_parsed_json['settings'] = None
  except Exception as toolkit_config_error:
  logger.error(f"Failed to validate toolkit configuration: {str(toolkit_config_error)}")
  return {
@@ -635,7 +641,6 @@ class AlitaClient:
  # Create RunnableConfig for callback support
  config = None
  callbacks = []
- events_dispatched = []

  if runtime_config:
  callbacks = runtime_config.get('callbacks', [])
@@ -9,6 +9,7 @@ from langchain_core.documents import Document
  from mammoth import convert_to_html
  from markdownify import markdownify

+ from alita_sdk.tools.chunkers.sematic.markdown_chunker import markdown_by_headers_chunker
  from .utils import perform_llm_prediction_for_image_bytes


@@ -35,6 +36,7 @@ class AlitaDocxMammothLoader(BaseLoader):
  self.extract_images = kwargs.get('extract_images')
  self.llm = kwargs.get("llm")
  self.prompt = kwargs.get("prompt")
+ self.max_tokens = kwargs.get('max_tokens', 512)

  def __handle_image(self, image) -> dict:
  """
@@ -100,11 +102,11 @@ class AlitaDocxMammothLoader(BaseLoader):
  Loads and converts the Docx file to markdown format.

  Returns:
- List[Document]: A list containing a single Document with the markdown content
+ List[Document]: A list containing a Documents with the markdown content
  and metadata including the source file path.
  """
  result_content = self.get_content()
- return [Document(page_content=result_content, metadata={'source': str(self.path)})]
+ return list(markdown_by_headers_chunker(iter([Document(page_content=result_content, metadata={'source': str(self.path)})]), config={'max_tokens':self.max_tokens}))

  def get_content(self):
  """
@@ -0,0 +1,73 @@
+ import json
+ from typing import Iterator
+
+ from langchain_core.documents import Document
+
+ from langchain_community.document_loaders.base import BaseLoader
+ from langchain_community.document_loaders.helpers import detect_file_encodings
+ from langchain_core.tools import ToolException
+ from langchain_text_splitters import RecursiveJsonSplitter
+
+
+ class AlitaJSONLoader(BaseLoader):
+
+ def __init__(self, **kwargs):
+ """Initialize with file path."""
+ if kwargs.get('file_path'):
+ self.file_path = kwargs['file_path']
+ elif kwargs.get('file_content'):
+ self.file_content = kwargs['file_content']
+ self.file_name = kwargs['file_name']
+ else:
+ raise ToolException("'file_path' or 'file_content' parameter should be provided.")
+ self.encoding = kwargs.get('encoding', 'utf-8')
+ self.autodetect_encoding = kwargs.get('autodetect_encoding', False)
+ self.max_tokens = kwargs.get('max_tokens', 512)
+
+ def get_content(self):
+ try:
+ if hasattr(self, 'file_path') and self.file_path:
+ with open(self.file_path, encoding=self.encoding) as f:
+ return json.load(f)
+ elif hasattr(self, 'file_content') and self.file_content:
+ return json.load(self.file_content)
+ else:
+ raise ValueError("Neither file_path nor file_content is provided.")
+
+ except UnicodeDecodeError as e:
+ if self.autodetect_encoding:
+ if hasattr(self, 'file_path') and self.file_path:
+ detected_encodings = detect_file_encodings(self.file_path)
+ for encoding in detected_encodings:
+ try:
+ with open(self.file_path, encoding=encoding.encoding) as f:
+ return f.read()
+ break
+ except UnicodeDecodeError:
+ continue
+ elif hasattr(self, 'file_content') and self.file_content:
+ detected_encodings = detect_file_encodings(self.file_content)
+ for encoding in detected_encodings:
+ try:
+ return self.file_content.decode(encoding.encoding)
+ except UnicodeDecodeError:
+ continue
+ else:
+ raise ValueError("Neither file_path nor file_content is provided for encoding detection.")
+ else:
+ raise RuntimeError(f"Error loading content with encoding {self.encoding}.") from e
+ except Exception as e:
+ raise RuntimeError(f"Error loading content.") from e
+
+ def lazy_load(self) -> Iterator[Document]:
+ """Load from file path."""
+ content_json = self.get_content()
+
+ if isinstance(content_json, list):
+ data_dict = {str(i): item for i, item in enumerate(content_json)}
+ else:
+ data_dict = content_json
+ chunks = RecursiveJsonSplitter(max_chunk_size=self.max_tokens).split_json(json_data=data_dict)
+ for chunk in chunks:
+ metadata = {"source": str(self.file_path) if hasattr(self, 'file_path') else self.file_name}
+ yield Document(page_content=json.dumps(chunk), metadata=metadata)
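The new AlitaJSONLoader splits a JSON payload with RecursiveJsonSplitter and emits one Document per chunk. A minimal usage sketch, assuming a local sample.json exists:

from alita_sdk.runtime.langchain.document_loaders.AlitaJSONLoader import AlitaJSONLoader

loader = AlitaJSONLoader(file_path="sample.json", encoding="utf-8", max_tokens=512)
for doc in loader.lazy_load():
    # each chunk is a JSON string; its size is capped via RecursiveJsonSplitter(max_chunk_size=max_tokens)
    print(doc.metadata["source"], doc.page_content[:80])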
@@ -13,17 +13,18 @@
  # limitations under the License.

  from langchain_community.document_loaders import (
- UnstructuredMarkdownLoader,
- AirbyteJSONLoader, UnstructuredHTMLLoader,
- UnstructuredPowerPointLoader, PythonLoader)
+ UnstructuredMarkdownLoader,
+ AirbyteJSONLoader, UnstructuredHTMLLoader,
+ PythonLoader)

  from .AlitaCSVLoader import AlitaCSVLoader
  from .AlitaDocxMammothLoader import AlitaDocxMammothLoader
  from .AlitaExcelLoader import AlitaExcelLoader
  from .AlitaImageLoader import AlitaImageLoader
+ from .AlitaJSONLoader import AlitaJSONLoader
  from .AlitaPDFLoader import AlitaPDFLoader
- from .AlitaTextLoader import AlitaTextLoader
  from .AlitaPowerPointLoader import AlitaPowerPointLoader
+ from .AlitaTextLoader import AlitaTextLoader

  loaders_map = {
  '.png': {
@@ -122,7 +123,9 @@ loaders_map = {
  '.docx': {
  'class': AlitaDocxMammothLoader,
  'is_multimodal_processing': True,
- 'kwargs': {}
+ 'kwargs': {
+ 'extract_images': True
+ }
  },
  '.doc': {
  'class': AlitaTextLoader,
@@ -130,11 +133,9 @@ loaders_map = {
  'kwargs': {}
  },
  '.json': {
- 'class': AlitaTextLoader,
+ 'class': AlitaJSONLoader,
  'is_multimodal_processing': False,
- 'kwargs': {
- 'autodetect_encoding': True
- }
+ 'kwargs': {}
  },
  '.jsonl': {
  'class': AirbyteJSONLoader,
@@ -154,12 +155,16 @@ loaders_map = {
  '.ppt': {
  'class': AlitaPowerPointLoader,
  'is_multimodal_processing': False,
- 'kwargs': {}
+ 'kwargs': {
+ 'mode': 'paged'
+ }
  },
  '.pptx': {
  'class': AlitaPowerPointLoader,
  'is_multimodal_processing': False,
- 'kwargs': {}
+ 'kwargs': {
+ 'mode': 'paged'
+ }
  },
  '.py': {
  'class': PythonLoader,
@@ -1,9 +1,9 @@
  import uuid
  from logging import getLogger
- from typing import Any, Type, Literal, Optional
+ from typing import Any, Type, Literal, Optional, Union, List

  from langchain_core.tools import BaseTool
- from pydantic import BaseModel, Field, create_model
+ from pydantic import BaseModel, Field, create_model, EmailStr, constr

  logger = getLogger(__name__)

@@ -19,45 +19,73 @@ class McpServerTool(BaseTool):


  @staticmethod
- def create_pydantic_model_from_schema(schema: dict):
- fields = {}
- for field_name, field_info in schema['properties'].items():
- field_type = field_info['type']
- field_description = field_info.get('description', '')
- if field_type == 'string':
- if 'enum' in field_info:
- field_type = Literal[tuple(field_info['enum'])]
- else:
- field_type = str
- elif field_type == 'integer':
- field_type = int
- elif field_type == 'number':
- field_type = float
- elif field_type == 'boolean':
- field_type = bool
- elif field_type == 'object':#Dict[str, Any]
- nested_model = McpServerTool.create_pydantic_model_from_schema(field_info)
- field_type = nested_model
- elif field_type == 'array':
- item_schema = field_info['items']
- item_type = McpServerTool.create_pydantic_model_from_schema(item_schema) if item_schema['type'] == 'object' else (
- str if item_schema['type'] == 'string' else
- int if item_schema['type'] == 'integer' else
- float if item_schema['type'] == 'number' else
- bool if item_schema['type'] == 'boolean' else
- None
- )
- if item_type is None:
- raise ValueError(f"Unsupported array item type: {item_schema['type']}")
- field_type = list[item_type]
- else:
- raise ValueError(f"Unsupported field type: {field_type}")
+ def create_pydantic_model_from_schema(schema: dict, model_name: str = "ArgsSchema"):
+ def parse_type(field: dict, name: str = "Field") -> Any:
+ if "allOf" in field:
+ merged = {}
+ required = set()
+ for idx, subschema in enumerate(field["allOf"]):
+ sub_type = parse_type(subschema, f"{name}AllOf{idx}")
+ if hasattr(sub_type, "__fields__"):
+ merged.update({k: (v.outer_type_, v.default) for k, v in sub_type.__fields__.items()})
+ required.update({k for k, v in sub_type.__fields__.items() if v.required})
+ if merged:
+ return create_model(f"{name}AllOf", **merged)
+ return Any
+ if "anyOf" in field or "oneOf" in field:
+ key = "anyOf" if "anyOf" in field else "oneOf"
+ types = [parse_type(sub, f"{name}{key.capitalize()}{i}") for i, sub in enumerate(field[key])]
+ # Check for null type
+ if any(sub.get("type") == "null" for sub in field[key]):
+ non_null_types = [parse_type(sub, f"{name}{key.capitalize()}{i}")
+ for i, sub in enumerate(field[key]) if sub.get("type") != "null"]
+ if len(non_null_types) == 1:
+ return Optional[non_null_types[0]]
+ return Union[tuple(types)]
+ t = field.get("type")
+ if isinstance(t, list):
+ if "null" in t:
+ non_null = [x for x in t if x != "null"]
+ if len(non_null) == 1:
+ field = dict(field)
+ field["type"] = non_null[0]
+ return Optional[parse_type(field, name)]
+ return Any
+ return Any
+ if t == "string":
+ if "enum" in field:
+ return Literal[tuple(field["enum"])]
+ if field.get("format") == "email":
+ return EmailStr
+ if "pattern" in field:
+ return constr(regex=field["pattern"])
+ return str
+ if t == "integer":
+ return int
+ if t == "number":
+ return float
+ if t == "boolean":
+ return bool
+ if t == "object":
+ return McpServerTool.create_pydantic_model_from_schema(field, name.capitalize())
+ if t == "array":
+ items = field.get("items", {})
+ return List[parse_type(items, name + "Item")]
+ return Any

- if field_name in schema.get('required', []):
- fields[field_name] = (field_type, Field(..., description=field_description))
- else:
- fields[field_name] = (Optional[field_type], Field(None, description=field_description))
- return create_model('DynamicModel', **fields)
+ properties = schema.get("properties", {})
+ required = set(schema.get("required", []))
+ fields = {}
+ for name, prop in properties.items():
+ typ = parse_type(prop, name.capitalize())
+ default = prop.get("default", ... if name in required else None)
+ field_args = {}
+ if "description" in prop:
+ field_args["description"] = prop["description"]
+ if "format" in prop:
+ field_args["format"] = prop["format"]
+ fields[name] = (typ, Field(default, **field_args))
+ return create_model(model_name, **fields)

  def _run(self, *args, **kwargs):
  call_data = {
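The rewritten converter now understands anyOf/oneOf/allOf, nullable unions, enums, defaults, and nested arrays and objects. A hedged illustration with a made-up schema (not taken from the package):

from alita_sdk.runtime.tools.mcp_server_tool import McpServerTool

schema = {
    "type": "object",
    "properties": {
        "query": {"type": "string", "description": "Search text"},
        "limit": {"type": "integer", "default": 10},
        "mode": {"type": "string", "enum": ["fast", "deep"]},
        "tags": {"type": "array", "items": {"type": "string"}},
        "owner": {"anyOf": [{"type": "string"}, {"type": "null"}]},  # becomes Optional[str]
    },
    "required": ["query"],
}
SearchArgs = McpServerTool.create_pydantic_model_from_schema(schema, model_name="SearchArgs")
print(SearchArgs(query="hello").model_dump())  # limit defaults to 10, the rest to None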
@@ -336,6 +336,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):

  from ..langchain.interfaces.llm_processor import add_documents

+ self._log_tool_event(message=f"Starting the indexing... Parameters: {collection_suffix=}, {clean_index=}, {is_code}", tool_name="index_documents")
  # pre-process documents if needed (find duplicates, etc.)
  if clean_index:
  logger.info("Cleaning index before re-indexing all documents.")
@@ -351,9 +352,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  if isinstance(documents, types.GeneratorType):
  documents = list(documents)
  else:
+ self._log_tool_event(
+ message="Filter for duplicates",
+ tool_name="index_documents")
  # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
  documents = self._reduce_code_duplicates(documents, collection_suffix) if is_code \
  else self._reduce_non_code_duplicates(documents, collection_suffix)
+ self._log_tool_event(
+ message="All the duplicates were filtered out. Proceeding with indexing.",
+ tool_name="index_documents")

  if not documents or len(documents) == 0:
  logger.info("No new documents to index after duplicate check.")
@@ -362,8 +369,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  # if func is provided, apply it to documents
  # used for processing of documents before indexing,
  # e.g. to avoid time-consuming operations for documents that are already indexed
+ self._log_tool_event(message=f"Processing the dependent documents (attachments, etc.)", tool_name="index_documents")
  dependent_docs_generator = self.process_document_func(documents) if self.process_document_func else []
-
  # notify user about missed required metadata fields: id, updated_on
  # it is not required to have them, but it is recommended to have them for proper re-indexing and duplicate detection
  for doc in documents:
@@ -375,6 +382,9 @@ class VectorStoreWrapper(BaseToolApiWrapper):

  documents = documents + list(dependent_docs_generator)

+ self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
+ tool_name="index_documents")
+
  # if collection_suffix is provided, add it to metadata of each document
  if collection_suffix:
  for doc in documents:
@@ -386,7 +396,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
  total_docs = len(documents)
  documents_count = 0
  _documents = []
-
+ self._log_tool_event(message=f"Starting the indexing of processed documents. Total documents: {len(documents)}",
+ tool_name="index_documents")
  # set default progress step to 20 if out of 0...100 or None
  progress_step = 20 if progress_step not in range(0, 100) else progress_step
  next_progress_point = progress_step
@@ -1,18 +1,16 @@
  import json
  import math
- import types
- from typing import Any, Optional, List, Dict, Callable, Generator
+ from logging import getLogger
+ from typing import Any, Optional, List, Dict, Generator

  from langchain_core.documents import Document
- from pydantic import BaseModel, model_validator, Field
- from ..langchain.tools.vector import VectorAdapter
  from langchain_core.messages import HumanMessage
+ from pydantic import BaseModel, model_validator, Field
+
  from alita_sdk.tools.elitea_base import BaseToolApiWrapper
  from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
- from logging import getLogger
-
+ from ..langchain.tools.vector import VectorAdapter
  from ..utils.logging import dispatch_custom_event
- from ..utils.utils import IndexerKeywords

  logger = getLogger(__name__)

@@ -212,10 +210,6 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
  tool_name="_clean_collection"
  )

- def _add_to_collection(self, entry_id, new_collection_value):
- """Add a new collection name to the `collection` key in the `metadata` column."""
- self.vector_adapter.add_to_collection(self, entry_id, new_collection_value)
-
  def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True):
  """ Index documents in the vectorstore.

@@ -26,7 +26,7 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
  'toolkit_name': True,
  'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})
  ),
- ado_configuration=(AdoConfiguration, Field(description="Ado Work Item configuration", json_schema_extra={'configuration_types': ['ado_work_item']})),
+ ado_configuration=(AdoConfiguration, Field(description="Ado Work Item configuration", json_schema_extra={'configuration_types': ['ado']})),
  limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
  selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
  # indexer settings
@@ -6,7 +6,6 @@ from langchain_core.documents import Document
  from pydantic import create_model, Field, SecretStr

  # from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
- from .chunkers import markdown_chunker
  from .utils.content_parser import process_content_by_type
  from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
  from ..runtime.tools.vectorstore_base import VectorStoreWrapperBase
@@ -141,7 +140,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
  """ Loads documents from a source, processes them,
  and returns a list of Document objects with base metadata: id and created_on."""
- pass
+ yield from ()

  def _process_document(self, base_document: Document) -> Generator[Document, None, None]:
  """ Process an existing base document to extract relevant metadata for full document preparation.
@@ -153,7 +152,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):

  Returns:
  Document: The processed document with metadata."""
- pass
+ yield from ()

  def index_data(self, **kwargs):
  collection_suffix = kwargs.get("collection_suffix")
@@ -174,18 +173,20 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  return self._save_index(list(documents), collection_suffix=collection_suffix, progress_step=progress_step)

  def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
- from alita_sdk.tools.chunkers import __confluence_chunkers__ as chunkers, __confluence_models__ as models
+ from alita_sdk.tools.chunkers import __confluence_chunkers__ as chunkers

  if chunking_config is None:
  chunking_config = {}
  chunking_config['embedding'] = self._embedding
  chunking_config['llm'] = self.llm
-
+
  for document in documents:
  if content_type := document.metadata.get('loader_content_type', None):
  # apply parsing based on content type and chunk if chunker was applied to parent doc
+ content = document.metadata.pop('loader_content', None)
  yield from process_content_by_type(
  document=document,
+ content=content,
  extension_source=content_type, llm=self.llm, chunking_config=chunking_config)
  elif chunking_tool:
  # apply default chunker from toolkit config. No parsing.
@@ -205,9 +206,6 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  for dep in dependencies:
  dep.metadata[IndexerKeywords.PARENT.value] = document.metadata.get('id', None)
  yield dep
-
- def _content_loader(self):
- pass

  def _reduce_duplicates(
  self,
@@ -255,36 +253,6 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
  def remove_ids_fn(self, idx_data, key: str):
  raise NotImplementedError("Subclasses must implement this method")

- def _process_documents(self, documents: List[Document]) -> Generator[Document, None, None]:
- """
- Process a list of base documents to extract relevant metadata for full document preparation.
- Used for late processing of documents after we ensure that the documents have to be indexed to avoid
- time-consuming operations for documents which might be useless.
- This function passed to index_documents method of vector store and called after _reduce_duplicates method.
-
- Args:
- documents (List[Document]): The base documents to process.
-
- Returns:
- Generator[Document, None, None]: A generator yielding processed documents with metadata.
- """
- for doc in documents:
- # Filter documents to process only those that either:
- # - do not have a 'chunk_id' in their metadata, or
- # - have 'chunk_id' explicitly set to 1.
- # This prevents processing of irrelevant or duplicate chunks, improving efficiency.
- chunk_id = doc.metadata.get("chunk_id")
- if chunk_id is None or chunk_id == 1:
- processed_docs = self._process_document(doc)
- if processed_docs: # Only proceed if the list is not empty
- for processed_doc in processed_docs:
- # map processed document (child) to the original document (parent)
- processed_doc.metadata[IndexerKeywords.PARENT.value] = doc.metadata.get('id', None)
- if chunker:=self._get_dependencies_chunker(processed_doc):
- yield from chunker(file_content_generator=iter([processed_doc]), config=self._get_dependencies_chunker_config())
- else:
- yield processed_doc
-
  def remove_index(self, collection_suffix: str = ""):
  """Cleans the indexed data in the collection."""
  super()._clean_collection(collection_suffix=collection_suffix)
@@ -1,7 +1,6 @@
  from typing import Generator
- from langchain.schema import Document
  from langchain_core.documents import Document
- from langchain_text_splitters import MarkdownHeaderTextSplitter
+ from langchain_text_splitters import MarkdownHeaderTextSplitter, ExperimentalMarkdownSyntaxTextSplitter
  from langchain.text_splitter import TokenTextSplitter
  from ..utils import tiktoken_length
  from copy import deepcopy as copy
@@ -50,4 +49,32 @@ def markdown_chunker(file_content_generator: Generator[Document, None, None], co
  yield Document(
  page_content=chunk.page_content,
  metadata=docmeta
- )
+ )
+
+
+ def markdown_by_headers_chunker(file_content_generator: Generator[Document, None, None], config: dict, *args, **kwargs) -> Generator[Document, None, None]:
+ strip_header = config.get("strip_header", False)
+ return_each_line = config.get("return_each_line", False)
+ headers_to_split_on = config.get("headers_to_split_on", [])
+ headers_to_split_on = [header.split(' ', 1) for header in headers_to_split_on]
+ for doc in file_content_generator:
+ doc_metadata = doc.metadata
+ doc_content = doc.page_content
+ chunk_id = 0
+ markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(
+ headers_to_split_on=headers_to_split_on,
+ strip_headers=strip_header,
+ return_each_line=return_each_line
+ )
+ md_header_splits = markdown_splitter.split_text(doc_content)
+ for chunk in md_header_splits:
+ chunk_id += 1
+ headers_meta = list(chunk.metadata.values())
+ docmeta = copy(doc_metadata)
+ docmeta.update({"headers": "; ".join(headers_meta)})
+ docmeta['chunk_id'] = chunk_id
+ docmeta['chunk_type'] = "document"
+ yield Document(
+ page_content=chunk.page_content,
+ metadata=docmeta
+ )
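markdown_by_headers_chunker keeps header-delimited sections together instead of splitting on token windows, as the original markdown_chunker does. A short usage sketch (the sample markdown and header list are illustrative; note that each "headers_to_split_on" entry is split on its first space into a (prefix, name) pair, as the code above shows):

from langchain_core.documents import Document
from alita_sdk.tools.chunkers.sematic.markdown_chunker import markdown_by_headers_chunker

md = "# Intro\nSome text.\n## Details\nMore text."
docs = iter([Document(page_content=md, metadata={"source": "sample.md"})])
config = {"headers_to_split_on": ["# Header1", "## Header2"], "strip_header": False}
for chunk in markdown_by_headers_chunker(docs, config=config):
    # each chunk carries chunk_id, chunk_type and the joined header trail in metadata
    print(chunk.metadata["chunk_id"], chunk.metadata["headers"], chunk.page_content[:40])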
@@ -20,6 +20,7 @@ def get_tools(tool):
  confluence_configuration=tool['settings']['confluence_configuration'],
  limit=tool['settings'].get('limit', 5),
  labels=parse_list(tool['settings'].get('labels', None)),
+ custom_headers=tool['settings'].get('custom_headers', {}),
  additional_fields=tool['settings'].get('additional_fields', []),
  verify_ssl=tool['settings'].get('verify_ssl', True),
  alita=tool['settings'].get('alita'),
@@ -78,6 +79,8 @@ class ConfluenceToolkit(BaseToolkit):
  number_of_retries=(int, Field(description="Number of retries", default=2)),
  min_retry_seconds=(int, Field(description="Min retry, sec", default=10)),
  max_retry_seconds=(int, Field(description="Max retry, sec", default=60)),
+ # optional field for custom headers as dictionary
+ custom_headers=(Optional[dict], Field(description="Custom headers for API requests", default=None)),
  confluence_configuration=(Optional[ConfluenceConfiguration], Field(description="Confluence Configuration", json_schema_extra={'configuration_types': ['confluence']})),
  pgvector_configuration=(Optional[PgVectorConfiguration], Field(default = None,
  description="PgVector Configuration",
@@ -223,16 +223,21 @@ class ConfluenceAPIWrapper(BaseVectorStoreToolApiWrapper):
  username = values.get('username')
  token = values.get('token')
  cloud = values.get('cloud')
- # if values.get('collection_name'):
- # values['collection_name'] = shortuuid.encode(values['collection_name'])
  if token and is_cookie_token(token):
  session = requests.Session()
  session.cookies.update(parse_cookie_string(token))
- values['client'] = Confluence(url=url, session=session, cloud=cloud)
+ client_instance = Confluence(url=url, session=session, cloud=cloud)
  elif token:
- values['client'] = Confluence(url=url, token=token, cloud=cloud)
+ client_instance = Confluence(url=url, token=token, cloud=cloud)
  else:
- values['client'] = Confluence(url=url, username=username, password=api_key, cloud=cloud)
+ client_instance = Confluence(url=url, username=username, password=api_key, cloud=cloud)
+
+ custom_headers = values.get('custom_headers', {})
+ logger.info(f"Jira tool: custom headers length: {len(custom_headers)}")
+ for header, value in custom_headers.items():
+ client_instance._update_header(header, value)
+
+ values['client'] = client_instance
  return values

  def __unquote_confluence_space(self) -> str | None:
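With the new custom_headers field, each entry of the dictionary is applied to the Confluence client via _update_header during validation, and get_tools() forwards settings['custom_headers'] to the wrapper. A hedged example of the relevant toolkit settings (header names and values are illustrative only):

settings = {
    # ...existing Confluence settings (configuration, limit, labels, ...) elided...
    "custom_headers": {"X-Forwarded-User": "svc-bot", "X-Trace-Id": "abc123"},
}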
@@ -110,7 +110,7 @@ BaseStepbackSearchParams = create_model(
  BaseIndexDataParams = create_model(
  "indexData",
  __base__=BaseIndexParams,
- progress_step=(Optional[int], Field(default=10, ge=0, le=100,
+ progress_step=(Optional[int], Field(default=5, ge=0, le=100,
  description="Optional step size for progress reporting during indexing")),
  clean_index=(Optional[bool], Field(default=False,
  description="Optional flag to enforce clean existing index before indexing new data")),
@@ -124,6 +124,28 @@ class BaseToolApiWrapper(BaseModel):
  def get_available_tools(self):
  raise NotImplementedError("Subclasses should implement this method")

+ def _log_tool_event(self, message: str, tool_name: str = None):
+ """Log data and dispatch custom event for the tool"""
+
+ try:
+ from langchain_core.callbacks import dispatch_custom_event
+
+ if tool_name is None:
+ tool_name = 'tool_progress'
+
+ logger.info(message)
+ dispatch_custom_event(
+ name="tool_execution_step",
+ data={
+ "message": message,
+ "tool_name": tool_name,
+ "toolkit": self.__class__.__name__,
+ },
+ )
+ except Exception as e:
+ logger.warning(f"Failed to dispatch progress event: {str(e)}")
+
+
  def run(self, mode: str, *args: Any, **kwargs: Any):
  if TOOLKIT_SPLITTER in mode:
  mode = mode.rsplit(TOOLKIT_SPLITTER, maxsplit=1)[1]
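_log_tool_event logs the message and dispatches a "tool_execution_step" custom event; the event only reaches a listener when the surrounding run has callbacks attached, which is presumably why the dispatch is wrapped in try/except. A hedged sketch of a consumer built on langchain-core's custom-event hook (the handler class is illustrative):

from langchain_core.callbacks import BaseCallbackHandler

class ToolProgressHandler(BaseCallbackHandler):
    def on_custom_event(self, name, data, **kwargs):
        # print progress messages emitted by _log_tool_event
        if name == "tool_execution_step":
            print(f"[{data['toolkit']}/{data['tool_name']}] {data['message']}")

# Pass the handler via the RunnableConfig of the invocation that runs the toolkit,
# e.g. config={"callbacks": [ToolProgressHandler()]}.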
@@ -314,7 +336,13 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
  Returns:
  Generator[Document, None, None]: A generator yielding processed documents with metadata.
  """
- for doc in documents:
+ total_docs = len(documents)
+ self._log_tool_event(
+ message=f"Preparing a base documents for indexing. Total documents: {total_docs}",
+ tool_name="_process_documents"
+ )
+ processed_count = 0
+ for idx, doc in enumerate(documents, 1):
  # Filter documents to process only those that either:
  # - do not have a 'chunk_id' in their metadata, or
  # - have 'chunk_id' explicitly set to 1.
@@ -326,10 +354,19 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
  for processed_doc in processed_docs:
  # map processed document (child) to the original document (parent)
  processed_doc.metadata[IndexerKeywords.PARENT.value] = doc.metadata.get('id', None)
- if chunker:=self._get_dependencies_chunker(processed_doc):
- yield from chunker(file_content_generator=iter([processed_doc]), config=self._get_dependencies_chunker_config())
+ if chunker := self._get_dependencies_chunker(processed_doc):
+ yield from chunker(
+ file_content_generator=iter([processed_doc]),
+ config=self._get_dependencies_chunker_config()
+ )
  else:
  yield processed_doc
+ processed_count += 1
+ if processed_count % 5 == 0 or processed_count == total_docs:
+ self._log_tool_event(
+ message=f"Prepared {processed_count} out of {total_docs} documents for indexing.",
+ tool_name="_process_documents"
+ )


  # TODO: init store once and re-use the instance
@@ -563,7 +600,7 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
  from .chunkers.code.codeparser import parse_code_files_for_db

  _files = self.__handle_get_files("", branch or self.active_branch or self._active_branch)
-
+ self._log_tool_event(message="Listing files in branch", tool_name="loader")
  logger.info(f"Files in branch: {_files}")

  def is_whitelisted(file_path: str) -> bool:
@@ -579,11 +616,22 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
  return False

  def file_content_generator():
- for file in _files:
+ self._log_tool_event(message="Reading the files", tool_name="loader")
+ # log the progress of file reading
+ total_files = len(_files)
+ for idx, file in enumerate(_files, 1):
  if is_whitelisted(file) and not is_blacklisted(file):
+ # read file ONLY if it matches whitelist and does not match blacklist
+ file_content = self._read_file(file, branch=branch or self.active_branch or self._active_branch)
+ # hash the file content to ensure uniqueness
+ import hashlib
+ file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
  yield {"file_name": file,
- "file_content": self._read_file(file, branch=branch or self.active_branch or self._active_branch),
- "commit_hash": self._file_commit_hash(file, branch=branch or self.active_branch or self._active_branch)}
+ "file_content": file_content,
+ "commit_hash": file_hash}
+ if idx % 10 == 0 or idx == total_files:
+ self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
+ self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")

  return parse_code_files_for_db(file_content_generator())

@@ -601,7 +649,9 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
  blacklist=blacklist
  )
  vectorstore = self._init_vector_store()
- return vectorstore.index_documents(documents, collection_suffix=collection_suffix, clean_index=False, is_code=True)
+ clean_index = kwargs.get('clean_index', False)
+ return vectorstore.index_documents(documents, collection_suffix=collection_suffix,
+ clean_index=clean_index, is_code=True)

  def _get_vector_search_tools(self):
  """
@@ -1,6 +1,5 @@
- import json
  import logging
- from typing import Optional, List, Generator, Any
+ from typing import Optional, Generator

  from langchain_core.documents import Document
  from langchain_core.tools import ToolException
@@ -8,7 +7,7 @@ from office365.runtime.auth.client_credential import ClientCredential
  from office365.sharepoint.client_context import ClientContext
  from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr

- from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
+ from ..non_code_indexer_toolkit import NonCodeIndexerToolkit
  from ..utils.content_parser import parse_file_content

  NoInput = create_model(
@@ -38,7 +37,7 @@ ReadDocument = create_model(
  )


- class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
+ class SharepointApiWrapper(NonCodeIndexerToolkit):
  site_url: str
  client_id: str = None
  client_secret: SecretStr = None
@@ -77,9 +76,8 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  raise ToolException("You have to define token or client id&secret.")
  logging.info("Successfully authenticated to SharePoint.")
  except Exception as e:
- logging.error(f"Failed to authenticate with SharePoint: {str(e)}")
- return values
-
+ logging.error(f"Failed to authenticate with SharePoint: {str(e)}")
+ return super().validate_toolkit(values)

  def read_list(self, list_title, limit: int = 1000):
  """ Reads a specified List in sharepoint site. Number of list items is limited by limit (default is 1000). """
@@ -161,25 +159,26 @@ class SharepointApiWrapper(BaseVectorStoreToolApiWrapper):
  }
  yield Document(page_content="", metadata=metadata)

- def _process_document(self, document: Document) -> Generator[Document, None, None]:
- doc_content = ""
- try:
- doc_content = self.read_file(document.metadata['Path'],
- is_capture_image=True,
- excel_by_sheets=True)
- except Exception as e:
- logging.error(f"Failed while parsing the file '{document.metadata['Path']}': {e}")
- if isinstance(doc_content, dict):
- for page, content in doc_content.items():
- new_metadata = document.metadata
- new_metadata['page'] = page
- yield Document(page_content=str(content), metadata=new_metadata)
- else:
- document.page_content = str(doc_content)
-
- @extend_with_vector_tools
+ def _extend_data(self, documents: Generator[Document, None, None]):
+ for document in documents:
+ try:
+ document.metadata['loader_content'] = self._load_file_content_in_bytes(document.metadata['Path'])
+ document.metadata['loader_content_type'] = document.metadata['Name']
+ yield document
+ except Exception as e:
+ logging.error(f"Failed while parsing the file '{document.metadata['Path']}': {e}")
+ yield document
+
+ def _load_file_content_in_bytes(self, path):
+ file = self._client.web.get_file_by_server_relative_path(path)
+ self._client.load(file).execute_query()
+ file_content = file.read()
+ self._client.execute_query()
+ #
+ return file_content
+
  def get_available_tools(self):
- return [
+ return super().get_available_tools() + [
  {
  "name": "read_list",
  "description": self.read_list.__doc__,
@@ -466,11 +466,11 @@ class TestrailAPIWrapper(BaseVectorStoreToolApiWrapper):
  return ToolException(
  "json_case_arguments must be a JSON string or dictionary."
  )
-
+ self._log_tool_event(message=f"Extract test cases per filter {params}", tool_name='get_cases_by_filter')
  extracted_cases = self._client.cases.get_cases(
  project_id=project_id, **params
  )
-
+ self._log_tool_event(message=f"Test cases were extracted", tool_name='get_cases_by_filter')
  # support old versions of testrail_api
  cases = extracted_cases.get("cases") if isinstance(extracted_cases, dict) else extracted_cases

@@ -1,16 +1,13 @@
  import os
  import tempfile
- from copy import deepcopy as copy
  from logging import getLogger
  from pathlib import Path
  from typing import Generator

  from langchain_core.documents import Document
  from langchain_core.tools import ToolException
- from langchain_text_splitters import TokenTextSplitter

  from alita_sdk.runtime.langchain.document_loaders.constants import loaders_map
- from alita_sdk.tools.chunkers.utils import tiktoken_length

  logger = getLogger(__name__)

@@ -170,14 +167,17 @@ def load_content_from_bytes(file_content: bytes, extension: str = None, loader_e
  if temp_file_path and os.path.exists(temp_file_path):
  os.remove(temp_file_path)

- def process_content_by_type(document: Document, extension_source: str, llm = None, chunking_config={}) -> Generator[Document, None, None]:
+ def process_content_by_type(document: Document, content, extension_source: str, llm = None, chunking_config={}) -> Generator[Document, None, None]:
  temp_file_path = None
  try:
  extension = "." + extension_source.split('.')[-1].lower()

  with tempfile.NamedTemporaryFile(mode='w+b', suffix=extension, delete=False) as temp_file:
  temp_file_path = temp_file.name
- content = document.metadata.pop('loader_content')
+ if content is None:
+ logger.warning("'loader_content' ie expected but not found in document metadata.")
+ return
+
  temp_file.write(content)
  temp_file.flush()

@@ -190,37 +190,32 @@ def process_content_by_type(document: Document, extension_source: str, llm = Non
  loader_kwargs = loader_config['kwargs']

  loader = loader_cls(file_path=temp_file_path, **loader_kwargs)
- docs_iterator = loader.load()
- max_tokens = chunking_config.get('max_tokens', 512)
- tokens_overlapping = chunking_config.get('tokens_overlapping', 10)
- chunk_id = 0
- for chunk in docs_iterator:
- if tiktoken_length(chunk.page_content) > max_tokens:
- for subchunk in TokenTextSplitter(encoding_name="cl100k_base",
- chunk_size=max_tokens,
- chunk_overlap=tokens_overlapping
- ).split_text(chunk.page_content):
- chunk_id += 1
- headers_meta = list(chunk.metadata.values())
- docmeta = copy(document.metadata)
- docmeta.update({"headers": "; ".join(str(headers_meta))})
- docmeta['chunk_id'] = chunk_id
- docmeta['chunk_type'] = "document"
- yield Document(
- page_content=subchunk,
- metadata=docmeta
- )
- else:
- chunk_id += 1
- headers_meta = list(chunk.metadata.values())
- docmeta = copy(document.metadata)
- docmeta.update({"headers": "; ".join(str(headers_meta))})
- docmeta['chunk_id'] = chunk_id
- docmeta['chunk_type'] = "document"
- yield Document(
- page_content=chunk.page_content,
- metadata=docmeta
- )
+ for chunk in loader.load():
+ yield Document(
+ page_content=sanitize_for_postgres(chunk.page_content),
+ metadata={**document.metadata, **chunk.metadata}
+ )
  finally:
  if temp_file_path and os.path.exists(temp_file_path):
- os.remove(temp_file_path)
+ os.remove(temp_file_path)
+
+ # FIXME copied from langchain_core/utils/strings.py of 0.3.74 version
+ # https://github.com/langchain-ai/langchain/pull/32157
+ # should be used from langchain_core.utils import sanitize_for_postgres once updated to newer version
+ def sanitize_for_postgres(text: str, replacement: str = "") -> str:
+ r"""Sanitize text by removing NUL bytes that are incompatible with PostgreSQL.
+ PostgreSQL text fields cannot contain NUL (0x00) bytes, which can cause
+ psycopg.DataError when inserting documents. This function removes or replaces
+ such characters to ensure compatibility.
+ Args:
+ text: The text to sanitize.
+ replacement: String to replace NUL bytes with. Defaults to empty string.
+ Returns:
+ str: The sanitized text with NUL bytes removed or replaced.
+ Example:
+ >>> sanitize_for_postgres("Hello\\x00world")
+ 'Helloworld'
+ >>> sanitize_for_postgres("Hello\\x00world", " ")
+ 'Hello world'
+ """
+ return text.replace("\x00", replacement)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: alita_sdk
- Version: 0.3.253
+ Version: 0.3.255
  Summary: SDK for building langchain agents using resources from Alita
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
  License-Expression: Apache-2.0
@@ -18,7 +18,7 @@ alita_sdk/configurations/postman.py,sha256=wEmbZxwJGKSmeOzNVgk4vWkme275m3PFfYu06
  alita_sdk/configurations/qtest.py,sha256=LHM6RXxs_iSwSUdBjNXXVvqiiehT9fkBESE-ECDukt0,695
  alita_sdk/configurations/rally.py,sha256=1rwYh7bVV3XXufWRuPbr3Gz6zVPnfbA42bJYvJYsY-o,1515
  alita_sdk/configurations/service_now.py,sha256=Y3EQx0DQmLDm0P7V997FV5DoPQprgJ3Mk-yJmE5rE3M,1196
- alita_sdk/configurations/slack.py,sha256=fiKs04brkESygJg2EB1p6Dj1mkvKIKyuEozaueL_KMM,1150
+ alita_sdk/configurations/slack.py,sha256=ppwfV7YMpkq-qU6YREK7EH8VmYBZ0EN_9WIwz3EZI-Q,1139
  alita_sdk/configurations/testrail.py,sha256=k0fPmHBIrWAfEKhrDdB9Rdirw-UFHFoXkRePyrsqcWI,725
  alita_sdk/configurations/xray.py,sha256=xbydsVMqGJYVrNmg6bCr3uMxXVEPFtEhPovgWX6-6_Y,1141
  alita_sdk/configurations/zephyr.py,sha256=ndqGYFy5OFxjoXB7DzC71rd5W6qGBGAlKMWoqT8TuNk,1653
@@ -26,7 +26,7 @@ alita_sdk/configurations/zephyr_enterprise.py,sha256=5W1QEcv62Y5Rk_kApI2QmOwvWZe
  alita_sdk/runtime/__init__.py,sha256=4W0UF-nl3QF2bvET5lnah4o24CoTwSoKXhuN0YnwvEE,828
  alita_sdk/runtime/clients/__init__.py,sha256=BdehU5GBztN1Qi1Wul0cqlU46FxUfMnI6Vq2Zd_oq1M,296
  alita_sdk/runtime/clients/artifact.py,sha256=H3pJAh5G-zWVyJ6YbqHGk4jA8U6HfacQduiTivpJZ3Y,3210
- alita_sdk/runtime/clients/client.py,sha256=HO5mSrrque9HaHdBmQVR639leBNTI1TFA0HYmXdfqLA,43187
+ alita_sdk/runtime/clients/client.py,sha256=irj2uTGdIQj8Wd1ZGdi5yDCFm_n9TiRhEhODJz4yI84,43493
  alita_sdk/runtime/clients/datasource.py,sha256=HAZovoQN9jBg0_-lIlGBQzb4FJdczPhkHehAiVG3Wx0,1020
  alita_sdk/runtime/clients/prompt.py,sha256=li1RG9eBwgNK_Qf0qUaZ8QNTmsncFrAL2pv3kbxZRZg,1447
  alita_sdk/runtime/langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -45,10 +45,11 @@ alita_sdk/runtime/langchain/document_loaders/AlitaBDDScenariosLoader.py,sha256=4
  alita_sdk/runtime/langchain/document_loaders/AlitaCSVLoader.py,sha256=3ne-a5qIkBuGL2pzIePxDr79n3RJhASbOdS5izYWDMg,2321
  alita_sdk/runtime/langchain/document_loaders/AlitaConfluenceLoader.py,sha256=NzpoL4C7UzyzLouTSL_xTQw70MitNt-WZz3Eyl7QkTA,8294
  alita_sdk/runtime/langchain/document_loaders/AlitaDirectoryLoader.py,sha256=fKezkgvIcLG7S2PVJp1a8sZd6C4XQKNZKAFC87DbQts,7003
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py,sha256=nHvXm5U5qa26FGRwl6YKCG7HGBV5erjqqyWowNWs7iI,5723
+ alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py,sha256=9hi5eHgDIfa9wBWqTuwMM6D6W64czrDTfZl_htooe8Y,5943
  alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py,sha256=YBFYikrOEITfIavU0Xu7BQSNvPCFKzcmbJ_VDeQ6KdI,3078
  alita_sdk/runtime/langchain/document_loaders/AlitaGitRepoLoader.py,sha256=5WXGcyHraSVj3ANHj_U6X4EDikoekrIYtS0Q_QqNIng,2608
  alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py,sha256=ogvCmpnS54-D7fP_sSkL1dnhHTmRSD-HA2FFrTNhDEo,6560
+ alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py,sha256=1mGZjltnqsSXkp1Jw-lQroyNFiCPpjb9ZbdoqOlqPeU,3354
  alita_sdk/runtime/langchain/document_loaders/AlitaJiraLoader.py,sha256=M2q3YThkps0yAZOjfoLcyE7qycVTYKcXEGtpmp0N6C8,10950
  alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py,sha256=elymFlVGiCkcrIY5FrLxbxnQ9jdt3PPV0yBJGF3pTFE,2858
  alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py,sha256=SKAAPo3DfMtRPxICKrPzlXXkC5RfaeiRj7lejLXTi7o,2337
@@ -56,7 +57,7 @@ alita_sdk/runtime/langchain/document_loaders/AlitaQtestLoader.py,sha256=CUVVnisx
  alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py,sha256=o0SRFPZ-VskltgThVRX80rT19qtB4gPzxED9SENTNWo,4145
  alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py,sha256=uNcV0En49_0u0RYB1sP1XfNspT2Xc5CacuJr9Jqv79Q,2972
  alita_sdk/runtime/langchain/document_loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- alita_sdk/runtime/langchain/document_loaders/constants.py,sha256=Zu_TSxZzcsrJjLEfLSgPEmoZOn97hwI-lfKJR0JKJzk,4535
+ alita_sdk/runtime/langchain/document_loaders/constants.py,sha256=EL20rusYbnPk2zwOh8-gxSdaEuqThZJcqiyINXphxFw,4607
  alita_sdk/runtime/langchain/document_loaders/utils.py,sha256=9xghESf3axBbwxATyVuS0Yu-TWe8zWZnXgCD1ZVyNW0,2414
  alita_sdk/runtime/langchain/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  alita_sdk/runtime/langchain/interfaces/kwextractor.py,sha256=kSJA9L8g8UArmHu7Bd9dIO0Rrq86JPUb8RYNlnN68FQ,3072
@@ -100,13 +101,13 @@ alita_sdk/runtime/tools/indexer_tool.py,sha256=whSLPevB4WD6dhh2JDXEivDmTvbjiMV1M
  alita_sdk/runtime/tools/llm.py,sha256=NsrsP-SblyxDdzgMCn9_OBUL0sUGDVS5yqer49V7ciE,15069
  alita_sdk/runtime/tools/loop.py,sha256=uds0WhZvwMxDVFI6MZHrcmMle637cQfBNg682iLxoJA,8335
  alita_sdk/runtime/tools/loop_output.py,sha256=U4hO9PCQgWlXwOq6jdmCGbegtAxGAPXObSxZQ3z38uk,8069
- alita_sdk/runtime/tools/mcp_server_tool.py,sha256=eI8QUt497xblwF4Zhbvi8wCg17yh2yoWjcw_AIzHwGE,2819
+ alita_sdk/runtime/tools/mcp_server_tool.py,sha256=trGraI8-AwdbNmTKMjfmlBxgTDMTE4-21heCVtd_lz0,4156
  alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8QwB0UwpsWCYruXU,11692
  alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
  alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
  alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
- alita_sdk/runtime/tools/vectorstore.py,sha256=l5wfovwMNvS_RgW-ZHXCh8Cm8gauunRzP0NPkzmshcQ,33852
- alita_sdk/runtime/tools/vectorstore_base.py,sha256=OdJIJkjTmQ0BC-AzAOMP2phAcNATJ8gI5JoBWSSdpNU,27892
+ alita_sdk/runtime/tools/vectorstore.py,sha256=yl6FKJGVQDevftSkxWTkMbqjIskIFz69vXELdEGp9u4,34780
+ alita_sdk/runtime/tools/vectorstore_base.py,sha256=HFaNk_oBoeZWrQWBrvEsozajHqwjWxsV6RigkQyq-eQ,27586
  alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
  alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -118,8 +119,8 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
  alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
  alita_sdk/runtime/utils/utils.py,sha256=CpEl3LCeLbhzQySz08lkKPm7Auac6IiLF7WB8wmArMI,589
  alita_sdk/tools/__init__.py,sha256=ko5TToGYZFmBrho26DRAVvrkHWxQ2sfs8gVAASinYp8,10611
- alita_sdk/tools/base_indexer_toolkit.py,sha256=qQfMHzsQ2BfusKMV_DNiHOtZVheiQ4gBfy5JXjYi0UY,20231
- alita_sdk/tools/elitea_base.py,sha256=qXSrl0A8KxIuv6796bTkjPpxBm4WQ5zmpskIAwCFfC8,30394
+ alita_sdk/tools/base_indexer_toolkit.py,sha256=gOjE1igKyjG1LohMj0XMlj1IGaFp7eEEDqyEG6-xLmc,18405
+ alita_sdk/tools/elitea_base.py,sha256=Qrj8r71yffF6qmbwLtu5dz72LjitEjK8Me-8-TSfLVY,32694
  alita_sdk/tools/non_code_indexer_toolkit.py,sha256=v9uq1POE1fQKCd152mbqDtF-HSe0qoDj83k4E5LAkMI,1080
  alita_sdk/tools/ado/__init__.py,sha256=bArTObt5cqG1SkijKevWGbsIILHBA3aCStg8Q1jd69k,1243
  alita_sdk/tools/ado/utils.py,sha256=PTCludvaQmPLakF2EbCGy66Mro4-rjDtavVP-xcB2Wc,1252
@@ -129,7 +130,7 @@ alita_sdk/tools/ado/test_plan/__init__.py,sha256=4fEw_3cm4shuZ868HhAU-uMH3xNXPyb
  alita_sdk/tools/ado/test_plan/test_plan_wrapper.py,sha256=jQt8kFmdAzsopjByLTMiSnWtoqz_IUOmYkhPTVGeMnU,20265
  alita_sdk/tools/ado/wiki/__init__.py,sha256=uBKo_Meu2ZxMxcxGsMmvCXyplRE2um1_PIRvdYd37rM,5171
  alita_sdk/tools/ado/wiki/ado_wrapper.py,sha256=zg6wMRar1DTp-ZRlYaQifBEnpYmTrHXskTNPdrLdy8s,14759
- alita_sdk/tools/ado/work_item/__init__.py,sha256=coDedNL0pSPLjZ6VVK1UcqWo00zxe2T4XfVXt8bMho8,5383
+ alita_sdk/tools/ado/work_item/__init__.py,sha256=HNcdIMwTSNe-25_Pg-KmVVXTFci3vNa84tkTFkls36c,5373
  alita_sdk/tools/ado/work_item/ado_wrapper.py,sha256=gEywCL_kS0k1jWcDhsmYUybpIP08tH8go6CixLJGwT4,28409
  alita_sdk/tools/advanced_jira_mining/__init__.py,sha256=pUTzECqGvYaR5qWY3JPUhrImrZgc7pCXuqSe5eWIE80,4604
  alita_sdk/tools/advanced_jira_mining/data_mining_wrapper.py,sha256=nZPtuwVWp8VeHw1B8q9kdwf-6ZvHnlXTOGdcIMDkKpw,44211
@@ -193,7 +194,7 @@ alita_sdk/tools/chunkers/code/treesitter/treesitter_rs.py,sha256=LgKyNffBy30gIr8
  alita_sdk/tools/chunkers/code/treesitter/treesitter_ts.py,sha256=Qs1a_BBN296iZc5hh8UNF9sc0G0-A_XZVhP3Na1ZNDg,387
  alita_sdk/tools/chunkers/sematic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  alita_sdk/tools/chunkers/sematic/base.py,sha256=bRHpCFbOy-KPe4HBGpegrvIhvOsd7sDRfmb06T8tSuU,349
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py,sha256=NZCZi0Xzi58Bm7-9LzwGoAhdNZhvUERb_sK1bNQpqCQ,2574
+ alita_sdk/tools/chunkers/sematic/markdown_chunker.py,sha256=HmAGKuIodnMcHl-kBwAb1NY0GKKwAskRFvGaW3m4HAM,3859
  alita_sdk/tools/chunkers/sematic/proposal_chunker.py,sha256=t8JjX9TH6yHXXaemiDK1E6000tlES2Kl8XfyezmlIoo,5116
  alita_sdk/tools/chunkers/sematic/statistical_chunker.py,sha256=VDQcMC-ky72GqdWJiHMmcRmfJTTU5XglBF1IWg2Qews,13403
  alita_sdk/tools/cloud/__init__.py,sha256=ekqANTJAyuURqpjNTn6MmSn2q6qEKwENxEXBUFGkkck,512
@@ -211,8 +212,8 @@ alita_sdk/tools/code/linter/api_wrapper.py,sha256=wylpwhAw02Jt8L18CqBq2He5PbwIkx
  alita_sdk/tools/code/loaders/codesearcher.py,sha256=XoXXZtIQZhvjIwZlnl_4wVGHC-3saYzFo5oDR_Zh3EY,529
  alita_sdk/tools/code/sonar/__init__.py,sha256=u8wpgXJ_shToLl3G9-XEtGDor5dhmsnurIImh1-e-U0,3165
  alita_sdk/tools/code/sonar/api_wrapper.py,sha256=nNqxcWN_6W8c0ckj-Er9HkNuAdgQLoWBXh5UyzNutis,2653
- alita_sdk/tools/confluence/__init__.py,sha256=xLsxdBZ62NL0k9NxaV4KnspwmDcucQzcl-tAaz7eLB8,6562
- alita_sdk/tools/confluence/api_wrapper.py,sha256=4WqjVeFWyFeb4-VD5v4_J69pbyjire4Op7cBSKU9EXw,85057
+ alita_sdk/tools/confluence/__init__.py,sha256=ClK6fuJr5rsgDhLLA2Ci3zJdBSP3liyUpHML9oQqKFs,6804
+ alita_sdk/tools/confluence/api_wrapper.py,sha256=-wQduJUk2wwfBQGQHSWuCnrt35gfp195nSKUFVSPS1s,85218
  alita_sdk/tools/confluence/loader.py,sha256=4bf5qrJMEiJzuZp2NlxO2XObLD1w7fxss_WyMUpe8sg,9290
  alita_sdk/tools/confluence/utils.py,sha256=Lxo6dBD0OlvM4o0JuK6qeB_4LV9BptiwJA9e1vqNcDw,435
  alita_sdk/tools/custom_open_api/__init__.py,sha256=9aT5SPNPWcJC6jMZEM-3rUCXVULj_3-qJLQKmnreKNo,2537
@@ -298,7 +299,7 @@ alita_sdk/tools/servicenow/__init__.py,sha256=hReiTp8yv07eR0O_1KJThzUO2xhWhIWcjU
  alita_sdk/tools/servicenow/api_wrapper.py,sha256=WpH-bBLGFdhehs4g-K-WAkNuaD1CSrwsDpdgB3RG53s,6120
  alita_sdk/tools/servicenow/servicenow_client.py,sha256=Rdqfu-ll-qbnclMzChLZBsfXRDzgoX_FdeI2WLApWxc,3269
  alita_sdk/tools/sharepoint/__init__.py,sha256=Mofg_N-7zFf5mKm3_0D0dhC_H0MX-bk3YQ5Sl3oXokg,4114
- alita_sdk/tools/sharepoint/api_wrapper.py,sha256=TSdKZuLnn3uSkaNuYb7a2xG4w4sQzXbzOO3c8tIlFds,9259
+ alita_sdk/tools/sharepoint/api_wrapper.py,sha256=Hcd9YypWMr3upDVJHRxUyPdN4k8joqRQOc_uce2ek1A,9250
  alita_sdk/tools/sharepoint/authorization_helper.py,sha256=n-nL5dlBoLMK70nHu7P2RYCb8C6c9HMA_gEaw8LxuhE,2007
  alita_sdk/tools/sharepoint/utils.py,sha256=fZ1YzAu5CTjKSZeslowpOPH974902S8vCp1Wu7L44LM,446
  alita_sdk/tools/slack/__init__.py,sha256=o8BnDMWGC5qA8pVIyIiflM6T__dZ6qAE1UdtJcvmaxk,3901
@@ -309,9 +310,9 @@ alita_sdk/tools/sql/models.py,sha256=AKJgSl_kEEz4fZfw3kbvdGHXaRZ-yiaqfJOB6YOj3i0
  alita_sdk/tools/testio/__init__.py,sha256=qi12wyJXN02hrUXg08CbijcCL5pi30JMbJfiXjn1Zr0,2646
  alita_sdk/tools/testio/api_wrapper.py,sha256=BvmL5h634BzG6p7ajnQLmj-uoAw1gjWnd4FHHu1h--Q,21638
  alita_sdk/tools/testrail/__init__.py,sha256=0kETjWKLU7R6mugBWsjwEUsh10pipbAeNSGJAO0FBh0,4634
- alita_sdk/tools/testrail/api_wrapper.py,sha256=K-Gc42RH2z-fK4cXi8zQq3s9A4v_pCJkRB3XKLAhypc,32056
+ alita_sdk/tools/testrail/api_wrapper.py,sha256=5T-QyTzt-J0rI32xc_E684lCdgyWeHSyeTYiwQwtGyg,32275
  alita_sdk/tools/utils/__init__.py,sha256=155xepXPr4OEzs2Mz5YnjXcBpxSv1X2eznRUVoPtyK0,3268
- alita_sdk/tools/utils/content_parser.py,sha256=0HKQqGTdXHKlcz72GHEwXqLXJsRYXm35F-P1KZz0sNc,10351
+ alita_sdk/tools/utils/content_parser.py,sha256=zqeyuxZqZqVFq5M5sZM-falMdlOw48FyZnp3Z0XUpCw,9868
  alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=a6FAsiix_EvATIKUf5YT6vHh5LDyJ5uSP3LJqoxFo04,17367
  alita_sdk/tools/vector_adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  alita_sdk/tools/xray/__init__.py,sha256=GGpbiBdDQ9kMFqJEHYi7XwKpkuMMHi-ZF-IM8yFIgUM,4380
@@ -333,8 +334,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=JAeWf-RXohsxheUpT0iMDClc_izj-
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
- alita_sdk-0.3.253.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- alita_sdk-0.3.253.dist-info/METADATA,sha256=sOv_LdDPyuyBm4c-1hfZH1XG_V5-MeUIDuTJgmDX8Hk,18897
- alita_sdk-0.3.253.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- alita_sdk-0.3.253.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
- alita_sdk-0.3.253.dist-info/RECORD,,
+ alita_sdk-0.3.255.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ alita_sdk-0.3.255.dist-info/METADATA,sha256=U2ck9IqpmmWxni_szIR0vV7aZZpPr9HUKUexI2HQb44,18897
+ alita_sdk-0.3.255.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ alita_sdk-0.3.255.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+ alita_sdk-0.3.255.dist-info/RECORD,,