alita-sdk 0.3.376__py3-none-any.whl → 0.3.423__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/client.py +3 -2
- alita_sdk/runtime/clients/sandbox_client.py +8 -0
- alita_sdk/runtime/langchain/assistant.py +41 -38
- alita_sdk/runtime/langchain/constants.py +4 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
- alita_sdk/runtime/langchain/langraph_agent.py +88 -27
- alita_sdk/runtime/langchain/utils.py +24 -4
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/tools.py +80 -49
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +20 -28
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +146 -114
- alita_sdk/runtime/tools/sandbox.py +166 -63
- alita_sdk/runtime/tools/vectorstore.py +3 -2
- alita_sdk/runtime/tools/vectorstore_base.py +4 -3
- alita_sdk/runtime/utils/utils.py +1 -0
- alita_sdk/tools/__init__.py +43 -31
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +75 -66
- alita_sdk/tools/code_indexer_toolkit.py +13 -3
- alita_sdk/tools/confluence/api_wrapper.py +29 -7
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/elitea_base.py +7 -7
- alita_sdk/tools/gitlab/api_wrapper.py +8 -9
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/qtest/api_wrapper.py +298 -51
- alita_sdk/tools/sharepoint/api_wrapper.py +104 -33
- alita_sdk/tools/sharepoint/authorization_helper.py +175 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/utils/content_parser.py +27 -16
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +19 -6
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.423.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.423.dist-info}/RECORD +50 -50
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.423.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.423.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.423.dist-info}/top_level.txt +0 -0
alita_sdk/tools/base_indexer_toolkit.py
CHANGED

@@ -7,7 +7,6 @@ from typing import Any, Optional, List, Dict, Generator
 from langchain_core.documents import Document
 from pydantic import create_model, Field, SecretStr
 
-from .utils import make_json_serializable
 from .utils.content_parser import file_extension_by_chunker, process_document_by_type
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.langchain.document_loaders.constants import loaders_allowed_to_override
@@ -111,7 +110,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('
+        collection_name = kwargs.get('collection_schema')
 
         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
@@ -152,39 +151,45 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
 
     def index_data(self, **kwargs):
         index_name = kwargs.get("index_name")
-        progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         chunking_tool = kwargs.get("chunking_tool")
         chunking_config = kwargs.get("chunking_config")
+        result = {"count": 0}
         #
-        [23 lines of the previous implementation, not shown in this view]
+        try:
+            if clean_index:
+                self._clean_index(index_name)
+            #
+            self.index_meta_init(index_name, kwargs)
+            #
+            self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
+            self._log_tool_event(f"Loading the documents to index...{kwargs}")
+            documents = self._base_loader(**kwargs)
+            documents = list(documents)  # consume/exhaust generator to count items
+            documents_count = len(documents)
+            documents = (doc for doc in documents)
+            self._log_tool_event(f"Base documents were pre-loaded. "
+                                 f"Search for possible document duplicates and remove them from the indexing list...")
+            documents = self._reduce_duplicates(documents, index_name)
+            self._log_tool_event(f"Duplicates were removed. "
+                                 f"Processing documents to collect dependencies and prepare them for indexing...")
+            self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
+            #
+            results_count = result["count"]
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            #
+            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
+                    else "no new documents to index"}
+        except Exception as e:
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
+            raise e
+
+    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
         self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
         from ..runtime.langchain.interfaces.llm_processor import add_documents
         #
         base_doc_counter = 0
-        total_counter = 0
         pg_vector_add_docs_chunk = []
         for base_doc in base_documents:
             base_doc_counter += 1
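The rewritten index_data counts documents up front and threads a mutable result dict into _save_index_generator, so a running count survives a mid-run failure and can still be reported by index_meta_update in the except branch. A minimal, self-contained sketch of that pattern follows; load_documents and save_documents are illustrative stand-ins, not SDK functions.

from typing import Dict, Generator, Iterable

def load_documents() -> Generator[str, None, None]:
    # stand-in for self._base_loader(**kwargs)
    yield from ("doc-a", "doc-b", "doc-c")

def save_documents(docs: Iterable[str], total: int, result: Dict[str, int]) -> None:
    # stand-in for _save_index_generator: mutates `result` in place so the caller
    # still sees the running count if an exception interrupts the loop
    for _doc in docs:
        result["count"] += 1
    print(f"processed {result['count']} of {total} documents")

result = {"count": 0}
try:
    docs = list(load_documents())    # exhaust the generator once to learn the total
    total = len(docs)
    docs = (d for d in docs)         # re-wrap as a generator for lazy downstream steps
    save_documents(docs, total, result)
    print(f"successfully indexed {result['count']} documents")
except Exception:
    print(f"failed after indexing {result['count']} documents")
    raise

Passing the accumulator instead of relying on a return value is what lets the failure path record a partial count.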
@@ -232,10 +237,9 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 msg = f"Indexed base document #{base_doc_counter} out of {base_total} (with {dependent_docs_counter} dependencies)."
                 logger.debug(msg)
                 self._log_tool_event(msg)
-                [1 removed line, not shown in this view]
+                result["count"] += dependent_docs_counter
         if pg_vector_add_docs_chunk:
             add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
-        return total_counter
 
     def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
         from ..tools.chunkers import __all__ as chunkers
@@ -343,7 +347,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         """Cleans the indexed data in the collection."""
         super()._clean_collection(index_name=index_name)
         return (f"Collection '{index_name}' has been removed from the vector store.\n"
-                f"Available collections: {self.
+                f"Available collections: {self.list_collections()}") if index_name \
             else "All collections have been removed from the vector store."
 
     def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
@@ -385,7 +389,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         """ Searches indexed documents in the vector store."""
         # build filter on top of index_name
 
-        available_collections = super().
+        available_collections = super().list_collections()
         if index_name and index_name not in available_collections:
             return f"Collection '{index_name}' not found. Available collections: {available_collections}"
 
@@ -454,37 +458,28 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         )
 
     def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
-        [22 lines of the previous implementation, not shown in this view]
-        else:
-            history = []
-            new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
-            history.append(new_history_item)
-            metadata["history"] = json.dumps(history)
-            index_meta_ids = [index_meta_raw.get("id")]
-            #
-            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
-            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "task_id": None,
+                "conversation_id": None,
+            }
+            metadata["history"] = json.dumps([metadata])
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
 
     def index_meta_update(self, index_name: str, state: str, result: int):
         index_meta_raw = super().get_index_meta(index_name)
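For orientation, below is a rough sketch of the metadata record that index_meta_init appears to write on the first run, with the history field stored as a JSON string whose first entry is a snapshot of the record itself. The literal "index_meta" and "in_progress" strings are placeholders for the IndexerKeywords values, which are not shown in this diff.

import json
import time

index_name = "my_collection"
created_on = time.time()
metadata = {
    "collection": index_name,
    "type": "index_meta",        # placeholder for IndexerKeywords.INDEX_META_TYPE.value
    "indexed": 0,
    "state": "in_progress",      # placeholder for IndexerKeywords.INDEX_META_IN_PROGRESS.value
    "index_configuration": {"index_name": index_name},
    "created_on": created_on,
    "updated_on": created_on,
    "task_id": None,
    "conversation_id": None,
}
# "history" is added after the dump, so the snapshot does not contain itself
metadata["history"] = json.dumps([metadata])
print(metadata["history"])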
@@ -495,6 +490,20 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         metadata["indexed"] = result
         metadata["state"] = state
         metadata["updated_on"] = time.time()
+        #
+        history_raw = metadata.pop("history", "[]")
+        try:
+            history = json.loads(history_raw) if history_raw.strip() else []
+            # replace the last history item with updated metadata
+            if history and isinstance(history, list):
+                history[-1] = metadata
+            else:
+                history = [metadata]
+        except (json.JSONDecodeError, TypeError):
+            logger.warning(f"Failed to load index history: {history_raw}. Create new with only current item.")
+            history = [metadata]
+        #
+        metadata["history"] = json.dumps(history)
         index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
         add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])
 
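The history handling in index_meta_update round-trips that JSON string: pop it off the metadata, replace the last snapshot with the updated values, and re-serialize, falling back to a single-item history if the stored string is unparsable. A small standalone sketch of the same logic, with a hand-written metadata dict in place of the real record:

import json

metadata = {"state": "completed", "indexed": 42, "history": '[{"state": "in_progress", "indexed": 0}]'}
history_raw = metadata.pop("history", "[]")
try:
    history = json.loads(history_raw) if history_raw.strip() else []
    if history and isinstance(history, list):
        history[-1] = metadata          # overwrite the in-progress snapshot
    else:
        history = [metadata]
except (json.JSONDecodeError, TypeError):
    history = [metadata]                # corrupt history: start over with the current item
metadata["history"] = json.dumps(history)
print(metadata["history"])              # -> [{"state": "completed", "indexed": 42}]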
@@ -547,10 +556,10 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 "args_schema": RemoveIndexParams
             },
             {
-                "name": "
-                "mode": "
-                "ref": self.
-                "description": self.
+                "name": "list_collections",
+                "mode": "list_collections",
+                "ref": self.list_collections,
+                "description": self.list_collections.__doc__,
                 "args_schema": create_model("ListCollectionsParams") # No parameters
             },
         ]
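Each entry in this tool list pairs a bound method with an empty pydantic schema generated by create_model. A hedged sketch of that registration shape, using a dummy wrapper class rather than the SDK's real base class:

from pydantic import create_model

class Wrapper:
    def list_collections(self):
        """Lists all collections in the vector store."""
        return ["collection_a", "collection_b"]

wrapper = Wrapper()
tool_entry = {
    "name": "list_collections",
    "mode": "list_collections",
    "ref": wrapper.list_collections,
    "description": wrapper.list_collections.__doc__,
    "args_schema": create_model("ListCollectionsParams"),  # the tool takes no parameters
}
print(tool_entry["description"], tool_entry["ref"]())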
alita_sdk/tools/code_indexer_toolkit.py
CHANGED

@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator
 
@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def key_fn(self, document: Document):
-        return document.metadata.get(
+        return document.metadata.get("filename")
 
     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and

@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )
 
     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from
+        yield from documents
 
     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
             if not file_content:
                 # empty file, skip
                 continue
+            #
+            # ensure file content is a string
+            if isinstance(file_content, bytes):
+                file_content = file_content.decode("utf-8", errors="ignore")
+            elif isinstance(file_content, dict) and file.endswith('.json'):
+                file_content = json.dumps(file_content)
+            elif not isinstance(file_content, str):
+                file_content = str(file_content)
+            #
             # hash the file content to ensure uniqueness
             import hashlib
             file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
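The normalization step guarantees the content handed to sha256 is always a string, whatever type the loader returned. A minimal standalone sketch of the same branching (the normalize helper is illustrative, not part of the SDK):

import hashlib
import json

def normalize(file_name: str, file_content) -> str:
    # decode bytes, re-serialize JSON dicts, fall back to str() for anything else
    if isinstance(file_content, bytes):
        return file_content.decode("utf-8", errors="ignore")
    if isinstance(file_content, dict) and file_name.endswith(".json"):
        return json.dumps(file_content)
    if not isinstance(file_content, str):
        return str(file_content)
    return file_content

for name, content in [("a.py", b"print('hi')"), ("b.json", {"k": 1}), ("c.txt", "plain")]:
    text = normalize(name, content)
    print(name, hashlib.sha256(text.encode("utf-8")).hexdigest()[:12])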
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
             self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
             self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
 
-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())
 
     def __handle_get_files(self, path: str, branch: str):
         """
alita_sdk/tools/confluence/api_wrapper.py
CHANGED

@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal
 
 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log
 
 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)
 
     @model_validator(mode='before')
@@ -498,7 +501,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         restrictions = self.client.get_all_restrictions_for_content(page["id"])
 
         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
             and not restrictions["read"]["restrictions"]["user"]["results"]
             and not restrictions["read"]["restrictions"]["group"]["results"]
         )
@@ -518,18 +523,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
                 ),
                 before_sleep=before_sleep_log(logger, logging.WARNING),
             )(self.client.get_page_by_id)
-            [5 lines of the previous implementation, not shown in this view]
+            try:
+                page = get_page(
+                    page_id=page_id, expand=f"{self.content_format.value},version"
+                )
+            except (ApiError, HTTPError) as e:
+                logger.error(f"Error fetching page with ID {page_id}: {e}")
+                page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+                # store errors
+                if self._errors is None:
+                    self._errors = []
+                self._errors.append(page_content_temp)
+                return Document(page_content=page_content_temp,
+                                metadata={})
+            # TODO: update on toolkit advanced settings level as a separate feature
+            # if not self.include_restricted_content and not self.is_public_page(page):
+            #     continue
             yield self.process_page(page, skip_images)
 
+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content
 
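The wrapper now remembers failed page fetches in _errors and returns a placeholder instead of raising, so read_page_by_id can report the collected errors. A simplified sketch of that collect-and-continue pattern; fetch_page is a fake call that always fails, and the sketch yields a plain string where the SDK code builds a Document:

from typing import List, Optional

class PageReader:
    def __init__(self):
        self._errors: Optional[List[str]] = None

    def fetch_page(self, page_id: str) -> str:
        # stand-in for the retried Confluence client call; always fails here
        raise RuntimeError("HTTP 404")

    def get_pages_by_id(self, page_ids):
        for page_id in page_ids:
            try:
                yield self.fetch_page(page_id)
            except RuntimeError as e:
                message = f"cannot fetch the page with ID {page_id}: {e}"
                if self._errors is None:
                    self._errors = []
                self._errors.append(message)
                yield message    # placeholder instead of the real page content

reader = PageReader()
print(list(reader.get_pages_by_id(["123"])), reader._errors)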
alita_sdk/tools/confluence/loader.py
CHANGED

@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger
 
 import requests
+from langchain_core.documents import Document
 
 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)
 
+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
alita_sdk/tools/elitea_base.py
CHANGED
@@ -17,7 +17,7 @@ from ..runtime.utils.utils import IndexerKeywords
 
 logger = logging.getLogger(__name__)
 
-INDEX_TOOL_NAMES = ['index_data', 'remove_index', '
+INDEX_TOOL_NAMES = ['index_data', 'remove_index', 'list_collections', 'search_index', 'stepback_search_index',
                     'stepback_summary_index']
 
 LoaderSchema = create_model(
@@ -403,9 +403,9 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         """Cleans the indexed data in the collection."""
         self._init_vector_store()._clean_collection(index_name=index_name)
         return (f"Collection '{index_name}' has been removed from the vector store.\n"
-                f"Available collections: {self.
+                f"Available collections: {self.list_collections()}")
 
-    def
+    def list_collections(self):
         """Lists all collections in the vector store."""
         vectorstore_wrapper = self._init_vector_store()
         return vectorstore_wrapper.list_collections()
@@ -537,10 +537,10 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                 "args_schema": RemoveIndexParams
             },
             {
-                "name": "
-                "mode": "
-                "ref": self.
-                "description": self.
+                "name": "list_collections",
+                "mode": "list_collections",
+                "ref": self.list_collections,
+                "description": self.list_collections.__doc__,
                 "args_schema": create_model("ListCollectionsParams") # No parameters
             },
 
alita_sdk/tools/gitlab/api_wrapper.py
CHANGED

@@ -115,9 +115,8 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
         """Remove trailing slash from URL if present."""
         return url.rstrip('/') if url else url
 
-    @model_validator(mode='
-    [1 removed line, not shown in this view]
-    def validate_toolkit(cls, values: Dict) -> Dict:
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:
@@ -125,17 +124,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-        [1 removed line, not shown in this view]
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=
-            private_token=
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )
 
         g.auth()
-        [2 removed lines, not shown in this view]
-        return
+        self._git = g
+        self._active_branch = self.branch
+        return self
 
     @property
     def repo_instance(self):
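The switch from mode='before' to mode='after' means the validator now runs on the constructed model and must return self, which lets it assign private attributes such as the authenticated client directly instead of mutating a raw values dict. A minimal pydantic v2 sketch of that shape; GitLabConfig and its fields are invented for illustration, and the real client setup is the gitlab.Gitlab call shown in the diff above:

from pydantic import BaseModel, PrivateAttr, model_validator

class GitLabConfig(BaseModel):
    url: str
    repository: str
    _client: object = PrivateAttr(default=None)

    @model_validator(mode="after")
    def validate_toolkit(self):
        # runs on the constructed instance; mutate fields/private attrs and return self
        self.repository = self.repository.rstrip("/")
        self._client = object()   # a real implementation would build and auth the API client here
        return self

cfg = GitLabConfig(url="https://gitlab.example.com/", repository="group/project/")
print(cfg.repository)   # -> group/project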
alita_sdk/tools/jira/api_wrapper.py
CHANGED

@@ -563,7 +563,7 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         Use the appropriate issue link type (e.g., "Test", "Relates", "Blocks").
         If we use "Test" linktype, the test is inward issue, the story/other issue is outward issue.."""
 
-        comment = "
+        comment = f"Issue {inward_issue_key} was linked to {outward_issue_key}."
         comment_body = {"content": [{"content": [{"text": comment,"type": "text"}],"type": "paragraph"}],"type": "doc","version": 1} if self.api_version == "3" else comment
         link_data = {
             "type": {"name": f"{linktype}"},
alita_sdk/tools/openapi/__init__.py
CHANGED

@@ -1,6 +1,7 @@
 import json
 import re
 import logging
+import yaml
 from typing import List, Any, Optional, Dict
 from langchain_core.tools import BaseTool, BaseToolkit, ToolException
 from requests_openapi import Operation, Client, Server
@@ -101,7 +102,15 @@ class AlitaOpenAPIToolkit(BaseToolkit):
         else:
             tools_set = {}
         if isinstance(openapi_spec, str):
-            [1 removed line, not shown in this view]
+            # Try to detect if it's YAML or JSON by attempting to parse as JSON first
+            try:
+                openapi_spec = json.loads(openapi_spec)
+            except json.JSONDecodeError:
+                # If JSON parsing fails, try YAML
+                try:
+                    openapi_spec = yaml.safe_load(openapi_spec)
+                except yaml.YAMLError as e:
+                    raise ToolException(f"Failed to parse OpenAPI spec as JSON or YAML: {e}")
         c = Client()
         c.load_spec(openapi_spec)
         if headers:
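The toolkit now accepts either JSON or YAML spec strings by trying json.loads first and falling back to yaml.safe_load. A small standalone sketch of that fallback; load_spec is an illustrative helper (it raises ValueError where the toolkit raises ToolException) and requires PyYAML:

import json
import yaml

def load_spec(raw: str) -> dict:
    # JSON is a subset of YAML, but trying JSON first gives clearer error messages
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        try:
            return yaml.safe_load(raw)
        except yaml.YAMLError as e:
            raise ValueError(f"Failed to parse OpenAPI spec as JSON or YAML: {e}")

print(load_spec('{"openapi": "3.0.0"}')["openapi"])
print(load_spec("openapi: 3.0.0\ninfo:\n  title: demo")["info"]["title"])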