alita-sdk 0.3.379__py3-none-any.whl → 0.3.462__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of alita-sdk might be problematic.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent_executor.py +144 -0
- alita_sdk/cli/agent_loader.py +197 -0
- alita_sdk/cli/agent_ui.py +166 -0
- alita_sdk/cli/agents.py +1069 -0
- alita_sdk/cli/callbacks.py +576 -0
- alita_sdk/cli/cli.py +159 -0
- alita_sdk/cli/config.py +153 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +330 -0
- alita_sdk/cli/toolkit_loader.py +55 -0
- alita_sdk/cli/tools/__init__.py +9 -0
- alita_sdk/cli/tools/filesystem.py +905 -0
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/client.py +47 -10
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +8 -0
- alita_sdk/runtime/langchain/assistant.py +37 -16
- alita_sdk/runtime/langchain/constants.py +6 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
- alita_sdk/runtime/langchain/langraph_agent.py +146 -31
- alita_sdk/runtime/langchain/utils.py +39 -7
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +24 -0
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/artifact.py +5 -6
- alita_sdk/runtime/toolkits/mcp.py +895 -0
- alita_sdk/runtime/toolkits/tools.py +137 -56
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +29 -25
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +204 -114
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/sandbox.py +57 -43
- alita_sdk/runtime/tools/vectorstore.py +2 -1
- alita_sdk/runtime/tools/vectorstore_base.py +19 -3
- alita_sdk/runtime/utils/mcp_oauth.py +164 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/streamlit.py +34 -3
- alita_sdk/runtime/utils/toolkit_utils.py +14 -4
- alita_sdk/tools/__init__.py +46 -31
- alita_sdk/tools/ado/repos/__init__.py +1 -0
- alita_sdk/tools/ado/test_plan/__init__.py +1 -1
- alita_sdk/tools/ado/wiki/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +105 -43
- alita_sdk/tools/bitbucket/__init__.py +1 -0
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/code/sonar/__init__.py +1 -1
- alita_sdk/tools/code_indexer_toolkit.py +13 -3
- alita_sdk/tools/confluence/__init__.py +2 -2
- alita_sdk/tools/confluence/api_wrapper.py +29 -7
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/github/__init__.py +2 -2
- alita_sdk/tools/gitlab/__init__.py +2 -1
- alita_sdk/tools/gitlab/api_wrapper.py +11 -7
- alita_sdk/tools/gitlab_org/__init__.py +1 -2
- alita_sdk/tools/google_places/__init__.py +2 -1
- alita_sdk/tools/jira/__init__.py +1 -0
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +1 -1
- alita_sdk/tools/postman/__init__.py +2 -1
- alita_sdk/tools/pptx/__init__.py +2 -2
- alita_sdk/tools/qtest/__init__.py +3 -3
- alita_sdk/tools/qtest/api_wrapper.py +1708 -76
- alita_sdk/tools/rally/__init__.py +1 -2
- alita_sdk/tools/report_portal/__init__.py +1 -0
- alita_sdk/tools/salesforce/__init__.py +1 -0
- alita_sdk/tools/servicenow/__init__.py +2 -3
- alita_sdk/tools/sharepoint/__init__.py +1 -0
- alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +1 -0
- alita_sdk/tools/sql/__init__.py +2 -1
- alita_sdk/tools/testio/__init__.py +1 -0
- alita_sdk/tools/testrail/__init__.py +1 -3
- alita_sdk/tools/utils/content_parser.py +27 -16
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +18 -5
- alita_sdk/tools/xray/__init__.py +2 -1
- alita_sdk/tools/zephyr/__init__.py +2 -1
- alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
- alita_sdk/tools/zephyr_essential/__init__.py +1 -0
- alita_sdk/tools/zephyr_scale/__init__.py +1 -0
- alita_sdk/tools/zephyr_squad/__init__.py +1 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +110 -86
- alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
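This release also adds a CLI package (alita_sdk/cli/) and a two-line entry_points.txt, which together indicate a newly registered console script. A minimal sketch for discovering what an installed wheel actually registers, using the standard importlib.metadata API (Python 3.10+); no script name is assumed here:

from importlib.metadata import entry_points

# list console scripts whose target lives in the alita_sdk package
for ep in entry_points(group="console_scripts"):
    if ep.value.startswith("alita_sdk"):
        print(f"{ep.name} -> {ep.value}")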
@@ -23,11 +23,6 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
         AzureDevOpsWorkItemsToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         m = create_model(
             name,
-            name=(str, Field(description="Toolkit name",
-                             json_schema_extra={
-                                 'toolkit_name': True,
-                                 'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})
-            ),
             ado_configuration=(AdoConfiguration, Field(description="Ado Work Item configuration", json_schema_extra={'configuration_types': ['ado']})),
             limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),

@@ -42,6 +37,7 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
             'metadata': {
                 "label": "ADO boards",
                 "icon_url": "ado-boards-icon.svg",
+                "max_length": AzureDevOpsWorkItemsToolkit.toolkit_max_length,
                 "categories": ["project management"],
                 "extra_categories": ["work item management", "issue tracking", "agile boards"],
                 "sections": {

@@ -329,11 +329,14 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
         parsed_item.update(fields_data)

         # extract relations if any
-        relations_data =
+        relations_data = None
+        if expand and str(expand).lower() in ("relations", "all"):
+            try:
+                relations_data = getattr(work_item, 'relations', None)
+            except KeyError:
+                relations_data = None
         if relations_data:
-            parsed_item['relations'] = []
-            for relation in relations_data:
-                parsed_item['relations'].append(relation.as_dict())
+            parsed_item['relations'] = [relation.as_dict() for relation in relations_data]

         if parse_attachments:
             # describe images in work item fields if present
@@ -344,13 +347,19 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
                     for img in images:
                         src = img.get('src')
                         if src:
-                            description = self.parse_attachment_by_url(src, image_description_prompt)
+                            description = self.parse_attachment_by_url(src, image_description_prompt=image_description_prompt)
                             img['image-description'] = description
                     parsed_item[field_name] = str(soup)
             # parse attached documents if present
-
-
-
+            for relation in parsed_item.get('relations', []):
+                # Only process actual file attachments
+                if relation.get('rel') == 'AttachedFile':
+                    file_name = relation.get('attributes', {}).get('name')
+                    if file_name:
+                        try:
+                            relation['content'] = self.parse_attachment_by_url(relation['url'], file_name, image_description_prompt=image_description_prompt)
+                        except Exception as att_e:
+                            logger.warning(f"Failed to parse attachment {file_name}: {att_e}")


         return parsed_item
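The image half of this hunk follows the usual BeautifulSoup pattern: walk the img tags, derive a description per src, write it back as an attribute, and re-serialize the soup. A minimal runnable sketch of that pattern (the hard-coded description stands in for parse_attachment_by_url, which needs a live ADO connection):

from bs4 import BeautifulSoup

html = '<div><img src="https://example.test/diagram.png"></div>'
soup = BeautifulSoup(html, "html.parser")
for img in soup.find_all("img"):
    src = img.get("src")
    if src:
        # stand-in for self.parse_attachment_by_url(src, image_description_prompt=...)
        img["image-description"] = f"description of {src}"
print(str(soup))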
@@ -4,10 +4,10 @@ import logging
 import time
 from typing import Any, Optional, List, Dict, Generator

+from langchain_core.callbacks import dispatch_custom_event
 from langchain_core.documents import Document
 from pydantic import create_model, Field, SecretStr

-from .utils import make_json_serializable
 from .utils.content_parser import file_extension_by_chunker, process_document_by_type
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.langchain.document_loaders.constants import loaders_allowed_to_override

@@ -16,6 +16,8 @@ from ..runtime.utils.utils import IndexerKeywords

 logger = logging.getLogger(__name__)

+DEFAULT_CUT_OFF = 0.2
+
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
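Hoisting the value into a module-level DEFAULT_CUT_OFF keeps the search schemas and method signatures below in sync, and the new ge/le bounds make pydantic reject out-of-range scores. A minimal sketch of the pattern (the schema name is illustrative):

from typing import Optional
from pydantic import Field, create_model

DEFAULT_CUT_OFF = 0.2  # single source of truth for the default

SearchParams = create_model(
    "SearchParams",
    cut_off=(Optional[float], Field(default=DEFAULT_CUT_OFF, ge=0, le=1,
                                    description="Cut-off score for search results")),
)

print(SearchParams().cut_off)  # 0.2
SearchParams(cut_off=0.5)      # validates
# SearchParams(cut_off=1.5) would raise a ValidationError because of le=1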
@@ -38,7 +40,7 @@ BaseSearchParams = create_model(
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",

@@ -68,7 +70,7 @@ BaseStepbackSearchParams = create_model(
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",

@@ -111,7 +113,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('
+        collection_name = kwargs.get('collection_schema')

         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'

@@ -176,11 +178,15 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 f"Processing documents to collect dependencies and prepare them for indexing...")
             self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
             #
-
+            results_count = result["count"]
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            self._emit_index_event(index_name)
             #
-            return {"status": "ok", "message": f"successfully indexed {
+            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
+                    else "no new documents to index"}
         except Exception as e:
             self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
+            self._emit_index_event(index_name, error=str(e))
             raise e


@@ -379,7 +385,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def search_index(self,
                      query: str,
                      index_name: str = "",
-                     filter: dict | str = {}, cut_off: float =
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,

@@ -410,7 +416,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                          query: str,
                          messages: List[Dict[str, Any]] = [],
                          index_name: str = "",
-                         filter: dict | str = {}, cut_off: float =
+                         filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                          search_top: int = 10, reranker: dict = {},
                          full_text_search: Optional[Dict[str, Any]] = None,
                          reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,

@@ -435,7 +441,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                          query: str,
                          messages: List[Dict[str, Any]] = [],
                          index_name: str = "",
-                         filter: dict | str = {}, cut_off: float =
+                         filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                          search_top: int = 10, reranker: dict = {},
                          full_text_search: Optional[Dict[str, Any]] = None,
                          reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,

@@ -457,37 +463,29 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         )

     def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        history = []
-        new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
-        history.append(new_history_item)
-        metadata["history"] = json.dumps(history)
-        index_meta_ids = [index_meta_raw.get("id")]
-        #
-        index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
-        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "updated": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "task_id": None,
+                "conversation_id": None,
+            }
+            metadata["history"] = json.dumps([metadata])
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])

     def index_meta_update(self, index_name: str, state: str, result: int):
         index_meta_raw = super().get_index_meta(index_name)
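The rewritten index_meta_init above seeds the metadata with its own first snapshot, JSON-encoded under the "history" key; index_meta_update in the next hunk pops that list, overwrites the last snapshot, and re-encodes it. A self-contained sketch of that round-trip, assuming the same key names:

import json
import time

metadata = {"collection": "docs", "state": "in_progress", "created_on": time.time()}
# bootstrap: history holds the initial snapshot of the metadata itself
metadata["history"] = json.dumps([dict(metadata)])

# update: pop the trail, mutate the current snapshot, re-encode
history = json.loads(metadata.pop("history", "[]"))
metadata["state"] = "completed"
history[-1] = metadata
metadata["history"] = json.dumps(history)  # dumped before "history" is re-added
print(metadata["state"], len(history))     # completed 1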
@@ -495,12 +493,75 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         #
         if index_meta_raw:
             metadata = copy.deepcopy(index_meta_raw.get("metadata", {}))
-            metadata["indexed"] =
+            metadata["indexed"] = self.get_indexed_count(index_name)
+            metadata["updated"] = result
             metadata["state"] = state
             metadata["updated_on"] = time.time()
+            #
+            history_raw = metadata.pop("history", "[]")
+            try:
+                history = json.loads(history_raw) if history_raw.strip() else []
+                # replace the last history item with updated metadata
+                if history and isinstance(history, list):
+                    history[-1] = metadata
+                else:
+                    history = [metadata]
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(f"Failed to load index history: {history_raw}. Create new with only current item.")
+                history = [metadata]
+            #
+            metadata["history"] = json.dumps(history)
             index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
             add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])

+    def _emit_index_event(self, index_name: str, error: Optional[str] = None):
+        """
+        Emit custom event for index data operation.
+
+        Args:
+            index_name: The name of the index
+            error: Error message if the operation failed, None otherwise
+        """
+        index_meta = super().get_index_meta(index_name)
+
+        if not index_meta:
+            logger.warning(
+                f"No index_meta found for index '{index_name}'. "
+                "Cannot emit index event."
+            )
+            return
+
+        metadata = index_meta.get("metadata", {})
+
+        # Determine if this is a reindex operation
+        history_raw = metadata.get("history", "[]")
+        try:
+            history = json.loads(history_raw) if history_raw.strip() else []
+            is_reindex = len(history) > 1
+        except (json.JSONDecodeError, TypeError):
+            is_reindex = False
+
+        # Build event message
+        event_data = {
+            "id": index_meta.get("id"),
+            "index_name": index_name,
+            "state": metadata.get("state"),
+            "error": error,
+            "reindex": is_reindex,
+            "indexed": metadata.get("indexed", 0),
+            "updated": metadata.get("updated", 0),
+        }
+
+        # Emit the event
+        try:
+            dispatch_custom_event("index_data_status", event_data)
+            logger.debug(
+                f"Emitted index_data_status event for index "
+                f"'{index_name}': {event_data}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to emit index_data_status event: {e}")
+
     def get_available_tools(self):
         """
         Returns the standardized vector search tools (search operations only).
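_emit_index_event depends on langchain-core's custom-event channel: dispatch_custom_event must run inside an active runnable so the event can attach to a parent run, and listeners receive it through on_custom_event. A minimal sketch, assuming a langchain-core recent enough to ship dispatch_custom_event (roughly 0.2.15+):

from langchain_core.callbacks import BaseCallbackHandler, dispatch_custom_event
from langchain_core.runnables import RunnableLambda

class IndexStatusHandler(BaseCallbackHandler):
    def on_custom_event(self, name, data, **kwargs):
        if name == "index_data_status":
            print(f"index event: {data}")

def fake_index(_):
    # called inside a runnable, so the event finds a parent run context
    dispatch_custom_event("index_data_status", {"state": "completed", "indexed": 3})
    return "ok"

RunnableLambda(fake_index).invoke(None, config={"callbacks": [IndexStatusHandler()]})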
@@ -554,6 +615,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 "mode": "list_collections",
                 "ref": self.list_collections,
                 "description": self.list_collections.__doc__,
-
+                # No parameters
+                "args_schema": create_model("ListCollectionsParams")
             },
-        ]
+        ]
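The new args_schema uses a field-less model: pydantic's create_model called with only a name yields an empty schema, the idiomatic way to declare a tool that accepts no arguments:

from pydantic import create_model

ListCollectionsParams = create_model("ListCollectionsParams")
print(ListCollectionsParams().model_dump())  # {} - nothing to validate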
@@ -61,6 +61,7 @@ class AlitaBitbucketToolkit(BaseToolkit):
                 'metadata':
                     {
                         "label": "Bitbucket", "icon_url": "bitbucket-icon.svg",
+                        "max_length": AlitaBitbucketToolkit.toolkit_max_length,
                         "categories": ["code repositories"],
                         "extra_categories": ["bitbucket", "git", "repository", "code", "version control"],
                     }

@@ -6,7 +6,7 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain.text_splitter import TokenTextSplitter

 from typing import Optional, List
-from
+from pydantic import BaseModel
 from ..utils import tiktoken_length

 logger = getLogger(__name__)

@@ -29,7 +29,7 @@ class SonarToolkit(BaseToolkit):
         SonarToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            sonar_project_name=(str, Field(description="Project name of the desired repository"
+            sonar_project_name=(str, Field(description="Project name of the desired repository")),
             sonar_configuration=(SonarConfiguration, Field(description="Sonar Configuration", json_schema_extra={'configuration_types': ['sonar']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=

@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator


@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         return self.vector_adapter.get_code_indexed_data(self, index_name)

     def key_fn(self, document: Document):
-        return document.metadata.get(
+        return document.metadata.get("filename")

     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and

@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )

     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from
+        yield from documents

     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
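The normalization block guards the sha256 step below it, which needs a str to encode. The same guard as a standalone function (the name normalize_content is illustrative):

import hashlib
import json

def normalize_content(file_name: str, content) -> str:
    # bytes are decoded, JSON payloads serialized, everything else stringified
    if isinstance(content, bytes):
        return content.decode("utf-8", errors="ignore")
    if isinstance(content, dict) and file_name.endswith(".json"):
        return json.dumps(content)
    if not isinstance(content, str):
        return str(content)
    return content

text = normalize_content("data.json", {"a": 1})
print(hashlib.sha256(text.encode("utf-8")).hexdigest()[:12])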
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
             self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")

-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())

     def __handle_get_files(self, path: str, branch: str):
         """

@@ -67,8 +67,7 @@ class ConfluenceToolkit(BaseToolkit):

         model = create_model(
             name,
-            space=(str, Field(description="Space",
-                              'max_toolkit_length': ConfluenceToolkit.toolkit_max_length})),
+            space=(str, Field(description="Space")),
             cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
             limit=(int, Field(description="Pages limit per request", default=5)),
             labels=(Optional[str], Field(

@@ -95,6 +94,7 @@ class ConfluenceToolkit(BaseToolkit):
             'metadata': {
                 "label": "Confluence",
                 "icon_url": None,
+                "max_length": ConfluenceToolkit.toolkit_max_length,
                 "categories": ["documentation"],
                 "extra_categories": ["confluence", "wiki", "knowledge base", "documentation", "atlassian"]
             }

@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal

 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log

 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit

@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)

     @model_validator(mode='before')

@@ -498,7 +501,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         restrictions = self.client.get_all_restrictions_for_content(page["id"])

         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
             and not restrictions["read"]["restrictions"]["user"]["results"]
             and not restrictions["read"]["restrictions"]["group"]["results"]
         )
@@ -518,18 +523,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
             ),
             before_sleep=before_sleep_log(logger, logging.WARNING),
         )(self.client.get_page_by_id)
-
-
-
-
-
+        try:
+            page = get_page(
+                page_id=page_id, expand=f"{self.content_format.value},version"
+            )
+        except (ApiError, HTTPError) as e:
+            logger.error(f"Error fetching page with ID {page_id}: {e}")
+            page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+            # store errors
+            if self._errors is None:
+                self._errors = []
+            self._errors.append(page_content_temp)
+            return Document(page_content=page_content_temp,
+                            metadata={})
+        # TODO: update on toolkit advanced settings level as a separate feature
+        # if not self.include_restricted_content and not self.is_public_page(page):
+        #     continue
         yield self.process_page(page, skip_images)

+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content
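The fetch above applies tenacity at call time instead of decorating a def: retry(...) returns a decorator, so wrapping self.client.get_page_by_id yields a retrying callable. A minimal sketch of the same pattern; note tenacity raises RetryError (not the original exception) once attempts are exhausted unless reraise=True is set:

import logging
from tenacity import RetryError, retry, stop_after_attempt, wait_exponential, before_sleep_log

logger = logging.getLogger(__name__)

def flaky_fetch(page_id: str) -> str:
    raise ConnectionError(f"transient failure for page {page_id}")

get_page = retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=0.1, min=0.1, max=1),
    before_sleep=before_sleep_log(logger, logging.WARNING),
)(flaky_fetch)

try:
    get_page("12345")
except RetryError:
    print("gave up after 3 attempts")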
@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger

 import requests
+from langchain_core.documents import Document

 logger = getLogger(__name__)
 from PIL import Image

@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)

+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,

@@ -53,6 +53,7 @@ class AlitaGitHubToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitHub",
                 "icon_url": None,
+                "max_length": AlitaGitHubToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["github", "git", "repository", "code", "version control"],
             },

@@ -62,8 +63,7 @@ class AlitaGitHubToolkit(BaseToolkit):
                                            json_schema_extra={'configuration_types': ['github']})),
             pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector configuration", default=None,
                                                                            json_schema_extra={'configuration_types': ['pgvector']})),
-            repository=(str, Field(description="Github repository",
-                                   'max_toolkit_length': AlitaGitHubToolkit.toolkit_max_length})),
+            repository=(str, Field(description="Github repository")),
             active_branch=(Optional[str], Field(description="Active branch", default="main")),
             base_branch=(Optional[str], Field(description="Github Base branch", default="main")),
             # embedder settings

@@ -43,7 +43,7 @@ class AlitaGitlabToolkit(BaseToolkit):
         AlitaGitlabToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            repository=(str, Field(description="GitLab repository"
+            repository=(str, Field(description="GitLab repository")),
             gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration", json_schema_extra={'configuration_types': ['gitlab']})),
             branch=(str, Field(description="Main branch", default="main")),
             # indexer settings

@@ -57,6 +57,7 @@ class AlitaGitlabToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitLab",
                 "icon_url": None,
+                "max_length": AlitaGitlabToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
             }

@@ -117,7 +117,11 @@ class GitLabAPIWrapper(CodeIndexerToolkit):

     @model_validator(mode='before')
     @classmethod
-    def
+    def validate_toolkit_before(cls, values: Dict) -> Dict:
+        return super().validate_toolkit(values)
+
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:
@@ -125,17 +129,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=
-            private_token=
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )

         g.auth()
-
-
-        return
+        self._git = g
+        self._active_branch = self.branch
+        return self

     @property
     def repo_instance(self):
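The validator split above is the standard pydantic v2 recipe: a mode='before' classmethod massages the raw input, while a mode='after' method runs on the constructed instance (the right place for side effects like opening a client) and must return self. A minimal sketch with illustrative names:

from pydantic import BaseModel, SecretStr, model_validator

class GitLabSettings(BaseModel):
    url: str
    private_token: SecretStr
    _client: str = ""  # leading underscore makes this a private attribute

    @model_validator(mode="before")
    @classmethod
    def normalize_input(cls, values: dict) -> dict:
        values["url"] = values.get("url", "").rstrip("/")
        return values

    @model_validator(mode="after")
    def connect(self):
        # the real wrapper builds gitlab.Gitlab(...) and calls g.auth() here
        self._client = f"client for {self.url}"
        return self

s = GitLabSettings(url="https://gitlab.example.com/", private_token="token")
print(s.url)  # trailing slash stripped by the before-validator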
@@ -30,8 +30,6 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
         AlitaGitlabSpaceToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            name=(str, Field(description="Toolkit name", json_schema_extra={'toolkit_name': True,
-                                                                            'max_toolkit_length': AlitaGitlabSpaceToolkit.toolkit_max_length})),
             gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration",
                                                              json_schema_extra={
                                                                  'configuration_types': ['gitlab']})),

@@ -46,6 +44,7 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitLab Org",
                 "icon_url": None,
+                "max_length": AlitaGitlabSpaceToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
             }

@@ -30,7 +30,7 @@ class GooglePlacesToolkit(BaseToolkit):
         GooglePlacesToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            results_count=(Optional[int], Field(description="Results number to show", default=None
+            results_count=(Optional[int], Field(description="Results number to show", default=None)),
             google_places_configuration=(GooglePlacesConfiguration, Field(description="Google Places Configuration", json_schema_extra={'configuration_types': ['google_places']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=

@@ -38,6 +38,7 @@ class GooglePlacesToolkit(BaseToolkit):
             'metadata':
                 {
                     "label": "Google Places", "icon_url": "gplaces-icon.svg",
+                    "max_length": GooglePlacesToolkit.toolkit_max_length,
                     "categories": ["other"],
                     "extra_categories": ["google", "places", "maps", "location",
                                          "geolocation"],

alita_sdk/tools/jira/__init__.py CHANGED

@@ -89,6 +89,7 @@ class JiraToolkit(BaseToolkit):
             'metadata': {
                 "label": "Jira",
                 "icon_url": "jira-icon.svg",
+                "max_length": JiraToolkit.toolkit_max_length,
                 "categories": ["project management"],
                 "extra_categories": ["jira", "atlassian", "issue tracking", "project management", "task management"],
             }
@@ -563,7 +563,7 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         Use the appropriate issue link type (e.g., "Test", "Relates", "Blocks").
         If we use "Test" linktype, the test is inward issue, the story/other issue is outward issue.."""

-        comment = "
+        comment = f"Issue {inward_issue_key} was linked to {outward_issue_key}."
         comment_body = {"content": [{"content": [{"text": comment,"type": "text"}],"type": "paragraph"}],"type": "doc","version": 1} if self.api_version == "3" else comment
         link_data = {
             "type": {"name": f"{linktype}"},
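The comment payload branches on API version because Jira's v3 REST API expects Atlassian Document Format (ADF) while v2 accepts plain text, which is what the inline conditional above encodes. The same logic as a standalone helper (the function name is illustrative):

def build_comment_body(comment: str, api_version: str):
    if api_version == "3":
        # minimal ADF document: one paragraph containing one text node
        return {"type": "doc", "version": 1,
                "content": [{"type": "paragraph",
                             "content": [{"type": "text", "text": comment}]}]}
    return comment  # API v2 accepts the raw string

print(build_comment_body("Issue A-1 was linked to B-2.", "3"))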
@@ -61,7 +61,7 @@ class MemoryToolkit(BaseToolkit):

         return create_model(
             'memory',
-            namespace=(str, Field(description="Memory namespace"
+            namespace=(str, Field(description="Memory namespace")),
             pgvector_configuration=(PgVectorConfiguration, Field(description="PgVector Configuration",
                                                                  json_schema_extra={
                                                                      'configuration_types': ['pgvector']})),