alita-sdk 0.3.376__py3-none-any.whl → 0.3.435__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/client.py +9 -4
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +8 -0
- alita_sdk/runtime/langchain/assistant.py +41 -38
- alita_sdk/runtime/langchain/constants.py +5 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
- alita_sdk/runtime/langchain/langraph_agent.py +91 -27
- alita_sdk/runtime/langchain/utils.py +24 -4
- alita_sdk/runtime/models/mcp_models.py +57 -0
- alita_sdk/runtime/toolkits/__init__.py +24 -0
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/mcp.py +787 -0
- alita_sdk/runtime/toolkits/tools.py +98 -50
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +20 -28
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +146 -114
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +79 -10
- alita_sdk/runtime/tools/sandbox.py +166 -63
- alita_sdk/runtime/tools/vectorstore.py +3 -2
- alita_sdk/runtime/tools/vectorstore_base.py +4 -3
- alita_sdk/runtime/utils/streamlit.py +34 -3
- alita_sdk/runtime/utils/toolkit_utils.py +5 -2
- alita_sdk/runtime/utils/utils.py +1 -0
- alita_sdk/tools/__init__.py +48 -31
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +75 -66
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/code_indexer_toolkit.py +13 -3
- alita_sdk/tools/confluence/api_wrapper.py +29 -7
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/elitea_base.py +7 -7
- alita_sdk/tools/gitlab/api_wrapper.py +11 -7
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/qtest/api_wrapper.py +522 -74
- alita_sdk/tools/sharepoint/api_wrapper.py +104 -33
- alita_sdk/tools/sharepoint/authorization_helper.py +175 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/utils/content_parser.py +27 -16
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +19 -6
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/RECORD +60 -55
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/top_level.txt +0 -0
alita_sdk/tools/__init__.py
CHANGED

@@ -90,62 +90,79 @@ available_count = len(AVAILABLE_TOOLS)
 total_attempted = len(AVAILABLE_TOOLS) + len(FAILED_IMPORTS)
 logger.info(f"Tool imports completed: {available_count}/{total_attempted} successful")

+
 def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args, **kwargs):
     tools = []
+
     for tool in tools_list:
-        …
-        tool…
+        settings = tool.get('settings')
+
+        # Skip tools without settings early
+        if not settings:
+            logger.warning(f"Tool '{tool.get('type', '')}' has no settings, skipping...")
+            continue
+
+        # Validate tool names once
+        selected_tools = settings.get('selected_tools', [])
+        invalid_tools = [name for name in selected_tools if isinstance(name, str) and name.startswith('_')]
+        if invalid_tools:
+            raise ValueError(f"Tool names {invalid_tools} from toolkit '{tool.get('type', '')}' cannot start with '_'")
+
+        # Cache tool type and add common settings
         tool_type = tool['type']
+        settings['alita'] = alita
+        settings['llm'] = llm
+        settings['store'] = store
+
+        # Set pgvector collection schema if present
+        if settings.get('pgvector_configuration'):
+            settings['pgvector_configuration']['collection_schema'] = str(tool['id'])

-        # Handle special cases
+        # Handle ADO special cases
         if tool_type in ['ado_boards', 'ado_wiki', 'ado_plans']:
             tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
+            continue

-        #…
-        …
+        # Handle ADO repos aliases
+        if tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
            try:
-                …
-                tools.extend(get_tools_func(tool))
-                …
+                tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
            except Exception as e:
-                logger.error(f"Error getting…
-                …
+                logger.error(f"Error getting ADO repos tools: {e}")
+            continue

-        #…
-        …
+        # Skip MCP toolkit - it's handled by runtime/toolkits/tools.py to avoid duplicate loading
+        if tool_type == 'mcp':
+            logger.debug(f"Skipping MCP toolkit '{tool.get('toolkit_name')}' - handled by runtime toolkit system")
+            continue
+
+        # Handle standard tools
+        if tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
            try:
-                …
-                tools.extend(get_tools_func(tool))
+                tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
            except Exception as e:
-                logger.error(f"Error getting…
+                logger.error(f"Error getting tools for {tool_type}: {e}")
+                raise ToolException(f"Error getting tools for {tool_type}: {e}")
+            continue

         # Handle custom modules
-        …
+        if settings.get("module"):
            try:
-                settings = tool.get("settings", {})
                mod = import_module(settings.pop("module"))
                tkitclass = getattr(mod, settings.pop("class"))
-                …
-                get_toolkit_params = tool["settings"].copy()
+                get_toolkit_params = settings.copy()
                get_toolkit_params["name"] = tool.get("name")
-                #…
                toolkit = tkitclass.get_toolkit(**get_toolkit_params)
                tools.extend(toolkit.get_tools())
            except Exception as e:
                logger.error(f"Error in getting custom toolkit: {e}")
+            continue

+        # Tool not available
+        if tool_type in FAILED_IMPORTS:
+            logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
         else:
-            …
-            if tool_type in FAILED_IMPORTS:
-                logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
-            else:
-                logger.warning(f"Unknown tool type: {tool_type}")
+            logger.warning(f"Unknown tool type: {tool_type}")

     return tools

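The reworked get_tools loop above validates each toolkit entry's settings, injects the shared objects (alita, llm, store), and then dispatches by type. Below is a standalone sketch of that dispatch pattern; the registry contents and sample tool config are made up for illustration, not the SDK's real AVAILABLE_TOOLS.

# Minimal sketch of the dispatch pattern; registry and config are hypothetical.
from typing import Any, Callable, Dict, List

AVAILABLE_TOOLS: Dict[str, Dict[str, Callable[..., List[Any]]]] = {
    "github": {"get_tools": lambda tool: [f"github-tool-for-{tool['id']}"]},
}
FAILED_IMPORTS: Dict[str, str] = {"jira": "ModuleNotFoundError: jira"}

def get_tools(tools_list: List[dict]) -> List[Any]:
    tools: List[Any] = []
    for tool in tools_list:
        settings = tool.get("settings")
        if not settings:
            continue  # skip entries without settings, as in the diff above
        invalid = [n for n in settings.get("selected_tools", [])
                   if isinstance(n, str) and n.startswith("_")]
        if invalid:
            raise ValueError(f"Tool names {invalid} cannot start with '_'")
        tool_type = tool["type"]
        if tool_type in AVAILABLE_TOOLS and "get_tools" in AVAILABLE_TOOLS[tool_type]:
            tools.extend(AVAILABLE_TOOLS[tool_type]["get_tools"](tool))
            continue
        if tool_type in FAILED_IMPORTS:
            print(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
        else:
            print(f"Unknown tool type: {tool_type}")
    return tools

print(get_tools([{"id": 1, "type": "github",
                  "settings": {"selected_tools": ["list_branches"]}}]))
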
alita_sdk/tools/ado/work_item/ado_wrapper.py
CHANGED

@@ -329,11 +329,14 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
         parsed_item.update(fields_data)

         # extract relations if any
-        relations_data = …
+        relations_data = None
+        if expand and str(expand).lower() in ("relations", "all"):
+            try:
+                relations_data = getattr(work_item, 'relations', None)
+            except KeyError:
+                relations_data = None
         if relations_data:
-            parsed_item['relations'] = []
-            for relation in relations_data:
-                parsed_item['relations'].append(relation.as_dict())
+            parsed_item['relations'] = [relation.as_dict() for relation in relations_data]

         if parse_attachments:
             # describe images in work item fields if present
@@ -344,13 +347,19 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
                 for img in images:
                     src = img.get('src')
                     if src:
-                        description = self.parse_attachment_by_url(src, image_description_prompt)
+                        description = self.parse_attachment_by_url(src, image_description_prompt=image_description_prompt)
                         img['image-description'] = description
                 parsed_item[field_name] = str(soup)
             # parse attached documents if present
-            …
+            for relation in parsed_item.get('relations', []):
+                # Only process actual file attachments
+                if relation.get('rel') == 'AttachedFile':
+                    file_name = relation.get('attributes', {}).get('name')
+                    if file_name:
+                        try:
+                            relation['content'] = self.parse_attachment_by_url(relation['url'], file_name, image_description_prompt=image_description_prompt)
+                        except Exception as att_e:
+                            logger.warning(f"Failed to parse attachment {file_name}: {att_e}")


         return parsed_item

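The attachment handling above only processes relations whose rel is 'AttachedFile' and tolerates per-file parse failures. A rough standalone sketch of that filter follows; the relations payload and the describe_attachment helper are hypothetical stand-ins for the wrapper's parse_attachment_by_url.

# Hypothetical relations payload shaped like Azure DevOps work-item relations.
relations = [
    {"rel": "System.LinkTypes.Hierarchy-Forward", "url": "https://example.invalid/wit/2"},
    {"rel": "AttachedFile", "url": "https://example.invalid/attachments/1",
     "attributes": {"name": "spec.docx"}},
]

def describe_attachment(url: str, file_name: str) -> str:
    # stand-in for the real attachment parser
    return f"parsed {file_name} from {url}"

for relation in relations:
    if relation.get("rel") == "AttachedFile":  # only real file attachments
        file_name = relation.get("attributes", {}).get("name")
        if file_name:
            relation["content"] = describe_attachment(relation["url"], file_name)

print([r.get("content") for r in relations])
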
alita_sdk/tools/base_indexer_toolkit.py
CHANGED

@@ -7,7 +7,6 @@ from typing import Any, Optional, List, Dict, Generator
 from langchain_core.documents import Document
 from pydantic import create_model, Field, SecretStr

-from .utils import make_json_serializable
 from .utils.content_parser import file_extension_by_chunker, process_document_by_type
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.langchain.document_loaders.constants import loaders_allowed_to_override
@@ -111,7 +110,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('…
+        collection_name = kwargs.get('collection_schema')

         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
@@ -152,39 +151,45 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):

     def index_data(self, **kwargs):
         index_name = kwargs.get("index_name")
-        progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         chunking_tool = kwargs.get("chunking_tool")
         chunking_config = kwargs.get("chunking_config")
+        result = {"count": 0}
         #
-        …
+        try:
+            if clean_index:
+                self._clean_index(index_name)
+            #
+            self.index_meta_init(index_name, kwargs)
+            #
+            self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
+            self._log_tool_event(f"Loading the documents to index...{kwargs}")
+            documents = self._base_loader(**kwargs)
+            documents = list(documents)  # consume/exhaust generator to count items
+            documents_count = len(documents)
+            documents = (doc for doc in documents)
+            self._log_tool_event(f"Base documents were pre-loaded. "
+                                 f"Search for possible document duplicates and remove them from the indexing list...")
+            documents = self._reduce_duplicates(documents, index_name)
+            self._log_tool_event(f"Duplicates were removed. "
+                                 f"Processing documents to collect dependencies and prepare them for indexing...")
+            self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
+            #
+            results_count = result["count"]
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            #
+            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
+                    else "no new documents to index"}
+        except Exception as e:
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
+            raise e
+
+
+    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
         self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
         from ..runtime.langchain.interfaces.llm_processor import add_documents
         #
         base_doc_counter = 0
-        total_counter = 0
         pg_vector_add_docs_chunk = []
         for base_doc in base_documents:
             base_doc_counter += 1
@@ -232,10 +237,9 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             msg = f"Indexed base document #{base_doc_counter} out of {base_total} (with {dependent_docs_counter} dependencies)."
             logger.debug(msg)
             self._log_tool_event(msg)
-            …
+            result["count"] += dependent_docs_counter
         if pg_vector_add_docs_chunk:
             add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
-        return total_counter

     def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
         from ..tools.chunkers import __all__ as chunkers
@@ -343,7 +347,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         """Cleans the indexed data in the collection."""
         super()._clean_collection(index_name=index_name)
         return (f"Collection '{index_name}' has been removed from the vector store.\n"
-                f"Available collections: {self.…
+                f"Available collections: {self.list_collections()}") if index_name \
             else "All collections have been removed from the vector store."

     def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
@@ -385,7 +389,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         """ Searches indexed documents in the vector store."""
         # build filter on top of index_name

-        available_collections = super().…
+        available_collections = super().list_collections()
         if index_name and index_name not in available_collections:
             return f"Collection '{index_name}' not found. Available collections: {available_collections}"

@@ -454,37 +458,28 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         )

     def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
-        …
-        else:
-            history = []
-        new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
-        history.append(new_history_item)
-        metadata["history"] = json.dumps(history)
-        index_meta_ids = [index_meta_raw.get("id")]
-        #…
-        index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
-        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "task_id": None,
+                "conversation_id": None,
+            }
+            metadata["history"] = json.dumps([metadata])
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])

     def index_meta_update(self, index_name: str, state: str, result: int):
         index_meta_raw = super().get_index_meta(index_name)
@@ -495,6 +490,20 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         metadata["indexed"] = result
         metadata["state"] = state
         metadata["updated_on"] = time.time()
+        #
+        history_raw = metadata.pop("history", "[]")
+        try:
+            history = json.loads(history_raw) if history_raw.strip() else []
+            # replace the last history item with updated metadata
+            if history and isinstance(history, list):
+                history[-1] = metadata
+            else:
+                history = [metadata]
+        except (json.JSONDecodeError, TypeError):
+            logger.warning(f"Failed to load index history: {history_raw}. Create new with only current item.")
+            history = [metadata]
+        #
+        metadata["history"] = json.dumps(history)
         index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
         add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])

@@ -547,10 +556,10 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             "args_schema": RemoveIndexParams
         },
         {
-            "name": "…
-            "mode": "…
-            "ref": self.…
-            "description": self.…
+            "name": "list_collections",
+            "mode": "list_collections",
+            "ref": self.list_collections,
+            "description": self.list_collections.__doc__,
             "args_schema": create_model("ListCollectionsParams")  # No parameters
         },
     ]

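The index_meta bookkeeping above keeps a JSON-encoded history list inside the metadata document: initialization seeds it with the first snapshot, and every update rewrites the last entry, recreating the list if it cannot be parsed. A self-contained sketch of that round-trip follows; it keeps only a subset of the fields from the diff and omits the vector-store write.

import json
import time

def init_meta(collection: str) -> dict:
    created = time.time()
    metadata = {"collection": collection, "indexed": 0, "state": "in_progress",
                "created_on": created, "updated_on": created}
    metadata["history"] = json.dumps([metadata])  # seed history with the initial snapshot
    return metadata

def update_meta(metadata: dict, state: str, indexed: int) -> dict:
    metadata.update(indexed=indexed, state=state, updated_on=time.time())
    history_raw = metadata.pop("history", "[]")
    try:
        history = json.loads(history_raw) if history_raw.strip() else []
        if history and isinstance(history, list):
            history[-1] = metadata  # replace the latest snapshot with the updated one
        else:
            history = [metadata]
    except (json.JSONDecodeError, TypeError):
        history = [metadata]  # fall back to a fresh history containing only the current item
    metadata["history"] = json.dumps(history)
    return metadata

meta = init_meta("docs")
print(update_meta(meta, "completed", 42)["state"])
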
alita_sdk/tools/chunkers/sematic/proposal_chunker.py
CHANGED

@@ -6,7 +6,7 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain.text_splitter import TokenTextSplitter

 from typing import Optional, List
-from …
+from pydantic import BaseModel
 from ..utils import tiktoken_length

 logger = getLogger(__name__)

alita_sdk/tools/code_indexer_toolkit.py
CHANGED

@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator

@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         return self.vector_adapter.get_code_indexed_data(self, index_name)

     def key_fn(self, document: Document):
-        return document.metadata.get(…
+        return document.metadata.get("filename")

     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )

     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from …
+        yield from documents

     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
         self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")

-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())

     def __handle_get_files(self, path: str, branch: str):
         """

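The loader change above normalizes whatever the repository client returns (bytes, parsed JSON, other objects) into a string before hashing. A small standalone sketch of the same coercion rules, with made-up file names and contents:

import hashlib
import json

def normalize_content(file_name: str, file_content) -> str:
    # coerce loader output to text, mirroring the rules in the diff above
    if isinstance(file_content, bytes):
        return file_content.decode("utf-8", errors="ignore")
    if isinstance(file_content, dict) and file_name.endswith(".json"):
        return json.dumps(file_content)
    if not isinstance(file_content, str):
        return str(file_content)
    return file_content

for name, raw in [("a.py", b"print('hi')"), ("b.json", {"k": 1}), ("c.txt", 42)]:
    text = normalize_content(name, raw)
    digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
    print(name, type(text).__name__, digest[:12])
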
alita_sdk/tools/confluence/api_wrapper.py
CHANGED

@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal

 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log

 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)

     @model_validator(mode='before')
@@ -498,7 +501,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         restrictions = self.client.get_all_restrictions_for_content(page["id"])

         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
             and not restrictions["read"]["restrictions"]["user"]["results"]
             and not restrictions["read"]["restrictions"]["group"]["results"]
         )
@@ -518,18 +523,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
             ),
             before_sleep=before_sleep_log(logger, logging.WARNING),
         )(self.client.get_page_by_id)
-        …
+        try:
+            page = get_page(
+                page_id=page_id, expand=f"{self.content_format.value},version"
+            )
+        except (ApiError, HTTPError) as e:
+            logger.error(f"Error fetching page with ID {page_id}: {e}")
+            page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+            # store errors
+            if self._errors is None:
+                self._errors = []
+            self._errors.append(page_content_temp)
+            return Document(page_content=page_content_temp,
+                            metadata={})
+        # TODO: update on toolkit advanced settings level as a separate feature
+        # if not self.include_restricted_content and not self.is_public_page(page):
+        #     continue
         yield self.process_page(page, skip_images)

+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "…
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content

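The Confluence wrapper now converts per-page fetch failures into placeholder documents and remembers the messages in _errors, so a later call can report why a page is missing instead of silently returning nothing. A rough standalone sketch of that collect-and-continue pattern; the fake fetcher and page IDs are invented.

class PageReader:
    def __init__(self):
        self._errors = None

    def _fetch(self, page_id: str) -> str:
        if page_id == "missing":
            raise RuntimeError("404 Not Found")  # stand-in for ApiError / HTTPError
        return f"content of {page_id}"

    def get_pages_by_id(self, page_ids):
        for page_id in page_ids:
            try:
                yield self._fetch(page_id)
            except Exception as e:
                message = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
                self._errors = (self._errors or []) + [message]
                yield message  # placeholder instead of aborting the whole batch

reader = PageReader()
print(list(reader.get_pages_by_id(["123", "missing"])))
print(reader._errors)
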
alita_sdk/tools/confluence/loader.py
CHANGED

@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger

 import requests
+from langchain_core.documents import Document

 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)

+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
alita_sdk/tools/elitea_base.py
CHANGED

@@ -17,7 +17,7 @@ from ..runtime.utils.utils import IndexerKeywords

 logger = logging.getLogger(__name__)

-INDEX_TOOL_NAMES = ['index_data', 'remove_index', '…
+INDEX_TOOL_NAMES = ['index_data', 'remove_index', 'list_collections', 'search_index', 'stepback_search_index',
                     'stepback_summary_index']

 LoaderSchema = create_model(
@@ -403,9 +403,9 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         """Cleans the indexed data in the collection."""
         self._init_vector_store()._clean_collection(index_name=index_name)
         return (f"Collection '{index_name}' has been removed from the vector store.\n"
-                f"Available collections: {self.…
+                f"Available collections: {self.list_collections()}")

-    def …
+    def list_collections(self):
         """Lists all collections in the vector store."""
         vectorstore_wrapper = self._init_vector_store()
         return vectorstore_wrapper.list_collections()
@@ -537,10 +537,10 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
             "args_schema": RemoveIndexParams
         },
         {
-            "name": "…
-            "mode": "…
-            "ref": self.…
-            "description": self.…
+            "name": "list_collections",
+            "mode": "list_collections",
+            "ref": self.list_collections,
+            "description": self.list_collections.__doc__,
             "args_schema": create_model("ListCollectionsParams")  # No parameters
         },

alita_sdk/tools/gitlab/api_wrapper.py
CHANGED

@@ -117,7 +117,11 @@ class GitLabAPIWrapper(CodeIndexerToolkit):

     @model_validator(mode='before')
     @classmethod
-    def …
+    def validate_toolkit_before(cls, values: Dict) -> Dict:
+        return super().validate_toolkit(values)
+
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:
@@ -125,17 +129,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-        …
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=…
-            private_token=…
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )

         g.auth()
-        …
-        return …
+        self._git = g
+        self._active_branch = self.branch
+        return self

     @property
     def repo_instance(self):

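The GitLab wrapper now splits validation into a mode='before' classmethod (raw-value cleanup delegated to the parent validator) and a mode='after' validator that builds and stores the authenticated client on the constructed model. Below is a toy Pydantic v2 sketch of that two-stage pattern; the fields and the fake client string are illustrative, not the wrapper's real attributes.

from pydantic import BaseModel, PrivateAttr, model_validator

class Wrapper(BaseModel):
    url: str
    repository: str
    _client: str = PrivateAttr(default="")

    @model_validator(mode="before")
    @classmethod
    def validate_before(cls, values: dict) -> dict:
        # normalize raw input before field validation runs
        values["url"] = values.get("url", "").rstrip("/")
        return values

    @model_validator(mode="after")
    def validate_after(self):
        # the real wrapper authenticates a gitlab.Gitlab client here
        self._client = f"client for {self.url}/{self.repository}"
        return self

w = Wrapper(url="https://gitlab.example.com/", repository="group/project")
print(w.url, w._client)
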
alita_sdk/tools/jira/api_wrapper.py
CHANGED

@@ -563,7 +563,7 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         Use the appropriate issue link type (e.g., "Test", "Relates", "Blocks").
         If we use "Test" linktype, the test is inward issue, the story/other issue is outward issue..

-        comment = "…
+        comment = f"Issue {inward_issue_key} was linked to {outward_issue_key}."
         comment_body = {"content": [{"content": [{"text": comment,"type": "text"}],"type": "paragraph"}],"type": "doc","version": 1} if self.api_version == "3" else comment
         link_data = {
             "type": {"name": f"{linktype}"},

alita_sdk/tools/openapi/__init__.py
CHANGED

@@ -1,6 +1,7 @@
 import json
 import re
 import logging
+import yaml
 from typing import List, Any, Optional, Dict
 from langchain_core.tools import BaseTool, BaseToolkit, ToolException
 from requests_openapi import Operation, Client, Server
@@ -101,7 +102,15 @@ class AlitaOpenAPIToolkit(BaseToolkit):
         else:
             tools_set = {}
         if isinstance(openapi_spec, str):
-            …
+            # Try to detect if it's YAML or JSON by attempting to parse as JSON first
+            try:
+                openapi_spec = json.loads(openapi_spec)
+            except json.JSONDecodeError:
+                # If JSON parsing fails, try YAML
+                try:
+                    openapi_spec = yaml.safe_load(openapi_spec)
+                except yaml.YAMLError as e:
+                    raise ToolException(f"Failed to parse OpenAPI spec as JSON or YAML: {e}")
         c = Client()
         c.load_spec(openapi_spec)
         if headers:

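The OpenAPI toolkit now accepts spec strings in either JSON or YAML: it tries json.loads first and falls back to yaml.safe_load, raising a tool error if both fail. A minimal sketch of that detection order follows; it assumes PyYAML is installed, and the tiny specs are illustrative only.

import json
import yaml

def load_spec(spec_text: str) -> dict:
    try:
        return json.loads(spec_text)  # JSON is also valid YAML, so try the stricter parser first
    except json.JSONDecodeError:
        try:
            return yaml.safe_load(spec_text)
        except yaml.YAMLError as e:
            raise ValueError(f"Failed to parse OpenAPI spec as JSON or YAML: {e}")

yaml_spec = "openapi: 3.0.0\ninfo:\n  title: Demo\n  version: '1.0'\npaths: {}\n"
json_spec = '{"openapi": "3.0.0", "info": {"title": "Demo", "version": "1.0"}, "paths": {}}'
print(load_spec(yaml_spec)["info"]["title"], load_spec(json_spec)["openapi"])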