alita-sdk 0.3.462__py3-none-any.whl → 0.3.627__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +15 -3
- alita_sdk/cli/agent_loader.py +56 -8
- alita_sdk/cli/agent_ui.py +93 -31
- alita_sdk/cli/agents.py +2274 -230
- alita_sdk/cli/callbacks.py +96 -25
- alita_sdk/cli/cli.py +10 -1
- alita_sdk/cli/config.py +162 -9
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/testcases/__init__.py +94 -0
- alita_sdk/cli/testcases/data_generation.py +119 -0
- alita_sdk/cli/testcases/discovery.py +96 -0
- alita_sdk/cli/testcases/executor.py +84 -0
- alita_sdk/cli/testcases/logger.py +85 -0
- alita_sdk/cli/testcases/parser.py +172 -0
- alita_sdk/cli/testcases/prompts.py +91 -0
- alita_sdk/cli/testcases/reporting.py +125 -0
- alita_sdk/cli/testcases/setup.py +108 -0
- alita_sdk/cli/testcases/test_runner.py +282 -0
- alita_sdk/cli/testcases/utils.py +39 -0
- alita_sdk/cli/testcases/validation.py +90 -0
- alita_sdk/cli/testcases/workflow.py +196 -0
- alita_sdk/cli/toolkit.py +14 -17
- alita_sdk/cli/toolkit_loader.py +35 -5
- alita_sdk/cli/tools/__init__.py +36 -2
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +910 -64
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +1 -1
- alita_sdk/configurations/ado.py +141 -20
- alita_sdk/configurations/bitbucket.py +0 -3
- alita_sdk/configurations/confluence.py +76 -42
- alita_sdk/configurations/figma.py +76 -0
- alita_sdk/configurations/gitlab.py +17 -5
- alita_sdk/configurations/openapi.py +329 -0
- alita_sdk/configurations/qtest.py +72 -1
- alita_sdk/configurations/report_portal.py +96 -0
- alita_sdk/configurations/sharepoint.py +148 -0
- alita_sdk/configurations/testio.py +83 -0
- alita_sdk/runtime/clients/artifact.py +3 -3
- alita_sdk/runtime/clients/client.py +353 -48
- alita_sdk/runtime/clients/sandbox_client.py +0 -21
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +123 -26
- alita_sdk/runtime/langchain/constants.py +642 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +6 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
- alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
- alita_sdk/runtime/langchain/langraph_agent.py +279 -73
- alita_sdk/runtime/langchain/utils.py +82 -15
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +7 -0
- alita_sdk/runtime/toolkits/application.py +21 -9
- alita_sdk/runtime/toolkits/artifact.py +15 -5
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +139 -251
- alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +251 -6
- alita_sdk/runtime/toolkits/tools.py +238 -32
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +3 -1
- alita_sdk/runtime/tools/application.py +20 -6
- alita_sdk/runtime/tools/artifact.py +511 -28
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +43 -15
- alita_sdk/runtime/tools/image_generation.py +50 -44
- alita_sdk/runtime/tools/llm.py +852 -67
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
- alita_sdk/runtime/tools/mcp_server_tool.py +7 -6
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -4
- alita_sdk/runtime/tools/sandbox.py +9 -6
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +7 -2
- alita_sdk/runtime/tools/vectorstore_base.py +51 -11
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +202 -5
- alita_sdk/runtime/utils/mcp_sse_client.py +36 -7
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/serialization.py +155 -0
- alita_sdk/runtime/utils/streamlit.py +6 -10
- alita_sdk/runtime/utils/toolkit_utils.py +16 -5
- alita_sdk/runtime/utils/utils.py +36 -0
- alita_sdk/tools/__init__.py +113 -29
- alita_sdk/tools/ado/repos/__init__.py +51 -33
- alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
- alita_sdk/tools/ado/test_plan/__init__.py +25 -9
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
- alita_sdk/tools/ado/utils.py +1 -18
- alita_sdk/tools/ado/wiki/__init__.py +25 -8
- alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
- alita_sdk/tools/ado/work_item/__init__.py +26 -9
- alita_sdk/tools/ado/work_item/ado_wrapper.py +56 -3
- alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +11 -8
- alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +170 -45
- alita_sdk/tools/bitbucket/__init__.py +17 -12
- alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
- alita_sdk/tools/browser/__init__.py +5 -4
- alita_sdk/tools/carrier/__init__.py +5 -6
- alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
- alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
- alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +10 -7
- alita_sdk/tools/cloud/azure/__init__.py +10 -7
- alita_sdk/tools/cloud/gcp/__init__.py +10 -7
- alita_sdk/tools/cloud/k8s/__init__.py +10 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +10 -7
- alita_sdk/tools/code_indexer_toolkit.py +73 -23
- alita_sdk/tools/confluence/__init__.py +21 -15
- alita_sdk/tools/confluence/api_wrapper.py +78 -23
- alita_sdk/tools/confluence/loader.py +4 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +493 -30
- alita_sdk/tools/figma/__init__.py +58 -11
- alita_sdk/tools/figma/api_wrapper.py +1235 -143
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +13 -14
- alita_sdk/tools/github/github_client.py +224 -100
- alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
- alita_sdk/tools/github/schemas.py +14 -5
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/github/tool_prompts.py +9 -22
- alita_sdk/tools/gitlab/__init__.py +15 -11
- alita_sdk/tools/gitlab/api_wrapper.py +207 -41
- alita_sdk/tools/gitlab_org/__init__.py +10 -8
- alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
- alita_sdk/tools/google/bigquery/__init__.py +13 -12
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +10 -8
- alita_sdk/tools/google_places/api_wrapper.py +1 -1
- alita_sdk/tools/jira/__init__.py +17 -11
- alita_sdk/tools/jira/api_wrapper.py +91 -40
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +11 -3
- alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +490 -114
- alita_sdk/tools/openapi/api_wrapper.py +1368 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +38 -25
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +11 -11
- alita_sdk/tools/pptx/__init__.py +10 -9
- alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
- alita_sdk/tools/qtest/__init__.py +30 -10
- alita_sdk/tools/qtest/api_wrapper.py +430 -13
- alita_sdk/tools/rally/__init__.py +10 -8
- alita_sdk/tools/rally/api_wrapper.py +1 -1
- alita_sdk/tools/report_portal/__init__.py +12 -9
- alita_sdk/tools/salesforce/__init__.py +10 -9
- alita_sdk/tools/servicenow/__init__.py +17 -14
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +10 -8
- alita_sdk/tools/sharepoint/api_wrapper.py +4 -4
- alita_sdk/tools/slack/__init__.py +10 -8
- alita_sdk/tools/slack/api_wrapper.py +2 -2
- alita_sdk/tools/sql/__init__.py +11 -9
- alita_sdk/tools/testio/__init__.py +10 -8
- alita_sdk/tools/testrail/__init__.py +11 -8
- alita_sdk/tools/testrail/api_wrapper.py +1 -1
- alita_sdk/tools/utils/__init__.py +9 -4
- alita_sdk/tools/utils/content_parser.py +77 -3
- alita_sdk/tools/utils/text_operations.py +410 -0
- alita_sdk/tools/utils/tool_prompts.py +79 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
- alita_sdk/tools/xray/__init__.py +12 -9
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +9 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +11 -8
- alita_sdk/tools/zephyr_essential/__init__.py +10 -8
- alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
- alita_sdk/tools/zephyr_essential/client.py +2 -2
- alita_sdk/tools/zephyr_scale/__init__.py +11 -9
- alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
- alita_sdk/tools/zephyr_squad/__init__.py +10 -8
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +147 -7
- alita_sdk-0.3.627.dist-info/RECORD +468 -0
- alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
- alita_sdk-0.3.462.dist-info/RECORD +0 -384
- alita_sdk-0.3.462.dist-info/entry_points.txt +0 -2
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
|
@@ -5,8 +5,9 @@ from pydantic import create_model, BaseModel, ConfigDict, Field
|
|
|
5
5
|
from .api_wrapper import SonarApiWrapper
|
|
6
6
|
from ...base.tool import BaseAction
|
|
7
7
|
from ...elitea_base import filter_missconfigured_index_tools
|
|
8
|
-
from ...utils import clean_string,
|
|
8
|
+
from ...utils import clean_string, get_max_toolkit_length
|
|
9
9
|
from ....configurations.sonar import SonarConfiguration
|
|
10
|
+
from ....runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META
|
|
10
11
|
|
|
11
12
|
name = "sonar"
|
|
12
13
|
|
|
@@ -21,12 +22,10 @@ def get_tools(tool):
|
|
|
21
22
|
|
|
22
23
|
class SonarToolkit(BaseToolkit):
|
|
23
24
|
tools: list[BaseTool] = []
|
|
24
|
-
toolkit_max_length: int = 0
|
|
25
25
|
|
|
26
26
|
@staticmethod
|
|
27
27
|
def toolkit_config_schema() -> BaseModel:
|
|
28
28
|
selected_tools = {x['name']: x['args_schema'].schema() for x in SonarApiWrapper.model_construct().get_available_tools()}
|
|
29
|
-
SonarToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
|
|
30
29
|
return create_model(
|
|
31
30
|
name,
|
|
32
31
|
sonar_project_name=(str, Field(description="Project name of the desired repository")),
|
|
@@ -55,15 +54,19 @@ class SonarToolkit(BaseToolkit):
|
|
|
55
54
|
sonar_api_wrapper = SonarApiWrapper(**wrapper_payload)
|
|
56
55
|
available_tools = sonar_api_wrapper.get_available_tools()
|
|
57
56
|
tools = []
|
|
58
|
-
prefix = clean_string(toolkit_name, SonarToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
|
59
57
|
for tool in available_tools:
|
|
60
58
|
if selected_tools and tool["name"] not in selected_tools:
|
|
61
59
|
continue
|
|
60
|
+
description = tool["description"]
|
|
61
|
+
if toolkit_name:
|
|
62
|
+
description = f"Toolkit: {toolkit_name}\n{description}"
|
|
63
|
+
description = description[:1000]
|
|
62
64
|
tools.append(BaseAction(
|
|
63
65
|
api_wrapper=sonar_api_wrapper,
|
|
64
|
-
name=
|
|
65
|
-
description=
|
|
66
|
-
args_schema=tool["args_schema"]
|
|
66
|
+
name=tool["name"],
|
|
67
|
+
description=description,
|
|
68
|
+
args_schema=tool["args_schema"],
|
|
69
|
+
metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
|
|
67
70
|
))
|
|
68
71
|
return cls(tools=tools)
|
|
69
72
|
|
|
@@ -9,13 +9,13 @@ from langchain_core.tools import ToolException
|
|
|
9
9
|
from pydantic import Field
|
|
10
10
|
|
|
11
11
|
from alita_sdk.tools.base_indexer_toolkit import BaseIndexerToolkit
|
|
12
|
-
from .chunkers.code.codeparser import parse_code_files_for_db
|
|
13
12
|
|
|
14
13
|
logger = logging.getLogger(__name__)
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
class CodeIndexerToolkit(BaseIndexerToolkit):
|
|
18
17
|
def _get_indexed_data(self, index_name: str):
|
|
18
|
+
self._ensure_vectorstore_initialized()
|
|
19
19
|
if not self.vector_adapter:
|
|
20
20
|
raise ToolException("Vector adapter is not initialized. "
|
|
21
21
|
"Check your configuration: embedding_model and vectorstore_type.")
|
|
@@ -38,12 +38,14 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
|
|
|
38
38
|
branch: Optional[str] = None,
|
|
39
39
|
whitelist: Optional[List[str]] = None,
|
|
40
40
|
blacklist: Optional[List[str]] = None,
|
|
41
|
+
chunking_config: Optional[dict] = None,
|
|
41
42
|
**kwargs) -> Generator[Document, None, None]:
|
|
42
43
|
"""Index repository files in the vector store using code parsing."""
|
|
43
44
|
yield from self.loader(
|
|
44
45
|
branch=branch,
|
|
45
46
|
whitelist=whitelist,
|
|
46
|
-
blacklist=blacklist
|
|
47
|
+
blacklist=blacklist,
|
|
48
|
+
chunking_config=chunking_config
|
|
47
49
|
)
|
|
48
50
|
|
|
49
51
|
def _extend_data(self, documents: Generator[Document, None, None]):
|
|
@@ -66,26 +68,55 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
|
|
|
66
68
|
def loader(self,
|
|
67
69
|
branch: Optional[str] = None,
|
|
68
70
|
whitelist: Optional[List[str]] = None,
|
|
69
|
-
blacklist: Optional[List[str]] = None
|
|
71
|
+
blacklist: Optional[List[str]] = None,
|
|
72
|
+
chunked: bool = True,
|
|
73
|
+
chunking_config: Optional[dict] = None) -> Generator[Document, None, None]:
|
|
70
74
|
"""
|
|
71
|
-
Generates
|
|
75
|
+
Generates Documents from files in a branch, respecting whitelist and blacklist patterns.
|
|
72
76
|
|
|
73
77
|
Parameters:
|
|
74
78
|
- branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
|
|
75
79
|
- whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
|
|
76
80
|
- blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
|
|
81
|
+
- chunked (bool): If True (default), applies universal chunker based on file type.
|
|
82
|
+
If False, returns raw Documents without chunking.
|
|
83
|
+
- chunking_config (Optional[dict]): Chunking configuration by file extension
|
|
77
84
|
|
|
78
85
|
Returns:
|
|
79
|
-
- generator: Yields
|
|
86
|
+
- generator: Yields Documents from files matching the whitelist but not the blacklist.
|
|
87
|
+
Each document has exactly the key 'filename' in metadata, which is used as an ID
|
|
88
|
+
for further operations (indexing, deduplication, and retrieval).
|
|
80
89
|
|
|
81
90
|
Example:
|
|
82
91
|
# Use 'feature-branch', include '.py' files, exclude 'test_' files
|
|
83
|
-
|
|
92
|
+
for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
|
|
93
|
+
print(doc.page_content)
|
|
84
94
|
|
|
85
95
|
Notes:
|
|
86
96
|
- Whitelist and blacklist use Unix shell-style wildcards.
|
|
87
97
|
- Files must match the whitelist and not the blacklist to be included.
|
|
98
|
+
- Each document MUST have exactly the key 'filename' in metadata. This key is used as an ID
|
|
99
|
+
for further operations such as indexing, deduplication, and retrieval.
|
|
100
|
+
- When chunked=True:
|
|
101
|
+
- .md files → markdown chunker (header-based splitting)
|
|
102
|
+
- .py/.js/.ts/etc → code parser (TreeSitter-based)
|
|
103
|
+
- .json files → JSON chunker
|
|
104
|
+
- other files → default text chunker
|
|
88
105
|
"""
|
|
106
|
+
import hashlib
|
|
107
|
+
|
|
108
|
+
# Auto-include extensions from chunking_config if whitelist is specified
|
|
109
|
+
# This allows chunking config to work without manually adding extensions to whitelist
|
|
110
|
+
if chunking_config and whitelist:
|
|
111
|
+
for ext_pattern in chunking_config.keys():
|
|
112
|
+
# Normalize extension pattern (both ".cbl" and "*.cbl" should work)
|
|
113
|
+
normalized = ext_pattern if ext_pattern.startswith('*') else f'*{ext_pattern}'
|
|
114
|
+
if normalized not in whitelist:
|
|
115
|
+
whitelist.append(normalized)
|
|
116
|
+
self._log_tool_event(
|
|
117
|
+
message=f"Auto-included extension '{normalized}' from chunking_config",
|
|
118
|
+
tool_name="loader"
|
|
119
|
+
)
|
|
89
120
|
|
|
90
121
|
_files = self.__handle_get_files("", self.__get_branch(branch))
|
|
91
122
|
self._log_tool_event(message="Listing files in branch", tool_name="loader")
|
|
@@ -103,41 +134,60 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
|
|
|
103
134
|
or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
|
|
104
135
|
return False
|
|
105
136
|
|
|
106
|
-
def
|
|
137
|
+
def raw_document_generator() -> Generator[Document, None, None]:
|
|
138
|
+
"""Yields raw Documents without chunking."""
|
|
107
139
|
self._log_tool_event(message="Reading the files", tool_name="loader")
|
|
108
|
-
# log the progress of file reading
|
|
109
140
|
total_files = len(_files)
|
|
141
|
+
processed = 0
|
|
142
|
+
|
|
110
143
|
for idx, file in enumerate(_files, 1):
|
|
111
144
|
if is_whitelisted(file) and not is_blacklisted(file):
|
|
112
|
-
# read file ONLY if it matches whitelist and does not match blacklist
|
|
113
145
|
try:
|
|
114
146
|
file_content = self._read_file(file, self.__get_branch(branch))
|
|
115
147
|
except Exception as e:
|
|
116
148
|
logger.error(f"Failed to read file {file}: {e}")
|
|
117
|
-
|
|
149
|
+
continue
|
|
150
|
+
|
|
118
151
|
if not file_content:
|
|
119
|
-
# empty file, skip
|
|
120
152
|
continue
|
|
121
|
-
|
|
122
|
-
#
|
|
153
|
+
|
|
154
|
+
# Ensure file content is a string
|
|
123
155
|
if isinstance(file_content, bytes):
|
|
124
156
|
file_content = file_content.decode("utf-8", errors="ignore")
|
|
125
157
|
elif isinstance(file_content, dict) and file.endswith('.json'):
|
|
126
158
|
file_content = json.dumps(file_content)
|
|
127
159
|
elif not isinstance(file_content, str):
|
|
128
160
|
file_content = str(file_content)
|
|
129
|
-
|
|
130
|
-
#
|
|
131
|
-
import hashlib
|
|
161
|
+
|
|
162
|
+
# Hash the file content for uniqueness tracking
|
|
132
163
|
file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
164
|
+
processed += 1
|
|
165
|
+
|
|
166
|
+
yield Document(
|
|
167
|
+
page_content=file_content,
|
|
168
|
+
metadata={
|
|
169
|
+
'file_path': file,
|
|
170
|
+
'filename': file,
|
|
171
|
+
'source': file,
|
|
172
|
+
'commit_hash': file_hash,
|
|
173
|
+
}
|
|
174
|
+
)
|
|
175
|
+
|
|
136
176
|
if idx % 10 == 0 or idx == total_files:
|
|
137
|
-
self._log_tool_event(
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
177
|
+
self._log_tool_event(
|
|
178
|
+
message=f"{idx} out of {total_files} files checked, {processed} matched",
|
|
179
|
+
tool_name="loader"
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
|
|
183
|
+
|
|
184
|
+
if not chunked:
|
|
185
|
+
# Return raw documents without chunking
|
|
186
|
+
return raw_document_generator()
|
|
187
|
+
|
|
188
|
+
# Apply universal chunker based on file type
|
|
189
|
+
from .chunkers.universal_chunker import universal_chunker
|
|
190
|
+
return universal_chunker(raw_document_generator())
|
|
141
191
|
|
|
142
192
|
def __handle_get_files(self, path: str, branch: str):
|
|
143
193
|
"""
|
|
@@ -6,14 +6,15 @@ from ..base.tool import BaseAction
|
|
|
6
6
|
from pydantic import create_model, BaseModel, ConfigDict, Field
|
|
7
7
|
|
|
8
8
|
from ..elitea_base import filter_missconfigured_index_tools
|
|
9
|
-
from ..utils import clean_string,
|
|
9
|
+
from ..utils import clean_string, get_max_toolkit_length, parse_list, check_connection_response
|
|
10
10
|
from ...configurations.confluence import ConfluenceConfiguration
|
|
11
11
|
from ...configurations.pgvector import PgVectorConfiguration
|
|
12
12
|
import requests
|
|
13
|
+
from ...runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META
|
|
13
14
|
|
|
14
15
|
name = "confluence"
|
|
15
16
|
|
|
16
|
-
def
|
|
17
|
+
def get_toolkit(tool):
|
|
17
18
|
return ConfluenceToolkit().get_toolkit(
|
|
18
19
|
selected_tools=tool['settings'].get('selected_tools', []),
|
|
19
20
|
space=tool['settings'].get('space', None),
|
|
@@ -33,18 +34,19 @@ def get_tools(tool):
|
|
|
33
34
|
doctype='doc',
|
|
34
35
|
embedding_model=tool['settings'].get('embedding_model'),
|
|
35
36
|
vectorstore_type="PGVector"
|
|
36
|
-
)
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
def get_tools(tool):
|
|
40
|
+
return get_toolkit(tool).get_tools()
|
|
37
41
|
|
|
38
42
|
|
|
39
43
|
class ConfluenceToolkit(BaseToolkit):
|
|
40
44
|
tools: List[BaseTool] = []
|
|
41
|
-
toolkit_max_length: int = 0
|
|
42
45
|
|
|
43
46
|
@staticmethod
|
|
44
47
|
def toolkit_config_schema() -> BaseModel:
|
|
45
48
|
selected_tools = {x['name']: x['args_schema'].schema() for x in
|
|
46
49
|
ConfluenceAPIWrapper.model_construct().get_available_tools()}
|
|
47
|
-
ConfluenceToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
|
|
48
50
|
|
|
49
51
|
@check_connection_response
|
|
50
52
|
def check_connection(self):
|
|
@@ -69,16 +71,16 @@ class ConfluenceToolkit(BaseToolkit):
|
|
|
69
71
|
name,
|
|
70
72
|
space=(str, Field(description="Space")),
|
|
71
73
|
cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
|
|
72
|
-
limit=(int, Field(description="Pages limit per request", default=5)),
|
|
74
|
+
limit=(int, Field(description="Pages limit per request", default=5, gt=0)),
|
|
73
75
|
labels=(Optional[str], Field(
|
|
74
76
|
description="List of comma separated labels used for labeling of agent's created or updated entities",
|
|
75
77
|
default=None,
|
|
76
78
|
examples="alita,elitea;another-label"
|
|
77
79
|
)),
|
|
78
|
-
max_pages=(int, Field(description="Max total pages", default=10)),
|
|
79
|
-
number_of_retries=(int, Field(description="Number of retries", default=2)),
|
|
80
|
-
min_retry_seconds=(int, Field(description="Min retry, sec", default=10)),
|
|
81
|
-
max_retry_seconds=(int, Field(description="Max retry, sec", default=60)),
|
|
80
|
+
max_pages=(int, Field(description="Max total pages", default=10, gt=0)),
|
|
81
|
+
number_of_retries=(int, Field(description="Number of retries", default=2, ge=0)),
|
|
82
|
+
min_retry_seconds=(int, Field(description="Min retry, sec", default=10, ge=0)),
|
|
83
|
+
max_retry_seconds=(int, Field(description="Max retry, sec", default=60, ge=0)),
|
|
82
84
|
# optional field for custom headers as dictionary
|
|
83
85
|
custom_headers=(Optional[dict], Field(description="Custom headers for API requests", default={})),
|
|
84
86
|
confluence_configuration=(ConfluenceConfiguration, Field(description="Confluence Configuration", json_schema_extra={'configuration_types': ['confluence']})),
|
|
@@ -94,7 +96,6 @@ class ConfluenceToolkit(BaseToolkit):
|
|
|
94
96
|
'metadata': {
|
|
95
97
|
"label": "Confluence",
|
|
96
98
|
"icon_url": None,
|
|
97
|
-
"max_length": ConfluenceToolkit.toolkit_max_length,
|
|
98
99
|
"categories": ["documentation"],
|
|
99
100
|
"extra_categories": ["confluence", "wiki", "knowledge base", "documentation", "atlassian"]
|
|
100
101
|
}
|
|
@@ -115,18 +116,23 @@ class ConfluenceToolkit(BaseToolkit):
|
|
|
115
116
|
**(kwargs.get('pgvector_configuration') or {}),
|
|
116
117
|
}
|
|
117
118
|
confluence_api_wrapper = ConfluenceAPIWrapper(**wrapper_payload)
|
|
118
|
-
prefix = clean_string(toolkit_name, ConfluenceToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
|
119
119
|
available_tools = confluence_api_wrapper.get_available_tools()
|
|
120
120
|
tools = []
|
|
121
121
|
for tool in available_tools:
|
|
122
122
|
if selected_tools:
|
|
123
123
|
if tool["name"] not in selected_tools:
|
|
124
124
|
continue
|
|
125
|
+
description = tool["description"]
|
|
126
|
+
if toolkit_name:
|
|
127
|
+
description = f"Toolkit: {toolkit_name}\n{description}"
|
|
128
|
+
description = f"Confluence space: {confluence_api_wrapper.space}\n{description}"
|
|
129
|
+
description = description[:1000]
|
|
125
130
|
tools.append(BaseAction(
|
|
126
131
|
api_wrapper=confluence_api_wrapper,
|
|
127
|
-
name=
|
|
128
|
-
description=
|
|
129
|
-
args_schema=tool["args_schema"]
|
|
132
|
+
name=tool["name"],
|
|
133
|
+
description=description,
|
|
134
|
+
args_schema=tool["args_schema"],
|
|
135
|
+
metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
|
|
130
136
|
))
|
|
131
137
|
return cls(tools=tools)
|
|
132
138
|
|
|
@@ -480,21 +480,69 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
|
|
|
480
480
|
"""Gets pages with specific label in the Confluence space."""
|
|
481
481
|
|
|
482
482
|
start = 0
|
|
483
|
-
pages_info = []
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
483
|
+
pages_info: List[Dict[str, Any]] = []
|
|
484
|
+
seen_ids: set[str] = set()
|
|
485
|
+
|
|
486
|
+
# Use a while-loop driven by unique pages collected and
|
|
487
|
+
# presence of additional results instead of a fixed number
|
|
488
|
+
# of iterations based purely on max_pages/limit.
|
|
489
|
+
while len(pages_info) < (self.max_pages or 0):
|
|
490
|
+
pages = self.client.get_all_pages_by_label(
|
|
491
|
+
label,
|
|
492
|
+
start=start,
|
|
493
|
+
limit=self.limit,
|
|
494
|
+
) # , expand="body.view.value"
|
|
487
495
|
if not pages:
|
|
488
496
|
break
|
|
489
497
|
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
498
|
+
# Collect only ids we haven't processed yet to avoid
|
|
499
|
+
# calling get_page_by_id multiple times for the same
|
|
500
|
+
# Confluence page.
|
|
501
|
+
new_ids: List[str] = []
|
|
502
|
+
for p in pages:
|
|
503
|
+
page_id = p["id"] if isinstance(p, dict) else getattr(p, "id", None)
|
|
504
|
+
if page_id is None:
|
|
505
|
+
continue
|
|
506
|
+
if page_id in seen_ids:
|
|
507
|
+
continue
|
|
508
|
+
seen_ids.add(page_id)
|
|
509
|
+
new_ids.append(page_id)
|
|
510
|
+
|
|
511
|
+
if new_ids:
|
|
512
|
+
for page in self.get_pages_by_id(new_ids):
|
|
513
|
+
meta = getattr(page, "metadata", {}) or {}
|
|
514
|
+
page_id = meta.get("id")
|
|
515
|
+
page_title = meta.get("title")
|
|
516
|
+
page_url = meta.get("source")
|
|
517
|
+
content = getattr(page, "page_content", None)
|
|
518
|
+
|
|
519
|
+
if page_id is None:
|
|
520
|
+
continue
|
|
521
|
+
|
|
522
|
+
pages_info.append(
|
|
523
|
+
{
|
|
524
|
+
"page_id": page_id,
|
|
525
|
+
"page_title": page_title,
|
|
526
|
+
"page_url": page_url,
|
|
527
|
+
"content": content,
|
|
528
|
+
}
|
|
529
|
+
)
|
|
530
|
+
|
|
531
|
+
# Respect max_pages on unique pages collected.
|
|
532
|
+
if len(pages_info) >= (self.max_pages or 0):
|
|
533
|
+
break
|
|
534
|
+
|
|
535
|
+
# Advance the offset by the requested page size.
|
|
496
536
|
start += self.limit
|
|
497
|
-
|
|
537
|
+
|
|
538
|
+
# Defensive break: if the API returns fewer items than
|
|
539
|
+
# requested, there are likely no more pages to fetch.
|
|
540
|
+
if len(pages) < self.limit:
|
|
541
|
+
break
|
|
542
|
+
|
|
543
|
+
# Slice as an extra safety net in case of any race conditions
|
|
544
|
+
# around the max_pages guard in the loop above.
|
|
545
|
+
return pages_info[: (self.max_pages or len(pages_info))]
|
|
498
546
|
|
|
499
547
|
def is_public_page(self, page: dict) -> bool:
|
|
500
548
|
"""Check if a page is publicly accessible."""
|
|
@@ -572,11 +620,18 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
|
|
|
572
620
|
def _process_search(self, cql, skip_images: bool = False):
|
|
573
621
|
start = 0
|
|
574
622
|
pages_info = []
|
|
623
|
+
seen_ids: set = set() # Track seen page IDs to avoid duplicates
|
|
575
624
|
for _ in range((self.max_pages + self.limit - 1) // self.limit):
|
|
576
625
|
pages = self.client.cql(cql, start=start, limit=self.limit).get("results", [])
|
|
577
626
|
if not pages:
|
|
578
627
|
break
|
|
579
|
-
|
|
628
|
+
# Deduplicate page IDs before processing
|
|
629
|
+
page_ids = []
|
|
630
|
+
for page in pages:
|
|
631
|
+
page_id = page['content']['id']
|
|
632
|
+
if page_id not in seen_ids:
|
|
633
|
+
seen_ids.add(page_id)
|
|
634
|
+
page_ids.append(page_id)
|
|
580
635
|
for page in self.get_pages_by_id(page_ids, skip_images):
|
|
581
636
|
page_info = {
|
|
582
637
|
'content': page.page_content,
|
|
@@ -896,14 +951,14 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
|
|
|
896
951
|
|
|
897
952
|
# Re-verify extension filters
|
|
898
953
|
# Check if file should be skipped based on skip_extensions
|
|
899
|
-
if any(re.match(pattern.replace('
|
|
954
|
+
if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', title, re.IGNORECASE)
|
|
900
955
|
for pattern in self._skip_extensions):
|
|
901
956
|
continue
|
|
902
957
|
|
|
903
958
|
# Check if file should be included based on include_extensions
|
|
904
959
|
# If include_extensions is empty, process all files (that weren't skipped)
|
|
905
960
|
if self._include_extensions and not (
|
|
906
|
-
any(re.match(pattern.replace('
|
|
961
|
+
any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', title, re.IGNORECASE)
|
|
907
962
|
for pattern in self._include_extensions)):
|
|
908
963
|
continue
|
|
909
964
|
|
|
@@ -914,6 +969,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
|
|
|
914
969
|
created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
|
|
915
970
|
last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
|
|
916
971
|
|
|
972
|
+
attachment_path = attachment['_links']['download'] if attachment.get(
|
|
973
|
+
'_links', {}).get('download') else ''
|
|
974
|
+
download_url = self.client.url.rstrip('/') + attachment_path
|
|
917
975
|
metadata = {
|
|
918
976
|
'name': title,
|
|
919
977
|
'size': attachment.get('extensions', {}).get('fileSize', None),
|
|
@@ -923,14 +981,10 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
|
|
|
923
981
|
'media_type': media_type,
|
|
924
982
|
'labels': [label['name'] for label in
|
|
925
983
|
attachment.get('metadata', {}).get('labels', {}).get('results', [])],
|
|
926
|
-
'download_url':
|
|
927
|
-
'_links', {}).get('download') else None
|
|
984
|
+
'download_url': download_url
|
|
928
985
|
}
|
|
929
|
-
|
|
930
|
-
download_url = self.base_url.rstrip('/') + attachment['_links']['download']
|
|
931
|
-
|
|
932
986
|
try:
|
|
933
|
-
resp = self.client.request(method="GET", path=
|
|
987
|
+
resp = self.client.request(method="GET", path=attachment_path, advanced_mode=True)
|
|
934
988
|
if resp.status_code == 200:
|
|
935
989
|
content = resp.content
|
|
936
990
|
else:
|
|
@@ -1683,8 +1737,8 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
|
|
|
1683
1737
|
"page_ids": (Optional[List[str]], Field(description="List of page IDs to retrieve.", default=None)),
|
|
1684
1738
|
"label": (Optional[str], Field(description="Label to filter pages.", default=None)),
|
|
1685
1739
|
"cql": (Optional[str], Field(description="CQL query to filter pages.", default=None)),
|
|
1686
|
-
"limit": (Optional[int], Field(description="Limit the number of results.", default=10)),
|
|
1687
|
-
"max_pages": (Optional[int], Field(description="Maximum number of pages to retrieve.", default=1000)),
|
|
1740
|
+
"limit": (Optional[int], Field(description="Limit the number of results.", default=10, gt=0)),
|
|
1741
|
+
"max_pages": (Optional[int], Field(description="Maximum number of pages to retrieve.", default=1000, gt=0)),
|
|
1688
1742
|
"include_restricted_content": (Optional[bool], Field(description="Include restricted content.", default=False)),
|
|
1689
1743
|
"include_archived_content": (Optional[bool], Field(description="Include archived content.", default=False)),
|
|
1690
1744
|
"include_attachments": (Optional[bool], Field(description="Include attachments.", default=False)),
|
|
@@ -1820,4 +1874,5 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
|
|
|
1820
1874
|
"description": self.get_page_attachments.__doc__,
|
|
1821
1875
|
"args_schema": GetPageAttachmentsInput,
|
|
1822
1876
|
}
|
|
1823
|
-
]
|
|
1877
|
+
]
|
|
1878
|
+
|
|
@@ -48,7 +48,8 @@ class AlitaConfluenceLoader(ConfluenceLoader):
|
|
|
48
48
|
del kwargs[key]
|
|
49
49
|
except:
|
|
50
50
|
pass
|
|
51
|
-
|
|
51
|
+
# utilize adjusted URL from Confluence instance for base_url
|
|
52
|
+
self.base_url = confluence_client.url
|
|
52
53
|
self.space_key = kwargs.get('space_key')
|
|
53
54
|
self.page_ids = kwargs.get('page_ids')
|
|
54
55
|
self.label = kwargs.get('label')
|
|
@@ -108,7 +109,8 @@ class AlitaConfluenceLoader(ConfluenceLoader):
|
|
|
108
109
|
texts = []
|
|
109
110
|
for attachment in attachments:
|
|
110
111
|
media_type = attachment["metadata"]["mediaType"]
|
|
111
|
-
|
|
112
|
+
# utilize adjusted URL from Confluence instance for attachment download URL
|
|
113
|
+
absolute_url = self.confluence.url + attachment["_links"]["download"]
|
|
112
114
|
title = attachment["title"]
|
|
113
115
|
try:
|
|
114
116
|
if media_type == "application/pdf":
|
|
@@ -5,7 +5,8 @@ from pydantic import create_model, BaseModel, ConfigDict, Field
|
|
|
5
5
|
|
|
6
6
|
from .api_wrapper import OpenApiWrapper
|
|
7
7
|
from ..base.tool import BaseAction
|
|
8
|
-
from ..utils import clean_string
|
|
8
|
+
from ..utils import clean_string
|
|
9
|
+
from ...runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META
|
|
9
10
|
|
|
10
11
|
name = "openapi"
|
|
11
12
|
|
|
@@ -43,15 +44,21 @@ class OpenApiToolkit(BaseToolkit):
|
|
|
43
44
|
openapi_api_wrapper = OpenApiWrapper(**kwargs)
|
|
44
45
|
available_tools = openapi_api_wrapper.get_available_tools()
|
|
45
46
|
tools = []
|
|
46
|
-
|
|
47
|
+
# Use clean toolkit name for context (max 1000 chars in description)
|
|
48
|
+
toolkit_context = f" [Toolkit: {clean_string(toolkit_name)}]" if toolkit_name else ''
|
|
47
49
|
for tool in available_tools:
|
|
48
50
|
if selected_tools and tool["name"] not in selected_tools:
|
|
49
51
|
continue
|
|
52
|
+
# Add toolkit context to description with character limit
|
|
53
|
+
description = tool["description"]
|
|
54
|
+
if toolkit_context and len(description + toolkit_context) <= 1000:
|
|
55
|
+
description = description + toolkit_context
|
|
50
56
|
tools.append(BaseAction(
|
|
51
57
|
api_wrapper=openapi_api_wrapper,
|
|
52
|
-
name=
|
|
53
|
-
description=
|
|
54
|
-
args_schema=tool["args_schema"]
|
|
58
|
+
name=tool["name"],
|
|
59
|
+
description=description,
|
|
60
|
+
args_schema=tool["args_schema"],
|
|
61
|
+
metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
|
|
55
62
|
))
|
|
56
63
|
return cls(tools=tools)
|
|
57
64
|
|
|
@@ -5,7 +5,8 @@ from pydantic import BaseModel, ConfigDict, create_model, Field, SecretStr
|
|
|
5
5
|
|
|
6
6
|
from .api_wrapper import ELITEAElasticApiWrapper
|
|
7
7
|
from ..base.tool import BaseAction
|
|
8
|
-
from ..utils import clean_string,
|
|
8
|
+
from ..utils import clean_string, get_max_toolkit_length
|
|
9
|
+
from ...runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META
|
|
9
10
|
|
|
10
11
|
name = "elastic"
|
|
11
12
|
|
|
@@ -19,15 +20,13 @@ def get_tools(tool):
|
|
|
19
20
|
|
|
20
21
|
class ElasticToolkit(BaseToolkit):
|
|
21
22
|
tools: list[BaseTool] = []
|
|
22
|
-
toolkit_max_length: int = 0
|
|
23
23
|
|
|
24
24
|
@staticmethod
|
|
25
25
|
def toolkit_config_schema() -> BaseModel:
|
|
26
26
|
selected_tools = {x['name']: x['args_schema'].schema() for x in ELITEAElasticApiWrapper.model_construct().get_available_tools()}
|
|
27
|
-
ElasticToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
|
|
28
27
|
return create_model(
|
|
29
28
|
name,
|
|
30
|
-
url=(str, Field(default=None, title="Elasticsearch URL", description="Elasticsearch URL", json_schema_extra={'toolkit_name': True
|
|
29
|
+
url=(Optional[str], Field(default=None, title="Elasticsearch URL", description="Elasticsearch URL", json_schema_extra={'toolkit_name': True})),
|
|
31
30
|
api_key=(
|
|
32
31
|
Optional[SecretStr],
|
|
33
32
|
Field(
|
|
@@ -48,15 +47,19 @@ class ElasticToolkit(BaseToolkit):
|
|
|
48
47
|
elastic_api_wrapper = ELITEAElasticApiWrapper(**kwargs)
|
|
49
48
|
available_tools = elastic_api_wrapper.get_available_tools()
|
|
50
49
|
tools = []
|
|
51
|
-
prefix = clean_string(toolkit_name, ElasticToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
|
52
50
|
for tool in available_tools:
|
|
53
51
|
if selected_tools and tool["name"] not in selected_tools:
|
|
54
52
|
continue
|
|
53
|
+
description = tool["description"]
|
|
54
|
+
if toolkit_name:
|
|
55
|
+
description = f"Toolkit: {toolkit_name}\n{description}"
|
|
56
|
+
description = description[:1000]
|
|
55
57
|
tools.append(BaseAction(
|
|
56
58
|
api_wrapper=elastic_api_wrapper,
|
|
57
|
-
name=
|
|
58
|
-
description=
|
|
59
|
-
args_schema=tool["args_schema"]
|
|
59
|
+
name=tool["name"],
|
|
60
|
+
description=description,
|
|
61
|
+
args_schema=tool["args_schema"],
|
|
62
|
+
metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
|
|
60
63
|
))
|
|
61
64
|
return cls(tools=tools)
|
|
62
65
|
|