alita-sdk 0.3.263__py3-none-any.whl → 0.3.499__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3601 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1256 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +64 -8
- alita_sdk/community/inventory/__init__.py +224 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +10 -0
- alita_sdk/configurations/ado.py +4 -2
- alita_sdk/configurations/azure_search.py +1 -1
- alita_sdk/configurations/bigquery.py +1 -1
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/browser.py +18 -0
- alita_sdk/configurations/carrier.py +19 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/delta_lake.py +1 -1
- alita_sdk/configurations/figma.py +0 -5
- alita_sdk/configurations/github.py +65 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/google_places.py +17 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/postman.py +1 -1
- alita_sdk/configurations/qtest.py +1 -3
- alita_sdk/configurations/report_portal.py +19 -0
- alita_sdk/configurations/salesforce.py +19 -0
- alita_sdk/configurations/service_now.py +1 -12
- alita_sdk/configurations/sharepoint.py +19 -0
- alita_sdk/configurations/sonar.py +18 -0
- alita_sdk/configurations/sql.py +20 -0
- alita_sdk/configurations/testio.py +18 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +94 -1
- alita_sdk/configurations/zephyr_enterprise.py +94 -1
- alita_sdk/configurations/zephyr_essential.py +95 -0
- alita_sdk/runtime/clients/artifact.py +12 -2
- alita_sdk/runtime/clients/client.py +235 -66
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +373 -0
- alita_sdk/runtime/langchain/assistant.py +123 -17
- alita_sdk/runtime/langchain/constants.py +8 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +8 -2
- alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
- alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
- alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +187 -40
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
- alita_sdk/runtime/langchain/langraph_agent.py +406 -91
- alita_sdk/runtime/langchain/utils.py +51 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +26 -0
- alita_sdk/runtime/toolkits/application.py +9 -2
- alita_sdk/runtime/toolkits/artifact.py +19 -7
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +780 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/subgraph.py +11 -6
- alita_sdk/runtime/toolkits/tools.py +214 -60
- alita_sdk/runtime/toolkits/vectorstore.py +9 -4
- alita_sdk/runtime/tools/__init__.py +22 -0
- alita_sdk/runtime/tools/application.py +16 -4
- alita_sdk/runtime/tools/artifact.py +312 -19
- alita_sdk/runtime/tools/function.py +100 -4
- alita_sdk/runtime/tools/graph.py +81 -0
- alita_sdk/runtime/tools/image_generation.py +212 -0
- alita_sdk/runtime/tools/llm.py +539 -180
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +375 -0
- alita_sdk/runtime/tools/vectorstore.py +62 -63
- alita_sdk/runtime/tools/vectorstore_base.py +156 -85
- alita_sdk/runtime/utils/AlitaCallback.py +106 -20
- alita_sdk/runtime/utils/mcp_client.py +465 -0
- alita_sdk/runtime/utils/mcp_oauth.py +244 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +41 -14
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +14 -0
- alita_sdk/tools/__init__.py +78 -35
- alita_sdk/tools/ado/__init__.py +0 -1
- alita_sdk/tools/ado/repos/__init__.py +10 -6
- alita_sdk/tools/ado/repos/repos_wrapper.py +12 -11
- alita_sdk/tools/ado/test_plan/__init__.py +10 -7
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -23
- alita_sdk/tools/ado/wiki/__init__.py +10 -11
- alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -28
- alita_sdk/tools/ado/work_item/__init__.py +10 -11
- alita_sdk/tools/ado/work_item/ado_wrapper.py +63 -10
- alita_sdk/tools/advanced_jira_mining/__init__.py +10 -7
- alita_sdk/tools/aws/delta_lake/__init__.py +13 -11
- alita_sdk/tools/azure_ai/search/__init__.py +11 -7
- alita_sdk/tools/base_indexer_toolkit.py +392 -86
- alita_sdk/tools/bitbucket/__init__.py +18 -11
- alita_sdk/tools/bitbucket/api_wrapper.py +52 -9
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
- alita_sdk/tools/browser/__init__.py +40 -16
- alita_sdk/tools/browser/crawler.py +3 -1
- alita_sdk/tools/browser/utils.py +15 -6
- alita_sdk/tools/carrier/__init__.py +17 -17
- alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
- alita_sdk/tools/carrier/excel_reporter.py +8 -4
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/codeparser.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +9 -6
- alita_sdk/tools/cloud/azure/__init__.py +9 -6
- alita_sdk/tools/cloud/gcp/__init__.py +9 -6
- alita_sdk/tools/cloud/k8s/__init__.py +9 -6
- alita_sdk/tools/code/linter/__init__.py +7 -7
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +18 -12
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +14 -11
- alita_sdk/tools/confluence/api_wrapper.py +198 -58
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/custom_open_api/__init__.py +9 -4
- alita_sdk/tools/elastic/__init__.py +8 -7
- alita_sdk/tools/elitea_base.py +543 -64
- alita_sdk/tools/figma/__init__.py +10 -8
- alita_sdk/tools/figma/api_wrapper.py +352 -153
- alita_sdk/tools/github/__init__.py +13 -11
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +75 -12
- alita_sdk/tools/github/schemas.py +2 -1
- alita_sdk/tools/gitlab/__init__.py +11 -10
- alita_sdk/tools/gitlab/api_wrapper.py +135 -45
- alita_sdk/tools/gitlab_org/__init__.py +11 -9
- alita_sdk/tools/google/bigquery/__init__.py +12 -13
- alita_sdk/tools/google_places/__init__.py +18 -10
- alita_sdk/tools/jira/__init__.py +14 -8
- alita_sdk/tools/jira/api_wrapper.py +315 -168
- alita_sdk/tools/keycloak/__init__.py +8 -7
- alita_sdk/tools/localgit/local_git.py +56 -54
- alita_sdk/tools/memory/__init__.py +27 -11
- alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
- alita_sdk/tools/ocr/__init__.py +8 -7
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +8 -7
- alita_sdk/tools/pandas/api_wrapper.py +7 -25
- alita_sdk/tools/postman/__init__.py +8 -10
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +8 -9
- alita_sdk/tools/qtest/__init__.py +19 -13
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +10 -9
- alita_sdk/tools/report_portal/__init__.py +20 -15
- alita_sdk/tools/salesforce/__init__.py +19 -15
- alita_sdk/tools/servicenow/__init__.py +14 -11
- alita_sdk/tools/sharepoint/__init__.py +14 -13
- alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +10 -7
- alita_sdk/tools/sql/__init__.py +19 -18
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +18 -12
- alita_sdk/tools/testrail/__init__.py +10 -10
- alita_sdk/tools/testrail/api_wrapper.py +213 -45
- alita_sdk/tools/utils/__init__.py +28 -4
- alita_sdk/tools/utils/content_parser.py +181 -61
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
- alita_sdk/tools/xray/__init__.py +12 -7
- alita_sdk/tools/xray/api_wrapper.py +58 -113
- alita_sdk/tools/zephyr/__init__.py +9 -6
- alita_sdk/tools/zephyr_enterprise/__init__.py +13 -8
- alita_sdk/tools/zephyr_enterprise/api_wrapper.py +17 -7
- alita_sdk/tools/zephyr_essential/__init__.py +13 -9
- alita_sdk/tools/zephyr_essential/api_wrapper.py +289 -47
- alita_sdk/tools/zephyr_essential/client.py +6 -4
- alita_sdk/tools/zephyr_scale/__init__.py +10 -7
- alita_sdk/tools/zephyr_scale/api_wrapper.py +6 -2
- alita_sdk/tools/zephyr_squad/__init__.py +9 -6
- {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +180 -33
- alita_sdk-0.3.499.dist-info/RECORD +433 -0
- alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
- alita_sdk-0.3.263.dist-info/RECORD +0 -342
- {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0

```diff
--- a/alita_sdk/tools/code/loaders/codesearcher.py
+++ b/alita_sdk/tools/code/loaders/codesearcher.py
@@ -4,8 +4,9 @@ def search_format(items):
     results = []
     for (doc, score) in items:
         res_chunk = ''
-        language = get_programming_language(get_file_extension(doc.metadata
-
+        language = get_programming_language(get_file_extension(doc.metadata.get("filename", "unknown")))
+        method_name = doc.metadata.get("method_name", "text")
+        res_chunk += doc.metadata.get("filename", "unknown") + " -> " + method_name + " (score: " + str(score) + ")"
         res_chunk += "\n\n```" + language.value + "\n"+ doc.page_content + "\n```\n\n"
         results.append(res_chunk)
     return results
```
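
For illustration, a minimal standalone sketch of the chunk layout the updated `search_format` now produces, using hypothetical metadata and score (the `SimpleDoc` class and `language_value` below are stand-ins for the SDK's document object and `get_programming_language(...).value`):

```python
# Sketch only: hypothetical inputs, stand-ins for the SDK's Document and language helpers.
from dataclasses import dataclass, field

@dataclass
class SimpleDoc:
    page_content: str
    metadata: dict = field(default_factory=dict)

doc = SimpleDoc(page_content="def add(a, b):\n    return a + b",
                metadata={"filename": "src/calc.py", "method_name": "add"})
score = 0.87
language_value = "python"  # stand-in for get_programming_language(get_file_extension(...)).value

# Mirrors the new formatting: filename, method name, and score ahead of the fenced code block.
res_chunk = doc.metadata.get("filename", "unknown") + " -> " + doc.metadata.get("method_name", "text") + " (score: " + str(score) + ")"
res_chunk += "\n\n```" + language_value + "\n" + doc.page_content + "\n```\n\n"
print(res_chunk)
```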
```diff
--- a/alita_sdk/tools/code/sonar/__init__.py
+++ b/alita_sdk/tools/code/sonar/__init__.py
@@ -1,36 +1,34 @@
 from typing import List, Literal, Optional
 from langchain_core.tools import BaseToolkit, BaseTool
-from pydantic import create_model, BaseModel, ConfigDict, Field
+from pydantic import create_model, BaseModel, ConfigDict, Field
 
 from .api_wrapper import SonarApiWrapper
 from ...base.tool import BaseAction
-from ...
+from ...elitea_base import filter_missconfigured_index_tools
+from ...utils import clean_string, get_max_toolkit_length
+from ....configurations.sonar import SonarConfiguration
 
 name = "sonar"
 
 def get_tools(tool):
     return SonarToolkit().get_toolkit(
         selected_tools=tool['settings'].get('selected_tools', []),
-        url=tool['settings']['url'],
-        sonar_token=tool['settings']['sonar_token'],
         sonar_project_name=tool['settings']['sonar_project_name'],
+        sonar_configuration=tool['settings']['sonar_configuration'],
         toolkit_name=tool.get('toolkit_name')
     ).get_tools()
 
 
 class SonarToolkit(BaseToolkit):
     tools: list[BaseTool] = []
-    toolkit_max_length: int = 0
 
     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in SonarApiWrapper.model_construct().get_available_tools()}
-        SonarToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            url=(str, Field(description="SonarQube Server URL", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': SonarToolkit.toolkit_max_length})),
-            sonar_token=(SecretStr, Field(description="SonarQube user token for authentication", json_schema_extra={'secret': True})),
             sonar_project_name=(str, Field(description="Project name of the desired repository")),
+            sonar_configuration=(SonarConfiguration, Field(description="Sonar Configuration", json_schema_extra={'configuration_types': ['sonar']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=
                 {
@@ -44,20 +42,28 @@ class SonarToolkit(BaseToolkit):
         )
 
     @classmethod
+    @filter_missconfigured_index_tools
    def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
         if selected_tools is None:
             selected_tools = []
-
+        wrapper_payload = {
+            **kwargs,
+            **kwargs.get('sonar_configuration', {}),
+        }
+        sonar_api_wrapper = SonarApiWrapper(**wrapper_payload)
         available_tools = sonar_api_wrapper.get_available_tools()
         tools = []
-        prefix = clean_string(toolkit_name, SonarToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=sonar_api_wrapper,
-                name=
-                description=
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)
```
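
The Sonar toolkit change above replaces the top-level `url` and `sonar_token` settings with a single `sonar_configuration` object that `get_toolkit` merges into the wrapper payload. A minimal sketch of what a settings dict might look like after this change (the keys inside `sonar_configuration` are assumed from the removed fields; the authoritative schema is `SonarConfiguration` in `alita_sdk/configurations/sonar.py`):

```python
# Hypothetical settings payload for the updated Sonar toolkit wiring (sketch only).
tool = {
    "toolkit_name": "code_quality",
    "settings": {
        "selected_tools": [],                 # empty list -> expose all available tools
        "sonar_project_name": "my-project",
        # Connection details now travel inside the configuration object
        # instead of top-level url / sonar_token fields.
        "sonar_configuration": {
            "url": "https://sonar.example.com",
            "sonar_token": "<token>",
        },
    },
}

# get_toolkit() merges the configuration into the wrapper payload roughly like this
# (using the settings dict directly here for brevity):
settings = tool["settings"]
wrapper_payload = {**settings, **settings.get("sonar_configuration", {})}
```

Because the configuration dict is spread last, its keys win over any same-named top-level settings.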
```diff
--- /dev/null
+++ b/alita_sdk/tools/code_indexer_toolkit.py
@@ -0,0 +1,199 @@
+import ast
+import fnmatch
+import json
+import logging
+from typing import Optional, List, Generator
+
+from langchain_core.documents import Document
+from langchain_core.tools import ToolException
+from pydantic import Field
+
+from alita_sdk.tools.base_indexer_toolkit import BaseIndexerToolkit
+
+logger = logging.getLogger(__name__)
+
+
+class CodeIndexerToolkit(BaseIndexerToolkit):
+    def _get_indexed_data(self, index_name: str):
+        self._ensure_vectorstore_initialized()
+        if not self.vector_adapter:
+            raise ToolException("Vector adapter is not initialized. "
+                                "Check your configuration: embedding_model and vectorstore_type.")
+        return self.vector_adapter.get_code_indexed_data(self, index_name)
+
+    def key_fn(self, document: Document):
+        return document.metadata.get("filename")
+
+    def compare_fn(self, document: Document, idx_data):
+        return (document.metadata.get('commit_hash') and
+                idx_data.get('commit_hashes') and
+                document.metadata.get('commit_hash') in idx_data.get('commit_hashes')
+                )
+
+    def remove_ids_fn(self, idx_data, key: str):
+        return idx_data[key]['ids']
+
+    def _base_loader(
+            self,
+            branch: Optional[str] = None,
+            whitelist: Optional[List[str]] = None,
+            blacklist: Optional[List[str]] = None,
+            **kwargs) -> Generator[Document, None, None]:
+        """Index repository files in the vector store using code parsing."""
+        yield from self.loader(
+            branch=branch,
+            whitelist=whitelist,
+            blacklist=blacklist
+        )
+
+    def _extend_data(self, documents: Generator[Document, None, None]):
+        yield from documents
+
+    def _index_tool_params(self):
+        """Return the parameters for indexing data."""
+        return {
+            "branch": (Optional[str], Field(
+                description="Branch to index files from. Defaults to active branch if None.",
+                default=None)),
+            "whitelist": (Optional[List[str]], Field(
+                description='File extensions or paths to include. Defaults to all files if None. Example: ["*.md", "*.java"]',
+                default=None)),
+            "blacklist": (Optional[List[str]], Field(
+                description='File extensions or paths to exclude. Defaults to no exclusions if None. Example: ["*.md", "*.java"]',
+                default=None)),
+        }
+
+    def loader(self,
+               branch: Optional[str] = None,
+               whitelist: Optional[List[str]] = None,
+               blacklist: Optional[List[str]] = None,
+               chunked: bool = True) -> Generator[Document, None, None]:
+        """
+        Generates Documents from files in a branch, respecting whitelist and blacklist patterns.
+
+        Parameters:
+        - branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
+        - whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
+        - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
+        - chunked (bool): If True (default), applies universal chunker based on file type.
+          If False, returns raw Documents without chunking.
+
+        Returns:
+        - generator: Yields Documents from files matching the whitelist but not the blacklist.
+          Each document has exactly the key 'filename' in metadata, which is used as an ID
+          for further operations (indexing, deduplication, and retrieval).
+
+        Example:
+        # Use 'feature-branch', include '.py' files, exclude 'test_' files
+        for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
+            print(doc.page_content)
+
+        Notes:
+        - Whitelist and blacklist use Unix shell-style wildcards.
+        - Files must match the whitelist and not the blacklist to be included.
+        - Each document MUST have exactly the key 'filename' in metadata. This key is used as an ID
+          for further operations such as indexing, deduplication, and retrieval.
+        - When chunked=True:
+          - .md files → markdown chunker (header-based splitting)
+          - .py/.js/.ts/etc → code parser (TreeSitter-based)
+          - .json files → JSON chunker
+          - other files → default text chunker
+        """
+        import hashlib
+
+        _files = self.__handle_get_files("", self.__get_branch(branch))
+        self._log_tool_event(message="Listing files in branch", tool_name="loader")
+        logger.info(f"Files in branch: {_files}")
+
+        def is_whitelisted(file_path: str) -> bool:
+            if whitelist:
+                return (any(fnmatch.fnmatch(file_path, pattern) for pattern in whitelist)
+                        or any(file_path.endswith(f'.{pattern}') for pattern in whitelist))
+            return True
+
+        def is_blacklisted(file_path: str) -> bool:
+            if blacklist:
+                return (any(fnmatch.fnmatch(file_path, pattern) for pattern in blacklist)
+                        or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
+            return False
+
+        def raw_document_generator() -> Generator[Document, None, None]:
+            """Yields raw Documents without chunking."""
+            self._log_tool_event(message="Reading the files", tool_name="loader")
+            total_files = len(_files)
+            processed = 0
+
+            for idx, file in enumerate(_files, 1):
+                if is_whitelisted(file) and not is_blacklisted(file):
+                    try:
+                        file_content = self._read_file(file, self.__get_branch(branch))
+                    except Exception as e:
+                        logger.error(f"Failed to read file {file}: {e}")
+                        continue
+
+                    if not file_content:
+                        continue
+
+                    # Ensure file content is a string
+                    if isinstance(file_content, bytes):
+                        file_content = file_content.decode("utf-8", errors="ignore")
+                    elif isinstance(file_content, dict) and file.endswith('.json'):
+                        file_content = json.dumps(file_content)
+                    elif not isinstance(file_content, str):
+                        file_content = str(file_content)
+
+                    # Hash the file content for uniqueness tracking
+                    file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
+                    processed += 1
+
+                    yield Document(
+                        page_content=file_content,
+                        metadata={
+                            'file_path': file,
+                            'filename': file,
+                            'source': file,
+                            'commit_hash': file_hash,
+                        }
+                    )
+
+                if idx % 10 == 0 or idx == total_files:
+                    self._log_tool_event(
+                        message=f"{idx} out of {total_files} files checked, {processed} matched",
+                        tool_name="loader"
+                    )
+
+            self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
+
+        if not chunked:
+            # Return raw documents without chunking
+            return raw_document_generator()
+
+        # Apply universal chunker based on file type
+        from .chunkers.universal_chunker import universal_chunker
+        return universal_chunker(raw_document_generator())
+
+    def __handle_get_files(self, path: str, branch: str):
+        """
+        Handles the retrieval of files from a specific path and branch.
+        This method should be implemented in subclasses to provide the actual file retrieval logic.
+        """
+        _files = self._get_files(path=path, branch=branch)
+        if isinstance(_files, str):
+            try:
+                # Attempt to convert the string to a list using ast.literal_eval
+                _files = ast.literal_eval(_files)
+                # Ensure that the result is actually a list of strings
+                if not isinstance(_files, list) or not all(isinstance(item, str) for item in _files):
+                    raise ValueError("The evaluated result is not a list of strings")
+            except (SyntaxError, ValueError):
+                # Handle the case where the string cannot be converted to a list
+                raise ValueError("Expected a list of strings, but got a string that cannot be converted")
+
+        # Ensure _files is a list of strings
+        if not isinstance(_files, list) or not all(isinstance(item, str) for item in _files):
+            raise ValueError("Expected a list of strings")
+        return _files
+
+    def __get_branch(self, branch):
+        return (branch or getattr(self, 'active_branch', None)
+                or getattr(self, '_active_branch', None) or getattr(self, 'branch', None))
```
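
Running `CodeIndexerToolkit.loader` end to end requires a subclass that provides `_get_files` and `_read_file`, so a full example is out of scope here; the sketch below only reproduces the whitelist/blacklist matching rules described in the docstring (Unix shell-style wildcards via `fnmatch`, plus a bare-extension fallback), using hypothetical file paths:

```python
import fnmatch

# Mirrors the matching logic in CodeIndexerToolkit.loader (sketch, not the SDK code itself).
def matches(file_path, whitelist=None, blacklist=None):
    def hit(patterns):
        # A pattern matches via shell-style wildcards or as a bare extension ("md" -> ".md").
        return (any(fnmatch.fnmatch(file_path, p) for p in patterns)
                or any(file_path.endswith(f".{p}") for p in patterns))
    allowed = hit(whitelist) if whitelist else True
    blocked = hit(blacklist) if blacklist else False
    return allowed and not blocked

files = ["src/app.py", "tests/test_app.py", "README.md", "assets/logo.png"]
selected = [f for f in files if matches(f, whitelist=["*.py", "md"], blacklist=["*test_*"])]
print(selected)  # ['src/app.py', 'README.md']
```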
```diff
--- a/alita_sdk/tools/confluence/__init__.py
+++ b/alita_sdk/tools/confluence/__init__.py
@@ -4,7 +4,9 @@ from .api_wrapper import ConfluenceAPIWrapper
 from langchain_core.tools import BaseTool
 from ..base.tool import BaseAction
 from pydantic import create_model, BaseModel, ConfigDict, Field
-
+
+from ..elitea_base import filter_missconfigured_index_tools
+from ..utils import clean_string, get_max_toolkit_length, parse_list, check_connection_response
 from ...configurations.confluence import ConfluenceConfiguration
 from ...configurations.pgvector import PgVectorConfiguration
 import requests
@@ -14,7 +16,6 @@ name = "confluence"
 def get_tools(tool):
     return ConfluenceToolkit().get_toolkit(
         selected_tools=tool['settings'].get('selected_tools', []),
-        base_url=tool['settings']['base_url'],
         space=tool['settings'].get('space', None),
         cloud=tool['settings'].get('cloud', True),
         confluence_configuration=tool['settings']['confluence_configuration'],
@@ -37,13 +38,11 @@ def get_tools(tool):
 
 class ConfluenceToolkit(BaseToolkit):
     tools: List[BaseTool] = []
-    toolkit_max_length: int = 0
 
     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in
                           ConfluenceAPIWrapper.model_construct().get_available_tools()}
-        ConfluenceToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
 
         @check_connection_response
         def check_connection(self):
@@ -66,8 +65,7 @@ class ConfluenceToolkit(BaseToolkit):
 
         model = create_model(
             name,
-            space=(str, Field(description="Space",
-                              'max_toolkit_length': ConfluenceToolkit.toolkit_max_length})),
+            space=(str, Field(description="Space")),
             cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
             limit=(int, Field(description="Pages limit per request", default=5)),
             labels=(Optional[str], Field(
@@ -80,8 +78,8 @@ class ConfluenceToolkit(BaseToolkit):
             min_retry_seconds=(int, Field(description="Min retry, sec", default=10)),
             max_retry_seconds=(int, Field(description="Max retry, sec", default=60)),
             # optional field for custom headers as dictionary
-            custom_headers=(Optional[dict], Field(description="Custom headers for API requests", default=
-            confluence_configuration=(
+            custom_headers=(Optional[dict], Field(description="Custom headers for API requests", default={})),
+            confluence_configuration=(ConfluenceConfiguration, Field(description="Confluence Configuration", json_schema_extra={'configuration_types': ['confluence']})),
             pgvector_configuration=(Optional[PgVectorConfiguration], Field(default = None,
                                                                            description="PgVector Configuration",
                                                                            json_schema_extra={'configuration_types': ['pgvector']})),
@@ -103,6 +101,7 @@ class ConfluenceToolkit(BaseToolkit):
         return model
 
     @classmethod
+    @filter_missconfigured_index_tools
     def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
         if selected_tools is None:
             selected_tools = []
@@ -113,17 +112,21 @@ class ConfluenceToolkit(BaseToolkit):
             **(kwargs.get('pgvector_configuration') or {}),
         }
         confluence_api_wrapper = ConfluenceAPIWrapper(**wrapper_payload)
-        prefix = clean_string(toolkit_name, ConfluenceToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         available_tools = confluence_api_wrapper.get_available_tools()
         tools = []
         for tool in available_tools:
             if selected_tools:
                 if tool["name"] not in selected_tools:
                     continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = f"Confluence space: {confluence_api_wrapper.space}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=confluence_api_wrapper,
-                name=
-                description=
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)
```
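
As in the Sonar toolkit, the prefix built from `clean_string` and `TOOLKIT_SPLITTER` is gone; each tool now keeps its plain name and gets a description prefixed with the toolkit name and the Confluence space, capped at 1000 characters. A standalone sketch of that description shaping with hypothetical values:

```python
# Sketch of the new description shaping in ConfluenceToolkit.get_toolkit (hypothetical inputs).
toolkit_name = "docs"
space = "ENG"  # stands in for confluence_api_wrapper.space
description = "Search Confluence pages by CQL and return matching content."

if toolkit_name:
    description = f"Toolkit: {toolkit_name}\n{description}"
description = f"Confluence space: {space}\n{description}"
description = description[:1000]  # keep tool descriptions within a 1000-character budget

print(description)
```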